mirror of
https://github.com/onyx-dot-app/onyx.git
synced 2026-04-15 03:42:52 +00:00
Compare commits
37 Commits
bo/pruning
...
dane/csv4
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
99671b365d | ||
|
|
2facb367c2 | ||
|
|
8898a5ac57 | ||
|
|
a2f7221bc4 | ||
|
|
0d1452cc06 | ||
|
|
c432d39ec0 | ||
|
|
cdc61a155c | ||
|
|
fb41378796 | ||
|
|
46e6e1173e | ||
|
|
b1165c7408 | ||
|
|
ea5769bc4a | ||
|
|
8288483146 | ||
|
|
7437efe808 | ||
|
|
b61b59ffba | ||
|
|
bc3b5600bb | ||
|
|
80c48ff80e | ||
|
|
8cd205a2e5 | ||
|
|
722a159e2e | ||
|
|
8ac49fa587 | ||
|
|
b5c215ab91 | ||
|
|
2c456bebf6 | ||
|
|
dc413abbba | ||
|
|
ff1b3e88de | ||
|
|
ac39e2031c | ||
|
|
70f5a906d1 | ||
|
|
f2b4adc70a | ||
|
|
390a102f8a | ||
|
|
9978f5c2d5 | ||
|
|
cdf655e9f6 | ||
|
|
2aaf0b2f38 | ||
|
|
5fab2900c5 | ||
|
|
41eaa0c542 | ||
|
|
a496fc27e4 | ||
|
|
ae34379ba6 | ||
|
|
8f2bbe57f3 | ||
|
|
3294edbc72 | ||
|
|
efb2d11864 |
@@ -2,7 +2,6 @@ FROM ubuntu:26.04@sha256:cc925e589b7543b910fea57a240468940003fbfc0515245a495dd0a
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
curl \
|
||||
default-jre \
|
||||
fd-find \
|
||||
fzf \
|
||||
git \
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "Onyx Dev Sandbox",
|
||||
"image": "onyxdotapp/onyx-devcontainer@sha256:0f02d9299928849c7b15f3b348dcfdcdcb64411ff7a4580cbc026a6ee7aa1554",
|
||||
"image": "onyxdotapp/onyx-devcontainer@sha256:12184169c5bcc9cca0388286d5ffe504b569bc9c37bfa631b76ee8eee2064055",
|
||||
"runArgs": ["--cap-add=NET_ADMIN", "--cap-add=NET_RAW"],
|
||||
"mounts": [
|
||||
"source=${localEnv:HOME}/.claude,target=/home/dev/.claude,type=bind",
|
||||
|
||||
@@ -96,14 +96,11 @@ def get_model_app() -> FastAPI:
|
||||
title="Onyx Model Server", version=__version__, lifespan=lifespan
|
||||
)
|
||||
if SENTRY_DSN:
|
||||
from onyx.configs.sentry import _add_instance_tags
|
||||
|
||||
sentry_sdk.init(
|
||||
dsn=SENTRY_DSN,
|
||||
integrations=[StarletteIntegration(), FastApiIntegration()],
|
||||
traces_sample_rate=0.1,
|
||||
release=__version__,
|
||||
before_send=_add_instance_tags,
|
||||
)
|
||||
logger.info("Sentry initialized")
|
||||
else:
|
||||
|
||||
@@ -63,14 +63,11 @@ logger = setup_logger()
|
||||
task_logger = get_task_logger(__name__)
|
||||
|
||||
if SENTRY_DSN:
|
||||
from onyx.configs.sentry import _add_instance_tags
|
||||
|
||||
sentry_sdk.init(
|
||||
dsn=SENTRY_DSN,
|
||||
integrations=[CeleryIntegration()],
|
||||
traces_sample_rate=0.1,
|
||||
release=__version__,
|
||||
before_send=_add_instance_tags,
|
||||
)
|
||||
logger.info("Sentry initialized")
|
||||
else:
|
||||
|
||||
@@ -135,13 +135,10 @@ def _docfetching_task(
|
||||
# Since connector_indexing_proxy_task spawns a new process using this function as
|
||||
# the entrypoint, we init Sentry here.
|
||||
if SENTRY_DSN:
|
||||
from onyx.configs.sentry import _add_instance_tags
|
||||
|
||||
sentry_sdk.init(
|
||||
dsn=SENTRY_DSN,
|
||||
traces_sample_rate=0.1,
|
||||
release=__version__,
|
||||
before_send=_add_instance_tags,
|
||||
)
|
||||
logger.info("Sentry initialized")
|
||||
else:
|
||||
|
||||
@@ -5,7 +5,6 @@ from datetime import datetime
|
||||
from datetime import timedelta
|
||||
from datetime import timezone
|
||||
|
||||
import sentry_sdk
|
||||
from celery import Celery
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
@@ -557,27 +556,6 @@ def connector_document_extraction(
|
||||
|
||||
# save record of any failures at the connector level
|
||||
if failure is not None:
|
||||
if failure.exception is not None:
|
||||
with sentry_sdk.new_scope() as scope:
|
||||
scope.set_tag("stage", "connector_fetch")
|
||||
scope.set_tag("connector_source", db_connector.source.value)
|
||||
scope.set_tag("cc_pair_id", str(cc_pair_id))
|
||||
scope.set_tag("index_attempt_id", str(index_attempt_id))
|
||||
scope.set_tag("tenant_id", tenant_id)
|
||||
if failure.failed_document:
|
||||
scope.set_tag(
|
||||
"doc_id", failure.failed_document.document_id
|
||||
)
|
||||
if failure.failed_entity:
|
||||
scope.set_tag(
|
||||
"entity_id", failure.failed_entity.entity_id
|
||||
)
|
||||
scope.fingerprint = [
|
||||
"connector-fetch-failure",
|
||||
db_connector.source.value,
|
||||
type(failure.exception).__name__,
|
||||
]
|
||||
sentry_sdk.capture_exception(failure.exception)
|
||||
total_failures += 1
|
||||
with get_session_with_current_tenant() as db_session:
|
||||
create_index_attempt_error(
|
||||
|
||||
@@ -1,48 +0,0 @@
|
||||
from typing import Any
|
||||
|
||||
from sentry_sdk.types import Event
|
||||
|
||||
from onyx.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
_instance_id_resolved = False
|
||||
|
||||
|
||||
def _add_instance_tags(
|
||||
event: Event,
|
||||
hint: dict[str, Any], # noqa: ARG001
|
||||
) -> Event | None:
|
||||
"""Sentry before_send hook that lazily attaches instance identification tags.
|
||||
|
||||
On the first event, resolves the instance UUID from the KV store (requires DB)
|
||||
and sets it as a global Sentry tag. Subsequent events pick it up automatically.
|
||||
"""
|
||||
global _instance_id_resolved
|
||||
|
||||
if _instance_id_resolved:
|
||||
return event
|
||||
|
||||
try:
|
||||
import sentry_sdk
|
||||
|
||||
from shared_configs.configs import MULTI_TENANT
|
||||
|
||||
if MULTI_TENANT:
|
||||
instance_id = "multi-tenant-cloud"
|
||||
else:
|
||||
from onyx.utils.telemetry import get_or_generate_uuid
|
||||
|
||||
instance_id = get_or_generate_uuid()
|
||||
|
||||
sentry_sdk.set_tag("instance_id", instance_id)
|
||||
|
||||
# Also set on this event since set_tag won't retroactively apply
|
||||
event.setdefault("tags", {})["instance_id"] = instance_id
|
||||
|
||||
# Only mark resolved after success — if DB wasn't ready, retry next event
|
||||
_instance_id_resolved = True
|
||||
except Exception:
|
||||
logger.debug("Failed to resolve instance_id for Sentry tagging")
|
||||
|
||||
return event
|
||||
@@ -10,6 +10,7 @@ from typing import Optional
|
||||
from urllib.parse import quote
|
||||
|
||||
import boto3
|
||||
from backend.onyx.connectors.models import TabularSection
|
||||
from botocore.client import Config
|
||||
from botocore.credentials import RefreshableCredentials
|
||||
from botocore.exceptions import ClientError
|
||||
@@ -26,6 +27,10 @@ from onyx.configs.constants import FileOrigin
|
||||
from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
|
||||
process_onyx_metadata,
|
||||
)
|
||||
from onyx.connectors.cross_connector_utils.tabular_section_utils import is_tabular_file
|
||||
from onyx.connectors.cross_connector_utils.tabular_section_utils import (
|
||||
tabular_file_to_sections,
|
||||
)
|
||||
from onyx.connectors.exceptions import ConnectorValidationError
|
||||
from onyx.connectors.exceptions import CredentialExpiredError
|
||||
from onyx.connectors.exceptions import InsufficientPermissionsError
|
||||
@@ -451,6 +456,40 @@ class BlobStorageConnector(LoadConnector, PollConnector):
|
||||
logger.exception(f"Error processing image {key}")
|
||||
continue
|
||||
|
||||
# Handle tabular files (xlsx, csv, tsv) — produce one
|
||||
# TabularSection per sheet (or per file for csv/tsv)
|
||||
# instead of a flat TextSection.
|
||||
if is_tabular_file(file_name):
|
||||
try:
|
||||
downloaded_file = self._download_object(key)
|
||||
if downloaded_file is None:
|
||||
continue
|
||||
tabular_sections = tabular_file_to_sections(
|
||||
BytesIO(downloaded_file),
|
||||
file_name=file_name,
|
||||
link=link,
|
||||
)
|
||||
batch.append(
|
||||
Document(
|
||||
id=f"{self.bucket_type}:{self.bucket_name}:{key}",
|
||||
sections=(
|
||||
tabular_sections
|
||||
if tabular_sections
|
||||
else [TabularSection(link=link, text="")]
|
||||
),
|
||||
source=DocumentSource(self.bucket_type.value),
|
||||
semantic_identifier=file_name,
|
||||
doc_updated_at=last_modified,
|
||||
metadata={},
|
||||
)
|
||||
)
|
||||
if len(batch) == self.batch_size:
|
||||
yield batch
|
||||
batch = []
|
||||
except Exception:
|
||||
logger.exception(f"Error processing tabular file {key}")
|
||||
continue
|
||||
|
||||
# Handle text and document files
|
||||
try:
|
||||
downloaded_file = self._download_object(key)
|
||||
|
||||
@@ -43,7 +43,7 @@ def tabular_file_to_sections(
|
||||
|
||||
if lowered.endswith(".xlsx"):
|
||||
return [
|
||||
TabularSection(link=f"sheet:{sheet_title}", text=csv_text)
|
||||
TabularSection(link=f"{file_name} :: {sheet_title}", text=csv_text)
|
||||
for csv_text, sheet_title in xlsx_sheet_extraction(
|
||||
file, file_name=file_name
|
||||
)
|
||||
|
||||
@@ -15,6 +15,10 @@ from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
|
||||
)
|
||||
from onyx.connectors.cross_connector_utils.rate_limit_wrapper import rate_limit_builder
|
||||
from onyx.connectors.cross_connector_utils.rate_limit_wrapper import rl_requests
|
||||
from onyx.connectors.cross_connector_utils.tabular_section_utils import is_tabular_file
|
||||
from onyx.connectors.cross_connector_utils.tabular_section_utils import (
|
||||
tabular_file_to_sections,
|
||||
)
|
||||
from onyx.connectors.drupal_wiki.models import DrupalWikiCheckpoint
|
||||
from onyx.connectors.drupal_wiki.models import DrupalWikiPage
|
||||
from onyx.connectors.drupal_wiki.models import DrupalWikiPageResponse
|
||||
@@ -33,6 +37,7 @@ from onyx.connectors.models import DocumentFailure
|
||||
from onyx.connectors.models import HierarchyNode
|
||||
from onyx.connectors.models import ImageSection
|
||||
from onyx.connectors.models import SlimDocument
|
||||
from onyx.connectors.models import TabularSection
|
||||
from onyx.connectors.models import TextSection
|
||||
from onyx.file_processing.extract_file_text import extract_text_and_images
|
||||
from onyx.file_processing.extract_file_text import get_file_ext
|
||||
@@ -213,7 +218,7 @@ class DrupalWikiConnector(
|
||||
attachment: dict[str, Any],
|
||||
page_id: int,
|
||||
download_url: str,
|
||||
) -> tuple[list[TextSection | ImageSection], str | None]:
|
||||
) -> tuple[list[TextSection | ImageSection | TabularSection], str | None]:
|
||||
"""
|
||||
Process a single attachment and return generated sections.
|
||||
|
||||
@@ -226,7 +231,7 @@ class DrupalWikiConnector(
|
||||
Tuple of (sections, error_message). If error_message is not None, the
|
||||
sections list should be treated as invalid.
|
||||
"""
|
||||
sections: list[TextSection | ImageSection] = []
|
||||
sections: list[TextSection | ImageSection | TabularSection] = []
|
||||
|
||||
try:
|
||||
if not self._validate_attachment_filetype(attachment):
|
||||
@@ -273,6 +278,25 @@ class DrupalWikiConnector(
|
||||
|
||||
return sections, None
|
||||
|
||||
# Tabular attachments (xlsx, csv, tsv) — produce
|
||||
# TabularSections instead of a flat TextSection.
|
||||
if is_tabular_file(file_name):
|
||||
try:
|
||||
sections.extend(
|
||||
tabular_file_to_sections(
|
||||
BytesIO(raw_bytes),
|
||||
file_name=file_name,
|
||||
link=download_url,
|
||||
)
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"Failed to extract tabular sections from {file_name}: {e}"
|
||||
)
|
||||
if not sections:
|
||||
return [], f"No content extracted from tabular file {file_name}"
|
||||
return sections, None
|
||||
|
||||
image_counter = 0
|
||||
|
||||
def _store_embedded_image(image_data: bytes, image_name: str) -> None:
|
||||
@@ -497,7 +521,7 @@ class DrupalWikiConnector(
|
||||
page_url = build_drupal_wiki_document_id(self.base_url, page.id)
|
||||
|
||||
# Create sections with just the page content
|
||||
sections: list[TextSection | ImageSection] = [
|
||||
sections: list[TextSection | ImageSection | TabularSection] = [
|
||||
TextSection(text=text_content, link=page_url)
|
||||
]
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@ import json
|
||||
import os
|
||||
from datetime import datetime
|
||||
from datetime import timezone
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from typing import IO
|
||||
@@ -12,11 +13,16 @@ from onyx.configs.constants import FileOrigin
|
||||
from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
|
||||
process_onyx_metadata,
|
||||
)
|
||||
from onyx.connectors.cross_connector_utils.tabular_section_utils import is_tabular_file
|
||||
from onyx.connectors.cross_connector_utils.tabular_section_utils import (
|
||||
tabular_file_to_sections,
|
||||
)
|
||||
from onyx.connectors.interfaces import GenerateDocumentsOutput
|
||||
from onyx.connectors.interfaces import LoadConnector
|
||||
from onyx.connectors.models import Document
|
||||
from onyx.connectors.models import HierarchyNode
|
||||
from onyx.connectors.models import ImageSection
|
||||
from onyx.connectors.models import TabularSection
|
||||
from onyx.connectors.models import TextSection
|
||||
from onyx.file_processing.extract_file_text import extract_text_and_images
|
||||
from onyx.file_processing.extract_file_text import get_file_ext
|
||||
@@ -179,8 +185,32 @@ def _process_file(
|
||||
link = onyx_metadata.link or link
|
||||
|
||||
# Build sections: first the text as a single Section
|
||||
sections: list[TextSection | ImageSection] = []
|
||||
if extraction_result.text_content.strip():
|
||||
sections: list[TextSection | ImageSection | TabularSection] = []
|
||||
if is_tabular_file(file_name):
|
||||
# Produce TabularSections
|
||||
lowered_name = file_name.lower()
|
||||
if lowered_name.endswith(".xlsx"):
|
||||
file.seek(0)
|
||||
tabular_source: IO[bytes] = file
|
||||
else:
|
||||
tabular_source = BytesIO(
|
||||
extraction_result.text_content.encode("utf-8", errors="replace")
|
||||
)
|
||||
try:
|
||||
sections.extend(
|
||||
tabular_file_to_sections(
|
||||
file=tabular_source,
|
||||
file_name=file_name,
|
||||
link=link or "",
|
||||
)
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to process tabular file {file_name}: {e}")
|
||||
return []
|
||||
if not sections:
|
||||
logger.warning(f"No content extracted from tabular file {file_name}")
|
||||
return []
|
||||
elif extraction_result.text_content.strip():
|
||||
logger.debug(f"Creating TextSection for {file_name} with link: {link}")
|
||||
sections.append(
|
||||
TextSection(link=link, text=extraction_result.text_content.strip())
|
||||
|
||||
@@ -13,6 +13,10 @@ from pydantic import BaseModel
|
||||
from onyx.access.models import ExternalAccess
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.configs.constants import FileOrigin
|
||||
from onyx.connectors.cross_connector_utils.tabular_section_utils import is_tabular_file
|
||||
from onyx.connectors.cross_connector_utils.tabular_section_utils import (
|
||||
tabular_file_to_sections,
|
||||
)
|
||||
from onyx.connectors.google_drive.constants import DRIVE_FOLDER_TYPE
|
||||
from onyx.connectors.google_drive.constants import DRIVE_SHORTCUT_TYPE
|
||||
from onyx.connectors.google_drive.models import GDriveMimeType
|
||||
@@ -28,15 +32,16 @@ from onyx.connectors.models import Document
|
||||
from onyx.connectors.models import DocumentFailure
|
||||
from onyx.connectors.models import ImageSection
|
||||
from onyx.connectors.models import SlimDocument
|
||||
from onyx.connectors.models import TabularSection
|
||||
from onyx.connectors.models import TextSection
|
||||
from onyx.file_processing.extract_file_text import extract_file_text
|
||||
from onyx.file_processing.extract_file_text import get_file_ext
|
||||
from onyx.file_processing.extract_file_text import pptx_to_text
|
||||
from onyx.file_processing.extract_file_text import read_docx_file
|
||||
from onyx.file_processing.extract_file_text import read_pdf_file
|
||||
from onyx.file_processing.extract_file_text import xlsx_to_text
|
||||
from onyx.file_processing.file_types import OnyxFileExtensions
|
||||
from onyx.file_processing.file_types import OnyxMimeTypes
|
||||
from onyx.file_processing.file_types import SPREADSHEET_MIME_TYPE
|
||||
from onyx.file_processing.image_utils import store_image_and_create_section
|
||||
from onyx.utils.logger import setup_logger
|
||||
from onyx.utils.variable_functionality import (
|
||||
@@ -289,7 +294,7 @@ def _download_and_extract_sections_basic(
|
||||
service: GoogleDriveService,
|
||||
allow_images: bool,
|
||||
size_threshold: int,
|
||||
) -> list[TextSection | ImageSection]:
|
||||
) -> list[TextSection | ImageSection | TabularSection]:
|
||||
"""Extract text and images from a Google Drive file."""
|
||||
file_id = file["id"]
|
||||
file_name = file["name"]
|
||||
@@ -308,7 +313,7 @@ def _download_and_extract_sections_basic(
|
||||
return []
|
||||
|
||||
# Store images for later processing
|
||||
sections: list[TextSection | ImageSection] = []
|
||||
sections: list[TextSection | ImageSection | TabularSection] = []
|
||||
try:
|
||||
section, embedded_id = store_image_and_create_section(
|
||||
image_data=response_call(),
|
||||
@@ -323,10 +328,9 @@ def _download_and_extract_sections_basic(
|
||||
logger.error(f"Failed to process image {file_name}: {e}")
|
||||
return sections
|
||||
|
||||
# For Google Docs, Sheets, and Slides, export as plain text
|
||||
# For Google Docs, Sheets, and Slides, export via the Drive API
|
||||
if mime_type in GOOGLE_MIME_TYPES_TO_EXPORT:
|
||||
export_mime_type = GOOGLE_MIME_TYPES_TO_EXPORT[mime_type]
|
||||
# Use the correct API call for exporting files
|
||||
request = service.files().export_media(
|
||||
fileId=file_id, mimeType=export_mime_type
|
||||
)
|
||||
@@ -335,6 +339,17 @@ def _download_and_extract_sections_basic(
|
||||
logger.warning(f"Failed to export {file_name} as {export_mime_type}")
|
||||
return []
|
||||
|
||||
if export_mime_type in OnyxMimeTypes.TABULAR_MIME_TYPES:
|
||||
# Synthesize an extension on the filename
|
||||
ext = ".xlsx" if export_mime_type == SPREADSHEET_MIME_TYPE else ".csv"
|
||||
return list(
|
||||
tabular_file_to_sections(
|
||||
io.BytesIO(response),
|
||||
file_name=f"{file_name}{ext}",
|
||||
link=link,
|
||||
)
|
||||
)
|
||||
|
||||
text = response.decode("utf-8")
|
||||
return [TextSection(link=link, text=text)]
|
||||
|
||||
@@ -356,9 +371,15 @@ def _download_and_extract_sections_basic(
|
||||
|
||||
elif (
|
||||
mime_type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
||||
or is_tabular_file(file_name)
|
||||
):
|
||||
text = xlsx_to_text(io.BytesIO(response_call()), file_name=file_name)
|
||||
return [TextSection(link=link, text=text)] if text else []
|
||||
return list(
|
||||
tabular_file_to_sections(
|
||||
io.BytesIO(response_call()),
|
||||
file_name=file_name,
|
||||
link=link,
|
||||
)
|
||||
)
|
||||
|
||||
elif (
|
||||
mime_type
|
||||
@@ -410,8 +431,9 @@ def _find_nth(haystack: str, needle: str, n: int, start: int = 0) -> int:
|
||||
|
||||
|
||||
def align_basic_advanced(
|
||||
basic_sections: list[TextSection | ImageSection], adv_sections: list[TextSection]
|
||||
) -> list[TextSection | ImageSection]:
|
||||
basic_sections: list[TextSection | ImageSection | TabularSection],
|
||||
adv_sections: list[TextSection],
|
||||
) -> list[TextSection | ImageSection | TabularSection]:
|
||||
"""Align the basic sections with the advanced sections.
|
||||
In particular, the basic sections contain all content of the file,
|
||||
including smart chips like dates and doc links. The advanced sections
|
||||
@@ -428,7 +450,7 @@ def align_basic_advanced(
|
||||
basic_full_text = "".join(
|
||||
[section.text for section in basic_sections if isinstance(section, TextSection)]
|
||||
)
|
||||
new_sections: list[TextSection | ImageSection] = []
|
||||
new_sections: list[TextSection | ImageSection | TabularSection] = []
|
||||
heading_start = 0
|
||||
for adv_ind in range(1, len(adv_sections)):
|
||||
heading = adv_sections[adv_ind].text.split(HEADING_DELIMITER)[0]
|
||||
@@ -599,7 +621,7 @@ def _convert_drive_item_to_document(
|
||||
"""
|
||||
Main entry point for converting a Google Drive file => Document object.
|
||||
"""
|
||||
sections: list[TextSection | ImageSection] = []
|
||||
sections: list[TextSection | ImageSection | TabularSection] = []
|
||||
|
||||
# Only construct these services when needed
|
||||
def _get_drive_service() -> GoogleDriveService:
|
||||
@@ -639,7 +661,9 @@ def _convert_drive_item_to_document(
|
||||
doc_id=file.get("id", ""),
|
||||
)
|
||||
if doc_sections:
|
||||
sections = cast(list[TextSection | ImageSection], doc_sections)
|
||||
sections = cast(
|
||||
list[TextSection | ImageSection | TabularSection], doc_sections
|
||||
)
|
||||
if any(SMART_CHIP_CHAR in section.text for section in doc_sections):
|
||||
logger.debug(
|
||||
f"found smart chips in {file.get('name')}, aligning with basic sections"
|
||||
|
||||
@@ -60,7 +60,12 @@ from onyx.connectors.models import ExternalAccess
|
||||
from onyx.connectors.models import HierarchyNode
|
||||
from onyx.connectors.models import ImageSection
|
||||
from onyx.connectors.models import SlimDocument
|
||||
from onyx.connectors.models import TabularSection
|
||||
from onyx.connectors.models import TextSection
|
||||
from onyx.connectors.cross_connector_utils.tabular_section_utils import (
|
||||
is_tabular_file,
|
||||
tabular_file_to_sections,
|
||||
)
|
||||
from onyx.connectors.sharepoint.connector_utils import get_sharepoint_external_access
|
||||
from onyx.db.enums import HierarchyNodeType
|
||||
from onyx.file_processing.extract_file_text import extract_text_and_images
|
||||
@@ -586,7 +591,7 @@ def _convert_driveitem_to_document_with_permissions(
|
||||
driveitem, f"Failed to download via graph api: {e}", e
|
||||
)
|
||||
|
||||
sections: list[TextSection | ImageSection] = []
|
||||
sections: list[TextSection | ImageSection | TabularSection] = []
|
||||
file_ext = get_file_ext(driveitem.name)
|
||||
|
||||
if not content_bytes:
|
||||
@@ -602,6 +607,19 @@ def _convert_driveitem_to_document_with_permissions(
|
||||
)
|
||||
image_section.link = driveitem.web_url
|
||||
sections.append(image_section)
|
||||
elif is_tabular_file(driveitem.name):
|
||||
try:
|
||||
sections.extend(
|
||||
tabular_file_to_sections(
|
||||
file=io.BytesIO(content_bytes),
|
||||
file_name=driveitem.name,
|
||||
link=driveitem.web_url or "",
|
||||
)
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"Failed to extract tabular sections for '{driveitem.name}': {e}"
|
||||
)
|
||||
else:
|
||||
|
||||
def _store_embedded_image(img_data: bytes, img_name: str) -> None:
|
||||
|
||||
@@ -7,6 +7,7 @@ from onyx.indexing.chunking.image_section_chunker import ImageChunker
|
||||
from onyx.indexing.chunking.section_chunker import AccumulatorState
|
||||
from onyx.indexing.chunking.section_chunker import ChunkPayload
|
||||
from onyx.indexing.chunking.section_chunker import SectionChunker
|
||||
from onyx.indexing.chunking.tabular_section_chunker import TabularChunker
|
||||
from onyx.indexing.chunking.text_section_chunker import TextChunker
|
||||
from onyx.indexing.models import DocAwareChunk
|
||||
from onyx.natural_language_processing.utils import BaseTokenizer
|
||||
@@ -38,6 +39,7 @@ class DocumentChunker:
|
||||
chunk_splitter=chunk_splitter,
|
||||
),
|
||||
SectionType.IMAGE: ImageChunker(),
|
||||
SectionType.TABULAR: TabularChunker(tokenizer=tokenizer),
|
||||
}
|
||||
|
||||
def chunk(
|
||||
|
||||
272
backend/onyx/indexing/chunking/tabular_section_chunker.py
Normal file
272
backend/onyx/indexing/chunking/tabular_section_chunker.py
Normal file
@@ -0,0 +1,272 @@
|
||||
import csv
|
||||
import io
|
||||
from collections.abc import Iterable
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from onyx.connectors.models import Section
|
||||
from onyx.indexing.chunking.section_chunker import AccumulatorState
|
||||
from onyx.indexing.chunking.section_chunker import ChunkPayload
|
||||
from onyx.indexing.chunking.section_chunker import SectionChunker
|
||||
from onyx.indexing.chunking.section_chunker import SectionChunkerOutput
|
||||
from onyx.natural_language_processing.utils import BaseTokenizer
|
||||
from onyx.natural_language_processing.utils import count_tokens
|
||||
from onyx.natural_language_processing.utils import split_text_by_tokens
|
||||
from onyx.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
COLUMNS_MARKER = "Columns:"
|
||||
FIELD_VALUE_SEPARATOR = ", "
|
||||
ROW_JOIN = "\n"
|
||||
NEWLINE_TOKENS = 1
|
||||
|
||||
|
||||
class _ParsedRow(BaseModel):
|
||||
header: list[str]
|
||||
row: list[str]
|
||||
|
||||
|
||||
class _TokenizedText(BaseModel):
|
||||
text: str
|
||||
token_count: int
|
||||
|
||||
|
||||
def format_row(header: list[str], row: list[str]) -> str:
|
||||
"""
|
||||
A header-row combination is formatted like this:
|
||||
field1=value1, field2=value2, field3=value3
|
||||
"""
|
||||
pairs = _row_to_pairs(header, row)
|
||||
formatted = FIELD_VALUE_SEPARATOR.join(f"{h}={v}" for h, v in pairs)
|
||||
return formatted
|
||||
|
||||
|
||||
def format_columns_header(headers: list[str]) -> str:
|
||||
"""
|
||||
Format the column header line. Underscored headers get a
|
||||
space-substituted friendly alias in parens.
|
||||
Example:
|
||||
headers = ["id", "MTTR_hours"]
|
||||
=> "Columns: id, MTTR_hours (MTTR hours)"
|
||||
"""
|
||||
parts: list[str] = []
|
||||
for header in headers:
|
||||
friendly = header
|
||||
if "_" in header:
|
||||
friendly = f'{header} ({header.replace("_", " ")})'
|
||||
parts.append(friendly)
|
||||
return f"{COLUMNS_MARKER} " + FIELD_VALUE_SEPARATOR.join(parts)
|
||||
|
||||
|
||||
def parse_section(section: Section) -> list[_ParsedRow]:
|
||||
"""Parse CSV into headers + rows. First non-empty row is the header;
|
||||
blank rows are skipped."""
|
||||
section_text = section.text or ""
|
||||
if not section_text.strip():
|
||||
return []
|
||||
|
||||
reader = csv.reader(io.StringIO(section_text))
|
||||
non_empty_rows = [row for row in reader if any(cell.strip() for cell in row)]
|
||||
|
||||
if not non_empty_rows:
|
||||
return []
|
||||
|
||||
header, *data_rows = non_empty_rows
|
||||
return [_ParsedRow(header=header, row=row) for row in data_rows]
|
||||
|
||||
|
||||
def _row_to_pairs(headers: list[str], row: list[str]) -> list[tuple[str, str]]:
|
||||
return [(h, v) for h, v in zip(headers, row) if v.strip()]
|
||||
|
||||
|
||||
def pack_chunk(chunk: str, new_row: str) -> str:
|
||||
return chunk + "\n" + new_row
|
||||
|
||||
|
||||
def _split_row_by_pairs(
|
||||
pairs: list[tuple[str, str]],
|
||||
tokenizer: BaseTokenizer,
|
||||
max_tokens: int,
|
||||
) -> list[_TokenizedText]:
|
||||
"""Greedily pack pairs into max-sized pieces. Any single pair that
|
||||
itself exceeds ``max_tokens`` is token-split at id boundaries.
|
||||
No headers."""
|
||||
separator_tokens = count_tokens(FIELD_VALUE_SEPARATOR, tokenizer)
|
||||
pieces: list[_TokenizedText] = []
|
||||
current_parts: list[str] = []
|
||||
current_tokens = 0
|
||||
|
||||
for pair in pairs:
|
||||
pair_str = f"{pair[0]}={pair[1]}"
|
||||
pair_tokens = count_tokens(pair_str, tokenizer)
|
||||
increment = pair_tokens if not current_parts else separator_tokens + pair_tokens
|
||||
|
||||
if current_tokens + increment <= max_tokens:
|
||||
current_parts.append(pair_str)
|
||||
current_tokens += increment
|
||||
continue
|
||||
|
||||
if current_parts:
|
||||
pieces.append(
|
||||
_TokenizedText(
|
||||
text=FIELD_VALUE_SEPARATOR.join(current_parts),
|
||||
token_count=current_tokens,
|
||||
)
|
||||
)
|
||||
current_parts = []
|
||||
current_tokens = 0
|
||||
|
||||
if pair_tokens > max_tokens:
|
||||
for split_text in split_text_by_tokens(pair_str, tokenizer, max_tokens):
|
||||
pieces.append(
|
||||
_TokenizedText(
|
||||
text=split_text,
|
||||
token_count=count_tokens(split_text, tokenizer),
|
||||
)
|
||||
)
|
||||
else:
|
||||
current_parts = [pair_str]
|
||||
current_tokens = pair_tokens
|
||||
|
||||
if current_parts:
|
||||
pieces.append(
|
||||
_TokenizedText(
|
||||
text=FIELD_VALUE_SEPARATOR.join(current_parts),
|
||||
token_count=current_tokens,
|
||||
)
|
||||
)
|
||||
return pieces
|
||||
|
||||
|
||||
def _build_chunk_from_scratch(
|
||||
pairs: list[tuple[str, str]],
|
||||
formatted_row: str,
|
||||
row_tokens: int,
|
||||
column_header: str,
|
||||
column_header_tokens: int,
|
||||
sheet_header: str,
|
||||
sheet_header_tokens: int,
|
||||
tokenizer: BaseTokenizer,
|
||||
max_tokens: int,
|
||||
) -> list[_TokenizedText]:
|
||||
# 1. Row alone is too large — split by pairs, no headers.
|
||||
if row_tokens > max_tokens:
|
||||
return _split_row_by_pairs(pairs, tokenizer, max_tokens)
|
||||
|
||||
chunk = formatted_row
|
||||
chunk_tokens = row_tokens
|
||||
|
||||
# 2. Attempt to add column header
|
||||
candidate_tokens = column_header_tokens + NEWLINE_TOKENS + chunk_tokens
|
||||
if candidate_tokens <= max_tokens:
|
||||
chunk = column_header + ROW_JOIN + chunk
|
||||
chunk_tokens = candidate_tokens
|
||||
|
||||
# 3. Attempt to add sheet header
|
||||
if sheet_header:
|
||||
candidate_tokens = sheet_header_tokens + NEWLINE_TOKENS + chunk_tokens
|
||||
if candidate_tokens <= max_tokens:
|
||||
chunk = sheet_header + ROW_JOIN + chunk
|
||||
chunk_tokens = candidate_tokens
|
||||
|
||||
return [_TokenizedText(text=chunk, token_count=chunk_tokens)]
|
||||
|
||||
|
||||
def parse_to_chunks(
|
||||
rows: Iterable[_ParsedRow],
|
||||
sheet_header: str,
|
||||
tokenizer: BaseTokenizer,
|
||||
max_tokens: int,
|
||||
) -> list[str]:
|
||||
rows_list = list(rows)
|
||||
if not rows_list:
|
||||
return []
|
||||
|
||||
column_header = format_columns_header(rows_list[0].header)
|
||||
column_header_tokens = count_tokens(column_header, tokenizer)
|
||||
sheet_header_tokens = count_tokens(sheet_header, tokenizer) if sheet_header else 0
|
||||
|
||||
chunks: list[str] = []
|
||||
current_chunk = ""
|
||||
current_chunk_tokens = 0
|
||||
|
||||
for row in rows_list:
|
||||
pairs: list[tuple[str, str]] = _row_to_pairs(row.header, row.row)
|
||||
formatted = format_row(row.header, row.row)
|
||||
row_tokens = count_tokens(formatted, tokenizer)
|
||||
|
||||
if current_chunk:
|
||||
# Attempt to pack it in (additive approximation)
|
||||
if current_chunk_tokens + NEWLINE_TOKENS + row_tokens <= max_tokens:
|
||||
current_chunk = pack_chunk(current_chunk, formatted)
|
||||
current_chunk_tokens += NEWLINE_TOKENS + row_tokens
|
||||
continue
|
||||
# Doesn't fit — flush and start new
|
||||
chunks.append(current_chunk)
|
||||
current_chunk = ""
|
||||
current_chunk_tokens = 0
|
||||
|
||||
# Build chunk from scratch
|
||||
for piece in _build_chunk_from_scratch(
|
||||
pairs=pairs,
|
||||
formatted_row=formatted,
|
||||
row_tokens=row_tokens,
|
||||
column_header=column_header,
|
||||
column_header_tokens=column_header_tokens,
|
||||
sheet_header=sheet_header,
|
||||
sheet_header_tokens=sheet_header_tokens,
|
||||
tokenizer=tokenizer,
|
||||
max_tokens=max_tokens,
|
||||
):
|
||||
if current_chunk:
|
||||
chunks.append(current_chunk)
|
||||
current_chunk = piece.text
|
||||
current_chunk_tokens = piece.token_count
|
||||
|
||||
# Flush remaining
|
||||
if current_chunk:
|
||||
chunks.append(current_chunk)
|
||||
|
||||
return chunks
|
||||
|
||||
|
||||
class TabularChunker(SectionChunker):
|
||||
def __init__(self, tokenizer: BaseTokenizer) -> None:
|
||||
self.tokenizer = tokenizer
|
||||
|
||||
def chunk_section(
|
||||
self,
|
||||
section: Section,
|
||||
accumulator: AccumulatorState,
|
||||
content_token_limit: int,
|
||||
) -> SectionChunkerOutput:
|
||||
payloads = accumulator.flush_to_list()
|
||||
|
||||
parsed_rows = parse_section(section)
|
||||
if not parsed_rows:
|
||||
logger.warning(
|
||||
f"TabularChunker: skipping unparseable section (link={section.link})"
|
||||
)
|
||||
return SectionChunkerOutput(
|
||||
payloads=payloads, accumulator=AccumulatorState()
|
||||
)
|
||||
|
||||
sheet_header = section.link or ""
|
||||
chunk_texts = parse_to_chunks(
|
||||
rows=parsed_rows,
|
||||
sheet_header=sheet_header,
|
||||
tokenizer=self.tokenizer,
|
||||
max_tokens=content_token_limit,
|
||||
)
|
||||
|
||||
for i, text in enumerate(chunk_texts):
|
||||
payloads.append(
|
||||
ChunkPayload(
|
||||
text=text,
|
||||
links={0: section.link or ""},
|
||||
is_continuation=(i > 0),
|
||||
)
|
||||
)
|
||||
return SectionChunkerOutput(payloads=payloads, accumulator=AccumulatorState())
|
||||
@@ -10,6 +10,7 @@ from onyx.indexing.chunking.section_chunker import SectionChunker
|
||||
from onyx.indexing.chunking.section_chunker import SectionChunkerOutput
|
||||
from onyx.natural_language_processing.utils import BaseTokenizer
|
||||
from onyx.natural_language_processing.utils import count_tokens
|
||||
from onyx.natural_language_processing.utils import split_text_by_tokens
|
||||
from onyx.utils.text_processing import clean_text
|
||||
from onyx.utils.text_processing import shared_precompare_cleanup
|
||||
from shared_configs.configs import STRICT_CHUNK_TOKEN_LIMIT
|
||||
@@ -90,8 +91,8 @@ class TextChunker(SectionChunker):
|
||||
STRICT_CHUNK_TOKEN_LIMIT
|
||||
and count_tokens(split_text, self.tokenizer) > content_token_limit
|
||||
):
|
||||
smaller_chunks = self._split_oversized_chunk(
|
||||
split_text, content_token_limit
|
||||
smaller_chunks = split_text_by_tokens(
|
||||
split_text, self.tokenizer, content_token_limit
|
||||
)
|
||||
for j, small_chunk in enumerate(smaller_chunks):
|
||||
payloads.append(
|
||||
@@ -114,16 +115,3 @@ class TextChunker(SectionChunker):
|
||||
payloads=payloads,
|
||||
accumulator=AccumulatorState(),
|
||||
)
|
||||
|
||||
def _split_oversized_chunk(self, text: str, content_token_limit: int) -> list[str]:
|
||||
tokens = self.tokenizer.tokenize(text)
|
||||
chunks: list[str] = []
|
||||
start = 0
|
||||
total_tokens = len(tokens)
|
||||
while start < total_tokens:
|
||||
end = min(start + content_token_limit, total_tokens)
|
||||
token_chunk = tokens[start:end]
|
||||
chunk_text = " ".join(token_chunk)
|
||||
chunks.append(chunk_text)
|
||||
start = end
|
||||
return chunks
|
||||
|
||||
@@ -3,8 +3,6 @@ from abc import ABC
|
||||
from abc import abstractmethod
|
||||
from collections import defaultdict
|
||||
|
||||
import sentry_sdk
|
||||
|
||||
from onyx.connectors.models import ConnectorFailure
|
||||
from onyx.connectors.models import ConnectorStopSignal
|
||||
from onyx.connectors.models import DocumentFailure
|
||||
@@ -293,13 +291,6 @@ def embed_chunks_with_failure_handling(
|
||||
)
|
||||
embedded_chunks.extend(doc_embedded_chunks)
|
||||
except Exception as e:
|
||||
with sentry_sdk.new_scope() as scope:
|
||||
scope.set_tag("stage", "embedding")
|
||||
scope.set_tag("doc_id", doc_id)
|
||||
if tenant_id:
|
||||
scope.set_tag("tenant_id", tenant_id)
|
||||
scope.fingerprint = ["embedding-failure", type(e).__name__]
|
||||
sentry_sdk.capture_exception(e)
|
||||
logger.exception(f"Failed to embed chunks for document '{doc_id}'")
|
||||
failures.append(
|
||||
ConnectorFailure(
|
||||
|
||||
@@ -5,7 +5,6 @@ from collections.abc import Iterator
|
||||
from contextlib import contextmanager
|
||||
from typing import Protocol
|
||||
|
||||
import sentry_sdk
|
||||
from pydantic import BaseModel
|
||||
from pydantic import ConfigDict
|
||||
from sqlalchemy.orm import Session
|
||||
@@ -333,13 +332,6 @@ def index_doc_batch_with_handler(
|
||||
except Exception as e:
|
||||
# don't log the batch directly, it's too much text
|
||||
document_ids = [doc.id for doc in document_batch]
|
||||
with sentry_sdk.new_scope() as scope:
|
||||
scope.set_tag("stage", "indexing_pipeline")
|
||||
scope.set_tag("tenant_id", tenant_id)
|
||||
scope.set_tag("batch_size", str(len(document_batch)))
|
||||
scope.set_extra("document_ids", document_ids)
|
||||
scope.fingerprint = ["indexing-pipeline-failure", type(e).__name__]
|
||||
sentry_sdk.capture_exception(e)
|
||||
logger.exception(f"Failed to index document batch: {document_ids}")
|
||||
|
||||
index_pipeline_result = IndexingPipelineResult(
|
||||
|
||||
@@ -6,7 +6,6 @@ from itertools import chain
|
||||
from itertools import groupby
|
||||
|
||||
import httpx
|
||||
import sentry_sdk
|
||||
|
||||
from onyx.connectors.models import ConnectorFailure
|
||||
from onyx.connectors.models import DocumentFailure
|
||||
@@ -89,12 +88,6 @@ def write_chunks_to_vector_db_with_backoff(
|
||||
)
|
||||
)
|
||||
except Exception as e:
|
||||
with sentry_sdk.new_scope() as scope:
|
||||
scope.set_tag("stage", "vector_db_write")
|
||||
scope.set_tag("doc_id", doc_id)
|
||||
scope.set_tag("tenant_id", index_batch_params.tenant_id)
|
||||
scope.fingerprint = ["vector-db-write-failure", type(e).__name__]
|
||||
sentry_sdk.capture_exception(e)
|
||||
logger.exception(
|
||||
f"Failed to write document chunks for '{doc_id}' to vector db"
|
||||
)
|
||||
|
||||
@@ -434,14 +434,11 @@ def get_application(lifespan_override: Lifespan | None = None) -> FastAPI:
|
||||
lifespan=lifespan_override or lifespan,
|
||||
)
|
||||
if SENTRY_DSN:
|
||||
from onyx.configs.sentry import _add_instance_tags
|
||||
|
||||
sentry_sdk.init(
|
||||
dsn=SENTRY_DSN,
|
||||
integrations=[StarletteIntegration(), FastApiIntegration()],
|
||||
traces_sample_rate=0.1,
|
||||
release=__version__,
|
||||
before_send=_add_instance_tags,
|
||||
)
|
||||
logger.info("Sentry initialized")
|
||||
else:
|
||||
|
||||
@@ -201,6 +201,33 @@ def count_tokens(
|
||||
return total
|
||||
|
||||
|
||||
def split_text_by_tokens(
|
||||
text: str,
|
||||
tokenizer: BaseTokenizer,
|
||||
max_tokens: int,
|
||||
) -> list[str]:
|
||||
"""Split ``text`` into pieces of ≤ ``max_tokens`` tokens each, via
|
||||
encode/decode at token-id boundaries.
|
||||
|
||||
Note: the returned pieces are not strictly guaranteed to re-tokenize to
|
||||
≤ max_tokens. BPE merges at window boundaries may drift by a few tokens,
|
||||
and cuts landing mid-multi-byte-UTF-8-character produce replacement
|
||||
characters on decode. Good enough for "best-effort" splitting of
|
||||
oversized content, not for hard limit enforcement.
|
||||
"""
|
||||
if not text:
|
||||
return []
|
||||
|
||||
token_ids: list[int] = []
|
||||
for start in range(0, len(text), _ENCODE_CHUNK_SIZE):
|
||||
token_ids.extend(tokenizer.encode(text[start : start + _ENCODE_CHUNK_SIZE]))
|
||||
|
||||
return [
|
||||
tokenizer.decode(token_ids[start : start + max_tokens])
|
||||
for start in range(0, len(token_ids), max_tokens)
|
||||
]
|
||||
|
||||
|
||||
def tokenizer_trim_content(
|
||||
content: str, desired_length: int, tokenizer: BaseTokenizer
|
||||
) -> str:
|
||||
|
||||
@@ -1,88 +0,0 @@
|
||||
from typing import cast
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import patch
|
||||
|
||||
from sentry_sdk.types import Event
|
||||
|
||||
import onyx.configs.sentry as sentry_module
|
||||
from onyx.configs.sentry import _add_instance_tags
|
||||
|
||||
|
||||
def _event(data: dict) -> Event:
|
||||
"""Helper to create a Sentry Event from a plain dict for testing."""
|
||||
return cast(Event, data)
|
||||
|
||||
|
||||
def _reset_state() -> None:
|
||||
"""Reset the module-level resolved flag between tests."""
|
||||
sentry_module._instance_id_resolved = False
|
||||
|
||||
|
||||
class TestAddInstanceTags:
|
||||
def setup_method(self) -> None:
|
||||
_reset_state()
|
||||
|
||||
@patch("onyx.utils.telemetry.get_or_generate_uuid", return_value="test-uuid-1234")
|
||||
@patch("sentry_sdk.set_tag")
|
||||
def test_first_event_sets_instance_id(
|
||||
self, mock_set_tag: MagicMock, mock_uuid: MagicMock
|
||||
) -> None:
|
||||
result = _add_instance_tags(_event({"message": "test error"}), {})
|
||||
|
||||
assert result is not None
|
||||
assert result["tags"]["instance_id"] == "test-uuid-1234"
|
||||
mock_set_tag.assert_called_once_with("instance_id", "test-uuid-1234")
|
||||
mock_uuid.assert_called_once()
|
||||
|
||||
@patch("onyx.utils.telemetry.get_or_generate_uuid", return_value="test-uuid-1234")
|
||||
@patch("sentry_sdk.set_tag")
|
||||
def test_second_event_skips_resolution(
|
||||
self, _mock_set_tag: MagicMock, mock_uuid: MagicMock
|
||||
) -> None:
|
||||
_add_instance_tags(_event({"message": "first"}), {})
|
||||
result = _add_instance_tags(_event({"message": "second"}), {})
|
||||
|
||||
assert result is not None
|
||||
assert "tags" not in result # second event not modified
|
||||
mock_uuid.assert_called_once() # only resolved once
|
||||
|
||||
@patch(
|
||||
"onyx.utils.telemetry.get_or_generate_uuid",
|
||||
side_effect=Exception("DB unavailable"),
|
||||
)
|
||||
@patch("sentry_sdk.set_tag")
|
||||
def test_resolution_failure_still_returns_event(
|
||||
self, _mock_set_tag: MagicMock, _mock_uuid: MagicMock
|
||||
) -> None:
|
||||
result = _add_instance_tags(_event({"message": "test error"}), {})
|
||||
|
||||
assert result is not None
|
||||
assert result["message"] == "test error"
|
||||
assert "tags" not in result or "instance_id" not in result.get("tags", {})
|
||||
|
||||
@patch(
|
||||
"onyx.utils.telemetry.get_or_generate_uuid",
|
||||
side_effect=Exception("DB unavailable"),
|
||||
)
|
||||
@patch("sentry_sdk.set_tag")
|
||||
def test_resolution_failure_retries_on_next_event(
|
||||
self, _mock_set_tag: MagicMock, mock_uuid: MagicMock
|
||||
) -> None:
|
||||
"""If resolution fails (e.g. DB not ready), retry on the next event."""
|
||||
_add_instance_tags(_event({"message": "first"}), {})
|
||||
_add_instance_tags(_event({"message": "second"}), {})
|
||||
|
||||
assert mock_uuid.call_count == 2 # retried on second event
|
||||
|
||||
@patch("onyx.utils.telemetry.get_or_generate_uuid", return_value="test-uuid-1234")
|
||||
@patch("sentry_sdk.set_tag")
|
||||
def test_preserves_existing_tags(
|
||||
self, _mock_set_tag: MagicMock, _mock_uuid: MagicMock
|
||||
) -> None:
|
||||
result = _add_instance_tags(
|
||||
_event({"message": "test", "tags": {"existing": "tag"}}), {}
|
||||
)
|
||||
|
||||
assert result is not None
|
||||
assert result["tags"]["existing"] == "tag"
|
||||
assert result["tags"]["instance_id"] == "test-uuid-1234"
|
||||
551
backend/tests/unit/onyx/indexing/test_tabular_section_chunker.py
Normal file
551
backend/tests/unit/onyx/indexing/test_tabular_section_chunker.py
Normal file
@@ -0,0 +1,551 @@
|
||||
"""End-to-end tests for `TabularChunker.chunk_section`.
|
||||
|
||||
Each test is structured as:
|
||||
INPUT — the CSV text passed to the chunker + token budget + link
|
||||
EXPECTED — the exact chunk texts the chunker should emit
|
||||
ACT — a single call to `chunk_section`
|
||||
ASSERT — literal equality against the expected chunk texts
|
||||
|
||||
A character-level tokenizer (1 char == 1 token) is used so token-budget
|
||||
arithmetic is deterministic and expected chunks can be spelled out
|
||||
exactly.
|
||||
"""
|
||||
|
||||
from onyx.connectors.models import Section
|
||||
from onyx.connectors.models import TabularSection
|
||||
from onyx.indexing.chunking.section_chunker import AccumulatorState
|
||||
from onyx.indexing.chunking.tabular_section_chunker import TabularChunker
|
||||
from onyx.natural_language_processing.utils import BaseTokenizer
|
||||
|
||||
|
||||
class CharTokenizer(BaseTokenizer):
|
||||
def encode(self, string: str) -> list[int]:
|
||||
return [ord(c) for c in string]
|
||||
|
||||
def tokenize(self, string: str) -> list[str]:
|
||||
return list(string)
|
||||
|
||||
def decode(self, tokens: list[int]) -> str:
|
||||
return "".join(chr(t) for t in tokens)
|
||||
|
||||
|
||||
def _make_chunker() -> TabularChunker:
|
||||
return TabularChunker(tokenizer=CharTokenizer())
|
||||
|
||||
|
||||
def _tabular_section(text: str, link: str = "sheet:Test") -> Section:
|
||||
return TabularSection(text=text, link=link)
|
||||
|
||||
|
||||
class TestTabularChunkerChunkSection:
|
||||
def test_simple_csv_all_rows_fit_one_chunk(self) -> None:
|
||||
# --- INPUT -----------------------------------------------------
|
||||
csv_text = "Name,Age,City\n" "Alice,30,NYC\n" "Bob,25,SF\n"
|
||||
link = "sheet:People"
|
||||
content_token_limit = 500
|
||||
|
||||
# --- EXPECTED --------------------------------------------------
|
||||
expected_texts = [
|
||||
(
|
||||
"sheet:People\n"
|
||||
"Columns: Name, Age, City\n"
|
||||
"Name=Alice, Age=30, City=NYC\n"
|
||||
"Name=Bob, Age=25, City=SF"
|
||||
),
|
||||
]
|
||||
|
||||
# --- ACT -------------------------------------------------------
|
||||
out = _make_chunker().chunk_section(
|
||||
_tabular_section(csv_text, link=link),
|
||||
AccumulatorState(),
|
||||
content_token_limit=content_token_limit,
|
||||
)
|
||||
|
||||
# --- ASSERT ----------------------------------------------------
|
||||
assert [p.text for p in out.payloads] == expected_texts
|
||||
assert [p.is_continuation for p in out.payloads] == [False]
|
||||
assert all(p.links == {0: link} for p in out.payloads)
|
||||
assert out.accumulator.is_empty()
|
||||
|
||||
def test_overflow_splits_into_two_deterministic_chunks(self) -> None:
|
||||
# --- INPUT -----------------------------------------------------
|
||||
# prelude = "sheet:S\nColumns: col, val" (25 chars = 25 tokens)
|
||||
# At content_token_limit=57, row_budget = max(16, 57-31-1) = 25.
|
||||
# Each row "col=a, val=1" is 12 tokens; two rows + \n = 25 (fits),
|
||||
# three rows + 2×\n = 38 (overflows) → split after 2 rows.
|
||||
csv_text = "col,val\n" "a,1\n" "b,2\n" "c,3\n" "d,4\n"
|
||||
link = "sheet:S"
|
||||
content_token_limit = 57
|
||||
|
||||
# --- EXPECTED --------------------------------------------------
|
||||
expected_texts = [
|
||||
("sheet:S\n" "Columns: col, val\n" "col=a, val=1\n" "col=b, val=2"),
|
||||
("sheet:S\n" "Columns: col, val\n" "col=c, val=3\n" "col=d, val=4"),
|
||||
]
|
||||
|
||||
# --- ACT -------------------------------------------------------
|
||||
out = _make_chunker().chunk_section(
|
||||
_tabular_section(csv_text, link=link),
|
||||
AccumulatorState(),
|
||||
content_token_limit=content_token_limit,
|
||||
)
|
||||
|
||||
# --- ASSERT ----------------------------------------------------
|
||||
assert [p.text for p in out.payloads] == expected_texts
|
||||
# First chunk is fresh; subsequent chunks mark as continuations.
|
||||
assert [p.is_continuation for p in out.payloads] == [False, True]
|
||||
# Link carries through every chunk.
|
||||
assert all(p.links == {0: link} for p in out.payloads)
|
||||
|
||||
# Add back in shortly
|
||||
# def test_header_only_csv_produces_single_prelude_chunk(self) -> None:
|
||||
# # --- INPUT -----------------------------------------------------
|
||||
# csv_text = "col1,col2\n"
|
||||
# link = "sheet:Headers"
|
||||
|
||||
# # --- EXPECTED --------------------------------------------------
|
||||
# expected_texts = [
|
||||
# "sheet:Headers\nColumns: col1, col2",
|
||||
# ]
|
||||
|
||||
# # --- ACT -------------------------------------------------------
|
||||
# out = _make_chunker().chunk_section(
|
||||
# _tabular_section(csv_text, link=link),
|
||||
# AccumulatorState(),
|
||||
# content_token_limit=500,
|
||||
# )
|
||||
|
||||
# # --- ASSERT ----------------------------------------------------
|
||||
# assert [p.text for p in out.payloads] == expected_texts
|
||||
|
||||
def test_empty_cells_dropped_from_chunk_text(self) -> None:
|
||||
# --- INPUT -----------------------------------------------------
|
||||
# Alice's Age is empty; Bob's City is empty. Empty cells should
|
||||
# not appear as `field=` pairs in the output.
|
||||
csv_text = "Name,Age,City\n" "Alice,,NYC\n" "Bob,25,\n"
|
||||
link = "sheet:P"
|
||||
|
||||
# --- EXPECTED --------------------------------------------------
|
||||
expected_texts = [
|
||||
(
|
||||
"sheet:P\n"
|
||||
"Columns: Name, Age, City\n"
|
||||
"Name=Alice, City=NYC\n"
|
||||
"Name=Bob, Age=25"
|
||||
),
|
||||
]
|
||||
|
||||
# --- ACT -------------------------------------------------------
|
||||
out = _make_chunker().chunk_section(
|
||||
_tabular_section(csv_text, link=link),
|
||||
AccumulatorState(),
|
||||
content_token_limit=500,
|
||||
)
|
||||
|
||||
# --- ASSERT ----------------------------------------------------
|
||||
assert [p.text for p in out.payloads] == expected_texts
|
||||
|
||||
def test_quoted_commas_in_csv_preserved_as_one_field(self) -> None:
|
||||
# --- INPUT -----------------------------------------------------
|
||||
# "Hello, world" is quoted in the CSV, so csv.reader parses it as
|
||||
# a single field. The surrounding quotes are stripped during
|
||||
# decoding, so the chunk text carries the bare value.
|
||||
csv_text = "Name,Notes\n" 'Alice,"Hello, world"\n'
|
||||
link = "sheet:P"
|
||||
|
||||
# --- EXPECTED --------------------------------------------------
|
||||
expected_texts = [
|
||||
("sheet:P\n" "Columns: Name, Notes\n" "Name=Alice, Notes=Hello, world"),
|
||||
]
|
||||
|
||||
# --- ACT -------------------------------------------------------
|
||||
out = _make_chunker().chunk_section(
|
||||
_tabular_section(csv_text, link=link),
|
||||
AccumulatorState(),
|
||||
content_token_limit=500,
|
||||
)
|
||||
|
||||
# --- ASSERT ----------------------------------------------------
|
||||
assert [p.text for p in out.payloads] == expected_texts
|
||||
|
||||
def test_blank_rows_in_csv_are_skipped(self) -> None:
|
||||
# --- INPUT -----------------------------------------------------
|
||||
# Stray blank rows in the CSV (e.g. export artifacts) shouldn't
|
||||
# produce ghost rows in the output.
|
||||
csv_text = "A,B\n" "\n" "1,2\n" "\n" "\n" "3,4\n"
|
||||
link = "sheet:S"
|
||||
|
||||
# --- EXPECTED --------------------------------------------------
|
||||
expected_texts = [
|
||||
("sheet:S\n" "Columns: A, B\n" "A=1, B=2\n" "A=3, B=4"),
|
||||
]
|
||||
|
||||
# --- ACT -------------------------------------------------------
|
||||
out = _make_chunker().chunk_section(
|
||||
_tabular_section(csv_text, link=link),
|
||||
AccumulatorState(),
|
||||
content_token_limit=500,
|
||||
)
|
||||
|
||||
# --- ASSERT ----------------------------------------------------
|
||||
assert [p.text for p in out.payloads] == expected_texts
|
||||
|
||||
def test_accumulator_flushes_before_tabular_chunks(self) -> None:
|
||||
# --- INPUT -----------------------------------------------------
|
||||
# A text accumulator was populated by the prior text section.
|
||||
# Tabular sections are structural boundaries, so the pending
|
||||
# text is flushed as its own chunk before the tabular content.
|
||||
pending_text = "prior paragraph from an earlier text section"
|
||||
pending_link = "prev-link"
|
||||
|
||||
csv_text = "a,b\n" "1,2\n"
|
||||
link = "sheet:S"
|
||||
|
||||
# --- EXPECTED --------------------------------------------------
|
||||
expected_texts = [
|
||||
pending_text, # flushed accumulator
|
||||
("sheet:S\n" "Columns: a, b\n" "a=1, b=2"),
|
||||
]
|
||||
|
||||
# --- ACT -------------------------------------------------------
|
||||
out = _make_chunker().chunk_section(
|
||||
_tabular_section(csv_text, link=link),
|
||||
AccumulatorState(
|
||||
text=pending_text,
|
||||
link_offsets={0: pending_link},
|
||||
),
|
||||
content_token_limit=500,
|
||||
)
|
||||
|
||||
# --- ASSERT ----------------------------------------------------
|
||||
assert [p.text for p in out.payloads] == expected_texts
|
||||
# Flushed chunk keeps the prior text's link; tabular chunk uses
|
||||
# the tabular section's link.
|
||||
assert out.payloads[0].links == {0: pending_link}
|
||||
assert out.payloads[1].links == {0: link}
|
||||
# Accumulator resets — tabular section is a structural boundary.
|
||||
assert out.accumulator.is_empty()
|
||||
|
||||
def test_multi_row_packing_under_budget_emits_single_chunk(self) -> None:
|
||||
# --- INPUT -----------------------------------------------------
|
||||
# Three small rows (20 tokens each) under a generous
|
||||
# content_token_limit=100 should pack into ONE chunk — prelude
|
||||
# emitted once, rows stacked beneath it.
|
||||
csv_text = (
|
||||
"x\n" "aaaaaaaaaaaaaaaaaa\n" "bbbbbbbbbbbbbbbbbb\n" "cccccccccccccccccc\n"
|
||||
)
|
||||
link = "S"
|
||||
content_token_limit = 100
|
||||
|
||||
# --- EXPECTED --------------------------------------------------
|
||||
# Each formatted row "x=<18-char value>" = 20 tokens.
|
||||
# Full chunk with sheet + Columns + 3 rows =
|
||||
# 1 + 1 + 10 + 1 + (20 + 1 + 20 + 1 + 20) = 75 tokens ≤ 100.
|
||||
# Single chunk carries all three rows.
|
||||
expected_texts = [
|
||||
"S\n"
|
||||
"Columns: x\n"
|
||||
"x=aaaaaaaaaaaaaaaaaa\n"
|
||||
"x=bbbbbbbbbbbbbbbbbb\n"
|
||||
"x=cccccccccccccccccc"
|
||||
]
|
||||
|
||||
# --- ACT -------------------------------------------------------
|
||||
out = _make_chunker().chunk_section(
|
||||
_tabular_section(csv_text, link=link),
|
||||
AccumulatorState(),
|
||||
content_token_limit=content_token_limit,
|
||||
)
|
||||
|
||||
# --- ASSERT ----------------------------------------------------
|
||||
assert [p.text for p in out.payloads] == expected_texts
|
||||
assert [p.is_continuation for p in out.payloads] == [False]
|
||||
assert all(len(p.text) <= content_token_limit for p in out.payloads)
|
||||
|
||||
def test_packing_reserves_prelude_budget_so_every_chunk_has_full_prelude(
|
||||
self,
|
||||
) -> None:
|
||||
# --- INPUT -----------------------------------------------------
|
||||
# Budget (30) is large enough for all 5 bare rows (row_block =
|
||||
# 24 tokens) to pack as one chunk if the prelude were optional,
|
||||
# but [sheet] + Columns + 5_rows would be 41 tokens > 30. The
|
||||
# packing logic reserves space for the prelude: only 2 rows
|
||||
# pack per chunk (17 prelude overhead + 9 rows = 26 ≤ 30).
|
||||
# Every emitted chunk therefore carries its full prelude rather
|
||||
# than dropping Columns at emit time.
|
||||
csv_text = "x\n" "aa\n" "bb\n" "cc\n" "dd\n" "ee\n"
|
||||
link = "S"
|
||||
content_token_limit = 30
|
||||
|
||||
# --- EXPECTED --------------------------------------------------
|
||||
# Prelude overhead = 'S\nColumns: x\n' = 1+1+10+1 = 13.
|
||||
# Each row "x=XX" = 4 tokens, row separator "\n" = 1.
|
||||
# 3 rows: 13 + (4+1+4+1+4) = 27 ≤ 30 ✓
|
||||
# 4 rows: 13 + (4+1+4+1+4+1+4) = 32 > 30 ✗
|
||||
# → 3 rows in the first chunk, 2 rows in the second.
|
||||
expected_texts = [
|
||||
"S\nColumns: x\nx=aa\nx=bb\nx=cc",
|
||||
"S\nColumns: x\nx=dd\nx=ee",
|
||||
]
|
||||
|
||||
# --- ACT -------------------------------------------------------
|
||||
out = _make_chunker().chunk_section(
|
||||
_tabular_section(csv_text, link=link),
|
||||
AccumulatorState(),
|
||||
content_token_limit=content_token_limit,
|
||||
)
|
||||
|
||||
# --- ASSERT ----------------------------------------------------
|
||||
assert [p.text for p in out.payloads] == expected_texts
|
||||
# Every chunk fits under the budget AND carries its full
|
||||
# prelude — that's the whole point of this check.
|
||||
assert all(len(p.text) <= content_token_limit for p in out.payloads)
|
||||
assert all("Columns: x" in p.text for p in out.payloads)
|
||||
|
||||
def test_oversized_row_splits_into_field_pieces_no_prelude(self) -> None:
|
||||
# --- INPUT -----------------------------------------------------
|
||||
# Single-row CSV whose formatted form ("field 1=1, ..." = 53
|
||||
# tokens) exceeds content_token_limit (20). Per the chunker's
|
||||
# rules, oversized rows are split at field boundaries into
|
||||
# pieces each ≤ max_tokens, and no prelude is added to split
|
||||
# pieces (they already consume the full budget). A 53-token row
|
||||
# packs into 3 field-boundary pieces under a 20-token budget.
|
||||
csv_text = "field 1,field 2,field 3,field 4,field 5\n" "1,2,3,4,5\n"
|
||||
link = "S"
|
||||
content_token_limit = 20
|
||||
|
||||
# --- EXPECTED --------------------------------------------------
|
||||
# Row = "field 1=1, field 2=2, field 3=3, field 4=4, field 5=5"
|
||||
# Fields @ 9 tokens each, ", " sep = 2 tokens.
|
||||
# "field 1=1, field 2=2" = 9+2+9 = 20 tokens ≤ 20 ✓
|
||||
# + ", field 3=3" = 20+2+9 = 31 > 20 → flush, start new
|
||||
# "field 3=3, field 4=4" = 9+2+9 = 20 ≤ 20 ✓
|
||||
# + ", field 5=5" = 20+2+9 = 31 > 20 → flush, start new
|
||||
# "field 5=5" = 9 ≤ 20 ✓
|
||||
# ceil(53 / 20) = 3 chunks.
|
||||
expected_texts = [
|
||||
"field 1=1, field 2=2",
|
||||
"field 3=3, field 4=4",
|
||||
"field 5=5",
|
||||
]
|
||||
|
||||
# --- ACT -------------------------------------------------------
|
||||
out = _make_chunker().chunk_section(
|
||||
_tabular_section(csv_text, link=link),
|
||||
AccumulatorState(),
|
||||
content_token_limit=content_token_limit,
|
||||
)
|
||||
|
||||
# --- ASSERT ----------------------------------------------------
|
||||
assert [p.text for p in out.payloads] == expected_texts
|
||||
# Invariant: no chunk exceeds max_tokens.
|
||||
assert all(len(p.text) <= content_token_limit for p in out.payloads)
|
||||
# is_continuation: first chunk False, rest True.
|
||||
assert [p.is_continuation for p in out.payloads] == [False, True, True]
|
||||
|
||||
def test_empty_tabular_section_flushes_accumulator_and_resets_it(
|
||||
self,
|
||||
) -> None:
|
||||
# --- INPUT -----------------------------------------------------
|
||||
# Tabular sections are structural boundaries, so any pending text
|
||||
# buffer is flushed to a chunk before parsing the tabular content
|
||||
# — even if the tabular section itself is empty. The accumulator
|
||||
# is then reset.
|
||||
pending_text = "prior paragraph"
|
||||
pending_link_offsets = {0: "prev-link"}
|
||||
|
||||
# --- EXPECTED --------------------------------------------------
|
||||
expected_texts = [pending_text]
|
||||
|
||||
# --- ACT -------------------------------------------------------
|
||||
out = _make_chunker().chunk_section(
|
||||
_tabular_section("", link="sheet:Empty"),
|
||||
AccumulatorState(
|
||||
text=pending_text,
|
||||
link_offsets=pending_link_offsets,
|
||||
),
|
||||
content_token_limit=500,
|
||||
)
|
||||
|
||||
# --- ASSERT ----------------------------------------------------
|
||||
assert [p.text for p in out.payloads] == expected_texts
|
||||
assert out.accumulator.is_empty()
|
||||
|
||||
def test_single_oversized_field_token_splits_at_id_boundaries(self) -> None:
|
||||
# --- INPUT -----------------------------------------------------
|
||||
# A single `field=value` pair that itself exceeds max_tokens can't
|
||||
# be split at field boundaries — there's only one field. The
|
||||
# chunker falls back to encoding the pair to token ids and
|
||||
# slicing at max-token-sized windows.
|
||||
#
|
||||
# CSV has one column "x" with a 50-char value. Formatted pair =
|
||||
# "x=" + 50 a's = 52 tokens. Budget = 10.
|
||||
csv_text = "x\n" + ("a" * 50) + "\n"
|
||||
link = "S"
|
||||
content_token_limit = 10
|
||||
|
||||
# --- EXPECTED --------------------------------------------------
|
||||
# 52-char pair at 10 tokens per window = 6 pieces:
|
||||
# [0:10) "x=aaaaaaaa" (10)
|
||||
# [10:20) "aaaaaaaaaa" (10)
|
||||
# [20:30) "aaaaaaaaaa" (10)
|
||||
# [30:40) "aaaaaaaaaa" (10)
|
||||
# [40:50) "aaaaaaaaaa" (10)
|
||||
# [50:52) "aa" (2)
|
||||
# Split pieces carry no prelude (they already consume the budget).
|
||||
expected_texts = [
|
||||
"x=aaaaaaaa",
|
||||
"aaaaaaaaaa",
|
||||
"aaaaaaaaaa",
|
||||
"aaaaaaaaaa",
|
||||
"aaaaaaaaaa",
|
||||
"aa",
|
||||
]
|
||||
|
||||
# --- ACT -------------------------------------------------------
|
||||
out = _make_chunker().chunk_section(
|
||||
_tabular_section(csv_text, link=link),
|
||||
AccumulatorState(),
|
||||
content_token_limit=content_token_limit,
|
||||
)
|
||||
|
||||
# --- ASSERT ----------------------------------------------------
|
||||
assert [p.text for p in out.payloads] == expected_texts
|
||||
# Every piece is ≤ max_tokens — the invariant the token-level
|
||||
# fallback exists to enforce.
|
||||
assert all(len(p.text) <= content_token_limit for p in out.payloads)
|
||||
|
||||
def test_underscored_column_gets_friendly_alias_in_parens(self) -> None:
|
||||
# --- INPUT -----------------------------------------------------
|
||||
# Column headers with underscores get a space-substituted friendly
|
||||
# alias appended in parens on the `Columns:` line. Plain headers
|
||||
# pass through untouched.
|
||||
csv_text = "MTTR_hours,id,owner_name\n" "3,42,Alice\n"
|
||||
link = "sheet:M"
|
||||
|
||||
# --- EXPECTED --------------------------------------------------
|
||||
expected_texts = [
|
||||
(
|
||||
"sheet:M\n"
|
||||
"Columns: MTTR_hours (MTTR hours), id, owner_name (owner name)\n"
|
||||
"MTTR_hours=3, id=42, owner_name=Alice"
|
||||
),
|
||||
]
|
||||
|
||||
# --- ACT -------------------------------------------------------
|
||||
out = _make_chunker().chunk_section(
|
||||
_tabular_section(csv_text, link=link),
|
||||
AccumulatorState(),
|
||||
content_token_limit=500,
|
||||
)
|
||||
|
||||
# --- ASSERT ----------------------------------------------------
|
||||
assert [p.text for p in out.payloads] == expected_texts
|
||||
|
||||
def test_oversized_row_between_small_rows_preserves_flanking_chunks(
|
||||
self,
|
||||
) -> None:
|
||||
# --- INPUT -----------------------------------------------------
|
||||
# State-machine check: small row, oversized row, small row. The
|
||||
# first small row should become a preluded chunk; the oversized
|
||||
# row flushes it and emits split fragments without prelude; then
|
||||
# the last small row picks up from wherever the split left off.
|
||||
#
|
||||
# Headers a,b,c,d. Row 1 and row 3 each have only column `a`
|
||||
# populated (tiny). Row 2 is a "fat" row with all four columns
|
||||
# populated.
|
||||
csv_text = "a,b,c,d\n" "1,,,\n" "xxx,yyy,zzz,www\n" "2,,,\n"
|
||||
link = "S"
|
||||
content_token_limit = 20
|
||||
|
||||
# --- EXPECTED --------------------------------------------------
|
||||
# Prelude = 'S\nColumns: a, b, c, d\n' = 1+1+19+1 = 22 > 20, so
|
||||
# sheet fits with the row but full Columns header does not.
|
||||
# Row 1 formatted = "a=1" (3). build_chunk_from_scratch:
|
||||
# cols+row = 20+3 = 23 > 20 → skip cols. sheet+row = 1+1+3 = 5
|
||||
# ≤ 20 → chunk = "S\na=1".
|
||||
# Row 2 formatted = "a=xxx, b=yyy, c=zzz, d=www" (26 > 20) →
|
||||
# flush "S\na=1" and split at pair boundaries:
|
||||
# "a=xxx, b=yyy, c=zzz" (19 ≤ 20 ✓)
|
||||
# "d=www" (5)
|
||||
# Row 3 formatted = "a=2" (3). can_pack onto "d=www" (5):
|
||||
# 5 + 3 + 1 = 9 ≤ 20 ✓ → packs. Trailing fragment from the
|
||||
# split absorbs the next small row, which is the current v2
|
||||
# behavior (the fragment becomes `current_chunk` and the next
|
||||
# small row is appended with the standard packing rules).
|
||||
expected_texts = [
|
||||
"S\na=1",
|
||||
"a=xxx, b=yyy, c=zzz",
|
||||
"d=www\na=2",
|
||||
]
|
||||
|
||||
# --- ACT -------------------------------------------------------
|
||||
out = _make_chunker().chunk_section(
|
||||
_tabular_section(csv_text, link=link),
|
||||
AccumulatorState(),
|
||||
content_token_limit=content_token_limit,
|
||||
)
|
||||
|
||||
# --- ASSERT ----------------------------------------------------
|
||||
assert [p.text for p in out.payloads] == expected_texts
|
||||
assert all(len(p.text) <= content_token_limit for p in out.payloads)
|
||||
|
||||
def test_prelude_layering_column_header_fits_but_sheet_header_does_not(
|
||||
self,
|
||||
) -> None:
|
||||
# --- INPUT -----------------------------------------------------
|
||||
# Budget lets `Columns: x\nx=y` fit but not the additional sheet
|
||||
# header on top. The chunker should add the column header and
|
||||
# drop the sheet header.
|
||||
#
|
||||
# sheet = "LongSheetName" (13), cols = "Columns: x" (10),
|
||||
# row = "x=y" (3). Budget = 15.
|
||||
# cols + row: 10+1+3 = 14 ≤ 15 ✓
|
||||
# sheet + cols + row: 13+1+10+1+3 = 28 > 15 ✗
|
||||
csv_text = "x\n" "y\n"
|
||||
link = "LongSheetName"
|
||||
content_token_limit = 15
|
||||
|
||||
# --- EXPECTED --------------------------------------------------
|
||||
expected_texts = ["Columns: x\nx=y"]
|
||||
|
||||
# --- ACT -------------------------------------------------------
|
||||
out = _make_chunker().chunk_section(
|
||||
_tabular_section(csv_text, link=link),
|
||||
AccumulatorState(),
|
||||
content_token_limit=content_token_limit,
|
||||
)
|
||||
|
||||
# --- ASSERT ----------------------------------------------------
|
||||
assert [p.text for p in out.payloads] == expected_texts
|
||||
|
||||
def test_prelude_layering_sheet_header_fits_but_column_header_does_not(
|
||||
self,
|
||||
) -> None:
|
||||
# --- INPUT -----------------------------------------------------
|
||||
# Budget is too small for the column header but leaves room for
|
||||
# the short sheet header. The chunker should fall back to just
|
||||
# sheet + row (its layered "try cols, then try sheet on top of
|
||||
# whatever we have" logic means sheet is attempted on the bare
|
||||
# row when cols didn't fit).
|
||||
#
|
||||
# sheet = "S" (1), cols = "Columns: ABC, DEF" (17),
|
||||
# row = "ABC=1, DEF=2" (12). Budget = 20.
|
||||
# cols + row: 17+1+12 = 30 > 20 ✗
|
||||
# sheet + row: 1+1+12 = 14 ≤ 20 ✓
|
||||
csv_text = "ABC,DEF\n" "1,2\n"
|
||||
link = "S"
|
||||
content_token_limit = 20
|
||||
|
||||
# --- EXPECTED --------------------------------------------------
|
||||
expected_texts = ["S\nABC=1, DEF=2"]
|
||||
|
||||
# --- ACT -------------------------------------------------------
|
||||
out = _make_chunker().chunk_section(
|
||||
_tabular_section(csv_text, link=link),
|
||||
AccumulatorState(),
|
||||
content_token_limit=content_token_limit,
|
||||
)
|
||||
|
||||
# --- ASSERT ----------------------------------------------------
|
||||
assert [p.text for p in out.payloads] == expected_texts
|
||||
@@ -1,927 +0,0 @@
|
||||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "grafana",
|
||||
"uid": "-- Grafana --"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"id": 72,
|
||||
"links": [],
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"description": " This chart shows how long it takes for Onyx to crawl each source connector and collect the current list of documents. The Y axis represents duration in seconds (bucketed), and each band shows how many enumerations completed within that time range.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
}
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"options": {
|
||||
"calculate": false,
|
||||
"cellGap": 1,
|
||||
"color": {
|
||||
"exponent": 0.5,
|
||||
"fill": "dark-orange",
|
||||
"mode": "scheme",
|
||||
"reverse": false,
|
||||
"scale": "exponential",
|
||||
"scheme": "Oranges",
|
||||
"steps": 64
|
||||
},
|
||||
"exemplars": {
|
||||
"color": "rgba(255,0,255,0.7)"
|
||||
},
|
||||
"filterValues": {
|
||||
"le": 1e-09
|
||||
},
|
||||
"legend": {
|
||||
"show": true
|
||||
},
|
||||
"rowsFrame": {
|
||||
"layout": "auto"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"showColorScale": false,
|
||||
"yHistogram": false
|
||||
},
|
||||
"yAxis": {
|
||||
"axisPlacement": "left",
|
||||
"reverse": false,
|
||||
"unit": "s"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "10.4.1",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(increase(onyx_pruning_enumeration_duration_seconds_bucket[30m])) by (le)",
|
||||
"format": "heatmap",
|
||||
"instant": false,
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Pruning Enumeration Duration",
|
||||
"type": "heatmap"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 7,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "histogram_quantile(0.95, sum(rate(onyx_pruning_enumeration_duration_seconds_bucket[1h])) by (le, connector_type))",
|
||||
"instant": false,
|
||||
"legendFormat": "{{connector_type}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Pruning Enumeration Duration p95 by Connector",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"description": "Shows how many pruning enumerations completed per hour, broken down by connector type. A low count means few connectors are successfully completing the enumeration phase. A count of 0 for a connector type that should be pruning indicates enumerations are timing out before completion.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 8
|
||||
},
|
||||
"id": 6,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(increase(onyx_pruning_enumeration_duration_seconds_count[1h])) by (connector_type)",
|
||||
"instant": false,
|
||||
"legendFormat": "{{connector_type}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Pruning Enumeration Count",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"description": "Shows the 95th percentile execution duration of pruning tasks. A rising p95 indicates pruning jobs are taking longer over time, potentially approaching the 6-hour timeout limit. Sustained values near 21600s (6 hours) indicate connectors with too many documents to prune within the allowed window.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 8
|
||||
},
|
||||
"id": 5,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "histogram_quantile(0.95, sum(rate(onyx_celery_task_duration_seconds_bucket{task_name=~\"connector_pruning.*\"}[1h])) by (le, task_name))",
|
||||
"instant": false,
|
||||
"legendFormat": "{{task_name}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Pruning Task Duration p95",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"description": "hows the number of currently executing pruning tasks on the heavy worker, broken down by task type. A value of 0 means no pruning is actively running. A sustained high count may indicate workers are saturated and new pruning jobs are queuing up.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"__systemRef": "hideSeriesFrom",
|
||||
"matcher": {
|
||||
"id": "byNames",
|
||||
"options": {
|
||||
"mode": "exclude",
|
||||
"names": [
|
||||
"connector_pruning_generator_task"
|
||||
],
|
||||
"prefix": "All except:",
|
||||
"readOnly": true
|
||||
}
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "custom.hideFrom",
|
||||
"value": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": true
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 16
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(onyx_celery_tasks_active{queue=~\"connector_pruning.*|connector_doc_permissions.*|connector_external_group.*|csv_generation|sandbox\"}) by (task_name)",
|
||||
"instant": false,
|
||||
"legendFormat": "{{task_name}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Heavy Worker - Active Tasks",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"description": "This chart shows how frequently Onyx hits rate limits from source connectors during the enumeration phase. Rate limit errors slow down or stall the document crawl, directly increasing enumeration duration. A spike here for a specific connector type indicates the source API is throttling Onyx's requests, which may explain long enumeration times for that connector.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 16
|
||||
},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(onyx_pruning_rate_limit_errors_total[5m])) by (connector_type)",
|
||||
"instant": false,
|
||||
"legendFormat": "{{connector_type}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Pruning Rate Limit Errors",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"description": "Shows the rate of pruning task failures and revocations per hour. Failures indicate crashed tasks (DB errors, timeouts). Revocations indicate cancelled tasks, typically from worker restarts or deployments. Both result in orphaned fences that block future pruning attempts for affected connectors.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 24
|
||||
},
|
||||
"id": 9,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(increase(onyx_celery_task_revoked_total{task_name=~\"connector_pruning.*\"}[1h])) by (task_name)",
|
||||
"hide": false,
|
||||
"instant": false,
|
||||
"legendFormat": "revoked",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(increase(onyx_celery_task_completed_total{task_name=~\"connector_pruning.*\", outcome=\"failure\"}[1h])) by (task_name)",
|
||||
"hide": false,
|
||||
"instant": false,
|
||||
"legendFormat": "failure",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(increase(onyx_celery_task_completed_total{task_name=~\"connector_pruning.*\", outcome=\"success\"}[1h])) by (task_name)",
|
||||
"hide": false,
|
||||
"instant": false,
|
||||
"legendFormat": "success",
|
||||
"range": true,
|
||||
"refId": "C"
|
||||
}
|
||||
],
|
||||
"title": "Heavy Worker - Pruning Task Success & Failures & Revocations",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"description": "Shows the ratio of successfully completed pruning tasks to total completed tasks. A value of 1.0 (100%) means all pruning jobs are completing cleanly. A drop indicates tasks are crashing or timing out, which leads to orphaned fences and connectors being blocked from future pruning attempts.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 24
|
||||
},
|
||||
"id": 8,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": " sum(rate(onyx_celery_task_completed_total{task_name=~\"connector_pruning.*\", outcome=\"success\"}[1h]))\n /\n sum(rate(onyx_celery_task_completed_total{task_name=~\"connector_pruning.*\"}[1h]))",
|
||||
"instant": false,
|
||||
"legendFormat": "Success Rate",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Heavy Worker - Pruning Task Success Rate",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"description": "This chart shows how long it takes Onyx to compare the list of documents fetched from the source connector against what is currently indexed. The diff computes the set difference \u2014 documents that exist in the index but no longer exist in the source are flagged for removal.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
}
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 32
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"calculate": false,
|
||||
"cellGap": 1,
|
||||
"color": {
|
||||
"exponent": 0.5,
|
||||
"fill": "dark-orange",
|
||||
"mode": "scheme",
|
||||
"reverse": false,
|
||||
"scale": "exponential",
|
||||
"scheme": "Oranges",
|
||||
"steps": 64
|
||||
},
|
||||
"exemplars": {
|
||||
"color": "rgba(255,0,255,0.7)"
|
||||
},
|
||||
"filterValues": {
|
||||
"le": 1e-09
|
||||
},
|
||||
"legend": {
|
||||
"show": true
|
||||
},
|
||||
"rowsFrame": {
|
||||
"layout": "auto"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"showColorScale": false,
|
||||
"yHistogram": false
|
||||
},
|
||||
"yAxis": {
|
||||
"axisPlacement": "left",
|
||||
"reverse": false,
|
||||
"unit": "s"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "10.4.1",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(increase(onyx_pruning_diff_duration_seconds_bucket[30m])) by (le)",
|
||||
"format": "heatmap",
|
||||
"instant": false,
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Pruning Diff Duration",
|
||||
"type": "heatmap"
|
||||
}
|
||||
],
|
||||
"schemaVersion": 39,
|
||||
"tags": [],
|
||||
"templating": {
|
||||
"list": []
|
||||
},
|
||||
"time": {
|
||||
"from": "now-6h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "browser",
|
||||
"title": "Indexing - Pruning",
|
||||
"uid": "onyx-indexing-pruning",
|
||||
"version": 10,
|
||||
"weekStart": ""
|
||||
}
|
||||
@@ -38,17 +38,4 @@ metadata:
|
||||
data:
|
||||
onyx-redis-queues.json: |
|
||||
{{- .Files.Get "dashboards/redis-queues.json" | nindent 4 }}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: {{ include "onyx.fullname" . }}-indexing-pruning-dashboard
|
||||
labels:
|
||||
{{- include "onyx.labels" . | nindent 4 }}
|
||||
grafana_dashboard: "1"
|
||||
annotations:
|
||||
grafana_folder: "Onyx"
|
||||
data:
|
||||
onyx-indexing-pruning.json: |
|
||||
{{- .Files.Get "dashboards/indexing-pruning.json" | nindent 4 }}
|
||||
{{- end }}
|
||||
|
||||
388
web/package-lock.json
generated
388
web/package-lock.json
generated
@@ -62,7 +62,7 @@
|
||||
"mdast-util-find-and-replace": "^3.0.1",
|
||||
"mime": "^4.1.0",
|
||||
"motion": "^12.29.0",
|
||||
"next": "16.2.3",
|
||||
"next": "16.1.7",
|
||||
"next-themes": "^0.4.4",
|
||||
"postcss": "^8.5.6",
|
||||
"posthog-js": "^1.176.0",
|
||||
@@ -1676,9 +1676,7 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@img/colour": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@img/colour/-/colour-1.1.0.tgz",
|
||||
"integrity": "sha512-Td76q7j57o/tLVdgS746cYARfSyxk8iEfRxewL9h4OMzYhbW4TAcppl0mT4eyqXddh6L/jwoM75mo7ixa/pCeQ==",
|
||||
"version": "1.0.0",
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"engines": {
|
||||
@@ -1790,9 +1788,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@img/sharp-libvips-linux-ppc64": {
|
||||
"version": "1.2.4",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-ppc64/-/sharp-libvips-linux-ppc64-1.2.4.tgz",
|
||||
"integrity": "sha512-FMuvGijLDYG6lW+b/UvyilUWu5Ayu+3r2d1S8notiGCIyYU/76eig1UfMmkZ7vwgOrzKzlQbFSuQfgm7GYUPpA==",
|
||||
"version": "1.2.3",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-ppc64/-/sharp-libvips-linux-ppc64-1.2.3.tgz",
|
||||
"integrity": "sha512-Y2T7IsQvJLMCBM+pmPbM3bKT/yYJvVtLJGfCs4Sp95SjvnFIjynbjzsa7dY1fRJX45FTSfDksbTp6AGWudiyCg==",
|
||||
"cpu": [
|
||||
"ppc64"
|
||||
],
|
||||
@@ -1805,22 +1803,6 @@
|
||||
"url": "https://opencollective.com/libvips"
|
||||
}
|
||||
},
|
||||
"node_modules/@img/sharp-libvips-linux-riscv64": {
|
||||
"version": "1.2.4",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-riscv64/-/sharp-libvips-linux-riscv64-1.2.4.tgz",
|
||||
"integrity": "sha512-oVDbcR4zUC0ce82teubSm+x6ETixtKZBh/qbREIOcI3cULzDyb18Sr/Wcyx7NRQeQzOiHTNbZFF1UwPS2scyGA==",
|
||||
"cpu": [
|
||||
"riscv64"
|
||||
],
|
||||
"license": "LGPL-3.0-or-later",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"funding": {
|
||||
"url": "https://opencollective.com/libvips"
|
||||
}
|
||||
},
|
||||
"node_modules/@img/sharp-libvips-linux-s390x": {
|
||||
"version": "1.0.4",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-s390x/-/sharp-libvips-linux-s390x-1.0.4.tgz",
|
||||
@@ -1930,9 +1912,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@img/sharp-linux-ppc64": {
|
||||
"version": "0.34.5",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-linux-ppc64/-/sharp-linux-ppc64-0.34.5.tgz",
|
||||
"integrity": "sha512-7zznwNaqW6YtsfrGGDA6BRkISKAAE1Jo0QdpNYXNMHu2+0dTrPflTLNkpc8l7MUP5M16ZJcUvysVWWrMefZquA==",
|
||||
"version": "0.34.4",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-linux-ppc64/-/sharp-linux-ppc64-0.34.4.tgz",
|
||||
"integrity": "sha512-F4PDtF4Cy8L8hXA2p3TO6s4aDt93v+LKmpcYFLAVdkkD3hSxZzee0rh6/+94FpAynsuMpLX5h+LRsSG3rIciUQ==",
|
||||
"cpu": [
|
||||
"ppc64"
|
||||
],
|
||||
@@ -1948,29 +1930,7 @@
|
||||
"url": "https://opencollective.com/libvips"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@img/sharp-libvips-linux-ppc64": "1.2.4"
|
||||
}
|
||||
},
|
||||
"node_modules/@img/sharp-linux-riscv64": {
|
||||
"version": "0.34.5",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-linux-riscv64/-/sharp-linux-riscv64-0.34.5.tgz",
|
||||
"integrity": "sha512-51gJuLPTKa7piYPaVs8GmByo7/U7/7TZOq+cnXJIHZKavIRHAP77e3N2HEl3dgiqdD/w0yUfiJnII77PuDDFdw==",
|
||||
"cpu": [
|
||||
"riscv64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": "^18.17.0 || ^20.3.0 || >=21.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://opencollective.com/libvips"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@img/sharp-libvips-linux-riscv64": "1.2.4"
|
||||
"@img/sharp-libvips-linux-ppc64": "1.2.3"
|
||||
}
|
||||
},
|
||||
"node_modules/@img/sharp-linux-s390x": {
|
||||
@@ -2081,9 +2041,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@img/sharp-win32-arm64": {
|
||||
"version": "0.34.5",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-win32-arm64/-/sharp-win32-arm64-0.34.5.tgz",
|
||||
"integrity": "sha512-WQ3AgWCWYSb2yt+IG8mnC6Jdk9Whs7O0gxphblsLvdhSpSTtmu69ZG1Gkb6NuvxsNACwiPV6cNSZNzt0KPsw7g==",
|
||||
"version": "0.34.4",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-win32-arm64/-/sharp-win32-arm64-0.34.4.tgz",
|
||||
"integrity": "sha512-2Q250do/5WXTwxW3zjsEuMSv5sUU4Tq9VThWKlU2EYLm4MB7ZeMwF+SFJutldYODXF6jzc6YEOC+VfX0SZQPqA==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -2937,9 +2897,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/env": {
|
||||
"version": "16.2.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/env/-/env-16.2.3.tgz",
|
||||
"integrity": "sha512-ZWXyj4uNu4GCWQw9cjRxWlbD+33mcDszIo9iQxFnBX3Wmgq9ulaSJcl6VhuWx5pCWqqD+9W6Wfz7N0lM5lYPMA==",
|
||||
"version": "16.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@next/env/-/env-16.1.7.tgz",
|
||||
"integrity": "sha512-rJJbIdJB/RQr2F1nylZr/PJzamvNNhfr3brdKP6s/GW850jbtR70QlSfFselvIBbcPUOlQwBakexjFzqLzF6pg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@next/eslint-plugin-next": {
|
||||
@@ -2983,9 +2943,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-darwin-arm64": {
|
||||
"version": "16.2.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-16.2.3.tgz",
|
||||
"integrity": "sha512-u37KDKTKQ+OQLvY+z7SNXixwo4Q2/IAJFDzU1fYe66IbCE51aDSAzkNDkWmLN0yjTUh4BKBd+hb69jYn6qqqSg==",
|
||||
"version": "16.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-16.1.7.tgz",
|
||||
"integrity": "sha512-b2wWIE8sABdyafc4IM8r5Y/dS6kD80JRtOGrUiKTsACFQfWWgUQ2NwoUX1yjFMXVsAwcQeNpnucF2ZrujsBBPg==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -2999,9 +2959,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-darwin-x64": {
|
||||
"version": "16.2.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-16.2.3.tgz",
|
||||
"integrity": "sha512-gHjL/qy6Q6CG3176FWbAKyKh9IfntKZTB3RY/YOJdDFpHGsUDXVH38U4mMNpHVGXmeYW4wj22dMp1lTfmu/bTQ==",
|
||||
"version": "16.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-16.1.7.tgz",
|
||||
"integrity": "sha512-zcnVaaZulS1WL0Ss38R5Q6D2gz7MtBu8GZLPfK+73D/hp4GFMrC2sudLky1QibfV7h6RJBJs/gOFvYP0X7UVlQ==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -3015,9 +2975,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-arm64-gnu": {
|
||||
"version": "16.2.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-16.2.3.tgz",
|
||||
"integrity": "sha512-U6vtblPtU/P14Y/b/n9ZY0GOxbbIhTFuaFR7F4/uMBidCi2nSdaOFhA0Go81L61Zd6527+yvuX44T4ksnf8T+Q==",
|
||||
"version": "16.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-16.1.7.tgz",
|
||||
"integrity": "sha512-2ant89Lux/Q3VyC8vNVg7uBaFVP9SwoK2jJOOR0L8TQnX8CAYnh4uctAScy2Hwj2dgjVHqHLORQZJ2wH6VxhSQ==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -3031,9 +2991,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-arm64-musl": {
|
||||
"version": "16.2.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-16.2.3.tgz",
|
||||
"integrity": "sha512-/YV0LgjHUmfhQpn9bVoGc4x4nan64pkhWR5wyEV8yCOfwwrH630KpvRg86olQHTwHIn1z59uh6JwKvHq1h4QEw==",
|
||||
"version": "16.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-16.1.7.tgz",
|
||||
"integrity": "sha512-uufcze7LYv0FQg9GnNeZ3/whYfo+1Q3HnQpm16o6Uyi0OVzLlk2ZWoY7j07KADZFY8qwDbsmFnMQP3p3+Ftprw==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -3047,9 +3007,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-x64-gnu": {
|
||||
"version": "16.2.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-16.2.3.tgz",
|
||||
"integrity": "sha512-/HiWEcp+WMZ7VajuiMEFGZ6cg0+aYZPqCJD3YJEfpVWQsKYSjXQG06vJP6F1rdA03COD9Fef4aODs3YxKx+RDQ==",
|
||||
"version": "16.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-16.1.7.tgz",
|
||||
"integrity": "sha512-KWVf2gxYvHtvuT+c4MBOGxuse5TD7DsMFYSxVxRBnOzok/xryNeQSjXgxSv9QpIVlaGzEn/pIuI6Koosx8CGWA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -3063,9 +3023,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-x64-musl": {
|
||||
"version": "16.2.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-16.2.3.tgz",
|
||||
"integrity": "sha512-Kt44hGJfZSefebhk/7nIdivoDr3Ugp5+oNz9VvF3GUtfxutucUIHfIO0ZYO8QlOPDQloUVQn4NVC/9JvHRk9hw==",
|
||||
"version": "16.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-16.1.7.tgz",
|
||||
"integrity": "sha512-HguhaGwsGr1YAGs68uRKc4aGWxLET+NevJskOcCAwXbwj0fYX0RgZW2gsOCzr9S11CSQPIkxmoSbuVaBp4Z3dA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -3079,9 +3039,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-win32-arm64-msvc": {
|
||||
"version": "16.2.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-16.2.3.tgz",
|
||||
"integrity": "sha512-O2NZ9ie3Tq6xj5Z5CSwBT3+aWAMW2PIZ4egUi9MaWLkwaehgtB7YZjPm+UpcNpKOme0IQuqDcor7BsW6QBiQBw==",
|
||||
"version": "16.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-16.1.7.tgz",
|
||||
"integrity": "sha512-S0n3KrDJokKTeFyM/vGGGR8+pCmXYrjNTk2ZozOL1C/JFdfUIL9O1ATaJOl5r2POe56iRChbsszrjMAdWSv7kQ==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -3095,9 +3055,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-win32-x64-msvc": {
|
||||
"version": "16.2.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-16.2.3.tgz",
|
||||
"integrity": "sha512-Ibm29/GgB/ab5n7XKqlStkm54qqZE8v2FnijUPBgrd67FWrac45o/RsNlaOWjme/B5UqeWt/8KM4aWBwA1D2Kw==",
|
||||
"version": "16.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-16.1.7.tgz",
|
||||
"integrity": "sha512-mwgtg8CNZGYm06LeEd+bNnOUfwOyNem/rOiP14Lsz+AnUY92Zq/LXwtebtUiaeVkhbroRCQ0c8GlR4UT1U+0yg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -14128,12 +14088,12 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/next": {
|
||||
"version": "16.2.3",
|
||||
"resolved": "https://registry.npmjs.org/next/-/next-16.2.3.tgz",
|
||||
"integrity": "sha512-9V3zV4oZFza3PVev5/poB9g0dEafVcgNyQ8eTRop8GvxZjV2G15FC5ARuG1eFD42QgeYkzJBJzHghNP8Ad9xtA==",
|
||||
"version": "16.1.7",
|
||||
"resolved": "https://registry.npmjs.org/next/-/next-16.1.7.tgz",
|
||||
"integrity": "sha512-WM0L7WrSvKwoLegLYr6V+mz+RIofqQgVAfHhMp9a88ms0cFX8iX9ew+snpWlSBwpkURJOUdvCEt3uLl3NNzvWg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@next/env": "16.2.3",
|
||||
"@next/env": "16.1.7",
|
||||
"@swc/helpers": "0.5.15",
|
||||
"baseline-browser-mapping": "^2.9.19",
|
||||
"caniuse-lite": "^1.0.30001579",
|
||||
@@ -14147,15 +14107,15 @@
|
||||
"node": ">=20.9.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@next/swc-darwin-arm64": "16.2.3",
|
||||
"@next/swc-darwin-x64": "16.2.3",
|
||||
"@next/swc-linux-arm64-gnu": "16.2.3",
|
||||
"@next/swc-linux-arm64-musl": "16.2.3",
|
||||
"@next/swc-linux-x64-gnu": "16.2.3",
|
||||
"@next/swc-linux-x64-musl": "16.2.3",
|
||||
"@next/swc-win32-arm64-msvc": "16.2.3",
|
||||
"@next/swc-win32-x64-msvc": "16.2.3",
|
||||
"sharp": "^0.34.5"
|
||||
"@next/swc-darwin-arm64": "16.1.7",
|
||||
"@next/swc-darwin-x64": "16.1.7",
|
||||
"@next/swc-linux-arm64-gnu": "16.1.7",
|
||||
"@next/swc-linux-arm64-musl": "16.1.7",
|
||||
"@next/swc-linux-x64-gnu": "16.1.7",
|
||||
"@next/swc-linux-x64-musl": "16.1.7",
|
||||
"@next/swc-win32-arm64-msvc": "16.1.7",
|
||||
"@next/swc-win32-x64-msvc": "16.1.7",
|
||||
"sharp": "^0.34.4"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@opentelemetry/api": "^1.1.0",
|
||||
@@ -14188,32 +14148,10 @@
|
||||
"react-dom": "^16.8 || ^17 || ^18 || ^19 || ^19.0.0-rc"
|
||||
}
|
||||
},
|
||||
"node_modules/next/node_modules/@img/sharp-darwin-arm64": {
|
||||
"version": "0.34.5",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-darwin-arm64/-/sharp-darwin-arm64-0.34.5.tgz",
|
||||
"integrity": "sha512-imtQ3WMJXbMY4fxb/Ndp6HBTNVtWCUI0WdobyheGf5+ad6xX8VIDO8u2xE4qc/fr08CKG/7dDseFtn6M6g/r3w==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"engines": {
|
||||
"node": "^18.17.0 || ^20.3.0 || >=21.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://opencollective.com/libvips"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@img/sharp-libvips-darwin-arm64": "1.2.4"
|
||||
}
|
||||
},
|
||||
"node_modules/next/node_modules/@img/sharp-darwin-x64": {
|
||||
"version": "0.34.5",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-darwin-x64/-/sharp-darwin-x64-0.34.5.tgz",
|
||||
"integrity": "sha512-YNEFAF/4KQ/PeW0N+r+aVVsoIY0/qxxikF2SWdp+NRkmMB7y9LBZAVqQ4yhGCm/H3H270OSykqmQMKLBhBJDEw==",
|
||||
"version": "0.34.4",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-darwin-x64/-/sharp-darwin-x64-0.34.4.tgz",
|
||||
"integrity": "sha512-rZheupWIoa3+SOdF/IcUe1ah4ZDpKBGWcsPX6MT0lYniH9micvIU7HQkYTfrx5Xi8u+YqwLtxC/3vl8TQN6rMg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -14229,29 +14167,13 @@
|
||||
"url": "https://opencollective.com/libvips"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@img/sharp-libvips-darwin-x64": "1.2.4"
|
||||
}
|
||||
},
|
||||
"node_modules/next/node_modules/@img/sharp-libvips-darwin-arm64": {
|
||||
"version": "1.2.4",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-arm64/-/sharp-libvips-darwin-arm64-1.2.4.tgz",
|
||||
"integrity": "sha512-zqjjo7RatFfFoP0MkQ51jfuFZBnVE2pRiaydKJ1G/rHZvnsrHAOcQALIi9sA5co5xenQdTugCvtb1cuf78Vf4g==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "LGPL-3.0-or-later",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"funding": {
|
||||
"url": "https://opencollective.com/libvips"
|
||||
"@img/sharp-libvips-darwin-x64": "1.2.3"
|
||||
}
|
||||
},
|
||||
"node_modules/next/node_modules/@img/sharp-libvips-darwin-x64": {
|
||||
"version": "1.2.4",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-x64/-/sharp-libvips-darwin-x64-1.2.4.tgz",
|
||||
"integrity": "sha512-1IOd5xfVhlGwX+zXv2N93k0yMONvUlANylbJw1eTah8K/Jtpi15KC+WSiaX/nBmbm2HxRM1gZ0nSdjSsrZbGKg==",
|
||||
"version": "1.2.3",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-x64/-/sharp-libvips-darwin-x64-1.2.3.tgz",
|
||||
"integrity": "sha512-Ju+g2xn1E2AKO6YBhxjj+ACcsPQRHT0bhpglxcEf+3uyPY+/gL8veniKoo96335ZaPo03bdDXMv0t+BBFAbmRA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -14265,9 +14187,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/next/node_modules/@img/sharp-libvips-linux-arm": {
|
||||
"version": "1.2.4",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm/-/sharp-libvips-linux-arm-1.2.4.tgz",
|
||||
"integrity": "sha512-bFI7xcKFELdiNCVov8e44Ia4u2byA+l3XtsAj+Q8tfCwO6BQ8iDojYdvoPMqsKDkuoOo+X6HZA0s0q11ANMQ8A==",
|
||||
"version": "1.2.3",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm/-/sharp-libvips-linux-arm-1.2.3.tgz",
|
||||
"integrity": "sha512-x1uE93lyP6wEwGvgAIV0gP6zmaL/a0tGzJs/BIDDG0zeBhMnuUPm7ptxGhUbcGs4okDJrk4nxgrmxpib9g6HpA==",
|
||||
"cpu": [
|
||||
"arm"
|
||||
],
|
||||
@@ -14281,9 +14203,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/next/node_modules/@img/sharp-libvips-linux-arm64": {
|
||||
"version": "1.2.4",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm64/-/sharp-libvips-linux-arm64-1.2.4.tgz",
|
||||
"integrity": "sha512-excjX8DfsIcJ10x1Kzr4RcWe1edC9PquDRRPx3YVCvQv+U5p7Yin2s32ftzikXojb1PIFc/9Mt28/y+iRklkrw==",
|
||||
"version": "1.2.3",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm64/-/sharp-libvips-linux-arm64-1.2.3.tgz",
|
||||
"integrity": "sha512-I4RxkXU90cpufazhGPyVujYwfIm9Nk1QDEmiIsaPwdnm013F7RIceaCc87kAH+oUB1ezqEvC6ga4m7MSlqsJvQ==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -14297,9 +14219,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/next/node_modules/@img/sharp-libvips-linux-s390x": {
|
||||
"version": "1.2.4",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-s390x/-/sharp-libvips-linux-s390x-1.2.4.tgz",
|
||||
"integrity": "sha512-qmp9VrzgPgMoGZyPvrQHqk02uyjA0/QrTO26Tqk6l4ZV0MPWIW6LTkqOIov+J1yEu7MbFQaDpwdwJKhbJvuRxQ==",
|
||||
"version": "1.2.3",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-s390x/-/sharp-libvips-linux-s390x-1.2.3.tgz",
|
||||
"integrity": "sha512-RgWrs/gVU7f+K7P+KeHFaBAJlNkD1nIZuVXdQv6S+fNA6syCcoboNjsV2Pou7zNlVdNQoQUpQTk8SWDHUA3y/w==",
|
||||
"cpu": [
|
||||
"s390x"
|
||||
],
|
||||
@@ -14313,9 +14235,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/next/node_modules/@img/sharp-libvips-linux-x64": {
|
||||
"version": "1.2.4",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-x64/-/sharp-libvips-linux-x64-1.2.4.tgz",
|
||||
"integrity": "sha512-tJxiiLsmHc9Ax1bz3oaOYBURTXGIRDODBqhveVHonrHJ9/+k89qbLl0bcJns+e4t4rvaNBxaEZsFtSfAdquPrw==",
|
||||
"version": "1.2.3",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-x64/-/sharp-libvips-linux-x64-1.2.3.tgz",
|
||||
"integrity": "sha512-3JU7LmR85K6bBiRzSUc/Ff9JBVIFVvq6bomKE0e63UXGeRw2HPVEjoJke1Yx+iU4rL7/7kUjES4dZ/81Qjhyxg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -14329,9 +14251,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/next/node_modules/@img/sharp-libvips-linuxmusl-arm64": {
|
||||
"version": "1.2.4",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-arm64/-/sharp-libvips-linuxmusl-arm64-1.2.4.tgz",
|
||||
"integrity": "sha512-FVQHuwx1IIuNow9QAbYUzJ+En8KcVm9Lk5+uGUQJHaZmMECZmOlix9HnH7n1TRkXMS0pGxIJokIVB9SuqZGGXw==",
|
||||
"version": "1.2.3",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-arm64/-/sharp-libvips-linuxmusl-arm64-1.2.3.tgz",
|
||||
"integrity": "sha512-F9q83RZ8yaCwENw1GieztSfj5msz7GGykG/BA+MOUefvER69K/ubgFHNeSyUu64amHIYKGDs4sRCMzXVj8sEyw==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -14345,9 +14267,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/next/node_modules/@img/sharp-libvips-linuxmusl-x64": {
|
||||
"version": "1.2.4",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-x64/-/sharp-libvips-linuxmusl-x64-1.2.4.tgz",
|
||||
"integrity": "sha512-+LpyBk7L44ZIXwz/VYfglaX/okxezESc6UxDSoyo2Ks6Jxc4Y7sGjpgU9s4PMgqgjj1gZCylTieNamqA1MF7Dg==",
|
||||
"version": "1.2.3",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-x64/-/sharp-libvips-linuxmusl-x64-1.2.3.tgz",
|
||||
"integrity": "sha512-U5PUY5jbc45ANM6tSJpsgqmBF/VsL6LnxJmIf11kB7J5DctHgqm0SkuXzVWtIY90GnJxKnC/JT251TDnk1fu/g==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -14361,9 +14283,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/next/node_modules/@img/sharp-linux-arm": {
|
||||
"version": "0.34.5",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-linux-arm/-/sharp-linux-arm-0.34.5.tgz",
|
||||
"integrity": "sha512-9dLqsvwtg1uuXBGZKsxem9595+ujv0sJ6Vi8wcTANSFpwV/GONat5eCkzQo/1O6zRIkh0m/8+5BjrRr7jDUSZw==",
|
||||
"version": "0.34.4",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-linux-arm/-/sharp-linux-arm-0.34.4.tgz",
|
||||
"integrity": "sha512-Xyam4mlqM0KkTHYVSuc6wXRmM7LGN0P12li03jAnZ3EJWZqj83+hi8Y9UxZUbxsgsK1qOEwg7O0Bc0LjqQVtxA==",
|
||||
"cpu": [
|
||||
"arm"
|
||||
],
|
||||
@@ -14379,13 +14301,13 @@
|
||||
"url": "https://opencollective.com/libvips"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@img/sharp-libvips-linux-arm": "1.2.4"
|
||||
"@img/sharp-libvips-linux-arm": "1.2.3"
|
||||
}
|
||||
},
|
||||
"node_modules/next/node_modules/@img/sharp-linux-arm64": {
|
||||
"version": "0.34.5",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-linux-arm64/-/sharp-linux-arm64-0.34.5.tgz",
|
||||
"integrity": "sha512-bKQzaJRY/bkPOXyKx5EVup7qkaojECG6NLYswgktOZjaXecSAeCWiZwwiFf3/Y+O1HrauiE3FVsGxFg8c24rZg==",
|
||||
"version": "0.34.4",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-linux-arm64/-/sharp-linux-arm64-0.34.4.tgz",
|
||||
"integrity": "sha512-YXU1F/mN/Wu786tl72CyJjP/Ngl8mGHN1hST4BGl+hiW5jhCnV2uRVTNOcaYPs73NeT/H8Upm3y9582JVuZHrQ==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -14401,13 +14323,13 @@
|
||||
"url": "https://opencollective.com/libvips"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@img/sharp-libvips-linux-arm64": "1.2.4"
|
||||
"@img/sharp-libvips-linux-arm64": "1.2.3"
|
||||
}
|
||||
},
|
||||
"node_modules/next/node_modules/@img/sharp-linux-s390x": {
|
||||
"version": "0.34.5",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-linux-s390x/-/sharp-linux-s390x-0.34.5.tgz",
|
||||
"integrity": "sha512-nQtCk0PdKfho3eC5MrbQoigJ2gd1CgddUMkabUj+rBevs8tZ2cULOx46E7oyX+04WGfABgIwmMC0VqieTiR4jg==",
|
||||
"version": "0.34.4",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-linux-s390x/-/sharp-linux-s390x-0.34.4.tgz",
|
||||
"integrity": "sha512-qVrZKE9Bsnzy+myf7lFKvng6bQzhNUAYcVORq2P7bDlvmF6u2sCmK2KyEQEBdYk+u3T01pVsPrkj943T1aJAsw==",
|
||||
"cpu": [
|
||||
"s390x"
|
||||
],
|
||||
@@ -14423,13 +14345,13 @@
|
||||
"url": "https://opencollective.com/libvips"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@img/sharp-libvips-linux-s390x": "1.2.4"
|
||||
"@img/sharp-libvips-linux-s390x": "1.2.3"
|
||||
}
|
||||
},
|
||||
"node_modules/next/node_modules/@img/sharp-linux-x64": {
|
||||
"version": "0.34.5",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-linux-x64/-/sharp-linux-x64-0.34.5.tgz",
|
||||
"integrity": "sha512-MEzd8HPKxVxVenwAa+JRPwEC7QFjoPWuS5NZnBt6B3pu7EG2Ge0id1oLHZpPJdn3OQK+BQDiw9zStiHBTJQQQQ==",
|
||||
"version": "0.34.4",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-linux-x64/-/sharp-linux-x64-0.34.4.tgz",
|
||||
"integrity": "sha512-ZfGtcp2xS51iG79c6Vhw9CWqQC8l2Ot8dygxoDoIQPTat/Ov3qAa8qpxSrtAEAJW+UjTXc4yxCjNfxm4h6Xm2A==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -14445,13 +14367,13 @@
|
||||
"url": "https://opencollective.com/libvips"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@img/sharp-libvips-linux-x64": "1.2.4"
|
||||
"@img/sharp-libvips-linux-x64": "1.2.3"
|
||||
}
|
||||
},
|
||||
"node_modules/next/node_modules/@img/sharp-linuxmusl-arm64": {
|
||||
"version": "0.34.5",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-arm64/-/sharp-linuxmusl-arm64-0.34.5.tgz",
|
||||
"integrity": "sha512-fprJR6GtRsMt6Kyfq44IsChVZeGN97gTD331weR1ex1c1rypDEABN6Tm2xa1wE6lYb5DdEnk03NZPqA7Id21yg==",
|
||||
"version": "0.34.4",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-arm64/-/sharp-linuxmusl-arm64-0.34.4.tgz",
|
||||
"integrity": "sha512-8hDVvW9eu4yHWnjaOOR8kHVrew1iIX+MUgwxSuH2XyYeNRtLUe4VNioSqbNkB7ZYQJj9rUTT4PyRscyk2PXFKA==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -14467,13 +14389,13 @@
|
||||
"url": "https://opencollective.com/libvips"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@img/sharp-libvips-linuxmusl-arm64": "1.2.4"
|
||||
"@img/sharp-libvips-linuxmusl-arm64": "1.2.3"
|
||||
}
|
||||
},
|
||||
"node_modules/next/node_modules/@img/sharp-linuxmusl-x64": {
|
||||
"version": "0.34.5",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-x64/-/sharp-linuxmusl-x64-0.34.5.tgz",
|
||||
"integrity": "sha512-Jg8wNT1MUzIvhBFxViqrEhWDGzqymo3sV7z7ZsaWbZNDLXRJZoRGrjulp60YYtV4wfY8VIKcWidjojlLcWrd8Q==",
|
||||
"version": "0.34.4",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-x64/-/sharp-linuxmusl-x64-0.34.4.tgz",
|
||||
"integrity": "sha512-lU0aA5L8QTlfKjpDCEFOZsTYGn3AEiO6db8W5aQDxj0nQkVrZWmN3ZP9sYKWJdtq3PWPhUNlqehWyXpYDcI9Sg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -14489,20 +14411,20 @@
|
||||
"url": "https://opencollective.com/libvips"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@img/sharp-libvips-linuxmusl-x64": "1.2.4"
|
||||
"@img/sharp-libvips-linuxmusl-x64": "1.2.3"
|
||||
}
|
||||
},
|
||||
"node_modules/next/node_modules/@img/sharp-wasm32": {
|
||||
"version": "0.34.5",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-wasm32/-/sharp-wasm32-0.34.5.tgz",
|
||||
"integrity": "sha512-OdWTEiVkY2PHwqkbBI8frFxQQFekHaSSkUIJkwzclWZe64O1X4UlUjqqqLaPbUpMOQk6FBu/HtlGXNblIs0huw==",
|
||||
"version": "0.34.4",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-wasm32/-/sharp-wasm32-0.34.4.tgz",
|
||||
"integrity": "sha512-33QL6ZO/qpRyG7woB/HUALz28WnTMI2W1jgX3Nu2bypqLIKx/QKMILLJzJjI+SIbvXdG9fUnmrxR7vbi1sTBeA==",
|
||||
"cpu": [
|
||||
"wasm32"
|
||||
],
|
||||
"license": "Apache-2.0 AND LGPL-3.0-or-later AND MIT",
|
||||
"optional": true,
|
||||
"dependencies": {
|
||||
"@emnapi/runtime": "^1.7.0"
|
||||
"@emnapi/runtime": "^1.5.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": "^18.17.0 || ^20.3.0 || >=21.0.0"
|
||||
@@ -14512,9 +14434,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/next/node_modules/@img/sharp-win32-ia32": {
|
||||
"version": "0.34.5",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-win32-ia32/-/sharp-win32-ia32-0.34.5.tgz",
|
||||
"integrity": "sha512-FV9m/7NmeCmSHDD5j4+4pNI8Cp3aW+JvLoXcTUo0IqyjSfAZJ8dIUmijx1qaJsIiU+Hosw6xM5KijAWRJCSgNg==",
|
||||
"version": "0.34.4",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-win32-ia32/-/sharp-win32-ia32-0.34.4.tgz",
|
||||
"integrity": "sha512-3ZeLue5V82dT92CNL6rsal6I2weKw1cYu+rGKm8fOCCtJTR2gYeUfY3FqUnIJsMUPIH68oS5jmZ0NiJ508YpEw==",
|
||||
"cpu": [
|
||||
"ia32"
|
||||
],
|
||||
@@ -14531,9 +14453,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/next/node_modules/@img/sharp-win32-x64": {
|
||||
"version": "0.34.5",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-win32-x64/-/sharp-win32-x64-0.34.5.tgz",
|
||||
"integrity": "sha512-+29YMsqY2/9eFEiW93eqWnuLcWcufowXewwSNIT6UwZdUUCrM3oFjMWH/Z6/TMmb4hlFenmfAVbpWeup2jryCw==",
|
||||
"version": "0.34.4",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-win32-x64/-/sharp-win32-x64-0.34.4.tgz",
|
||||
"integrity": "sha512-xIyj4wpYs8J18sVN3mSQjwrw7fKUqRw+Z5rnHNCy5fYTxigBz81u5mOMPmFumwjcn8+ld1ppptMBCLic1nz6ig==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -14576,16 +14498,14 @@
|
||||
}
|
||||
},
|
||||
"node_modules/next/node_modules/sharp": {
|
||||
"version": "0.34.5",
|
||||
"resolved": "https://registry.npmjs.org/sharp/-/sharp-0.34.5.tgz",
|
||||
"integrity": "sha512-Ou9I5Ft9WNcCbXrU9cMgPBcCK8LiwLqcbywW3t4oDV37n1pzpuNLsYiAV8eODnjbtQlSDwZ2cUEeQz4E54Hltg==",
|
||||
"version": "0.34.4",
|
||||
"hasInstallScript": true,
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"dependencies": {
|
||||
"@img/colour": "^1.0.0",
|
||||
"detect-libc": "^2.1.2",
|
||||
"semver": "^7.7.3"
|
||||
"detect-libc": "^2.1.0",
|
||||
"semver": "^7.7.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": "^18.17.0 || ^20.3.0 || >=21.0.0"
|
||||
@@ -14594,30 +14514,62 @@
|
||||
"url": "https://opencollective.com/libvips"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@img/sharp-darwin-arm64": "0.34.5",
|
||||
"@img/sharp-darwin-x64": "0.34.5",
|
||||
"@img/sharp-libvips-darwin-arm64": "1.2.4",
|
||||
"@img/sharp-libvips-darwin-x64": "1.2.4",
|
||||
"@img/sharp-libvips-linux-arm": "1.2.4",
|
||||
"@img/sharp-libvips-linux-arm64": "1.2.4",
|
||||
"@img/sharp-libvips-linux-ppc64": "1.2.4",
|
||||
"@img/sharp-libvips-linux-riscv64": "1.2.4",
|
||||
"@img/sharp-libvips-linux-s390x": "1.2.4",
|
||||
"@img/sharp-libvips-linux-x64": "1.2.4",
|
||||
"@img/sharp-libvips-linuxmusl-arm64": "1.2.4",
|
||||
"@img/sharp-libvips-linuxmusl-x64": "1.2.4",
|
||||
"@img/sharp-linux-arm": "0.34.5",
|
||||
"@img/sharp-linux-arm64": "0.34.5",
|
||||
"@img/sharp-linux-ppc64": "0.34.5",
|
||||
"@img/sharp-linux-riscv64": "0.34.5",
|
||||
"@img/sharp-linux-s390x": "0.34.5",
|
||||
"@img/sharp-linux-x64": "0.34.5",
|
||||
"@img/sharp-linuxmusl-arm64": "0.34.5",
|
||||
"@img/sharp-linuxmusl-x64": "0.34.5",
|
||||
"@img/sharp-wasm32": "0.34.5",
|
||||
"@img/sharp-win32-arm64": "0.34.5",
|
||||
"@img/sharp-win32-ia32": "0.34.5",
|
||||
"@img/sharp-win32-x64": "0.34.5"
|
||||
"@img/sharp-darwin-arm64": "0.34.4",
|
||||
"@img/sharp-darwin-x64": "0.34.4",
|
||||
"@img/sharp-libvips-darwin-arm64": "1.2.3",
|
||||
"@img/sharp-libvips-darwin-x64": "1.2.3",
|
||||
"@img/sharp-libvips-linux-arm": "1.2.3",
|
||||
"@img/sharp-libvips-linux-arm64": "1.2.3",
|
||||
"@img/sharp-libvips-linux-ppc64": "1.2.3",
|
||||
"@img/sharp-libvips-linux-s390x": "1.2.3",
|
||||
"@img/sharp-libvips-linux-x64": "1.2.3",
|
||||
"@img/sharp-libvips-linuxmusl-arm64": "1.2.3",
|
||||
"@img/sharp-libvips-linuxmusl-x64": "1.2.3",
|
||||
"@img/sharp-linux-arm": "0.34.4",
|
||||
"@img/sharp-linux-arm64": "0.34.4",
|
||||
"@img/sharp-linux-ppc64": "0.34.4",
|
||||
"@img/sharp-linux-s390x": "0.34.4",
|
||||
"@img/sharp-linux-x64": "0.34.4",
|
||||
"@img/sharp-linuxmusl-arm64": "0.34.4",
|
||||
"@img/sharp-linuxmusl-x64": "0.34.4",
|
||||
"@img/sharp-wasm32": "0.34.4",
|
||||
"@img/sharp-win32-arm64": "0.34.4",
|
||||
"@img/sharp-win32-ia32": "0.34.4",
|
||||
"@img/sharp-win32-x64": "0.34.4"
|
||||
}
|
||||
},
|
||||
"node_modules/next/node_modules/sharp/node_modules/@img/sharp-darwin-arm64": {
|
||||
"version": "0.34.4",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"engines": {
|
||||
"node": "^18.17.0 || ^20.3.0 || >=21.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://opencollective.com/libvips"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@img/sharp-libvips-darwin-arm64": "1.2.3"
|
||||
}
|
||||
},
|
||||
"node_modules/next/node_modules/sharp/node_modules/@img/sharp-libvips-darwin-arm64": {
|
||||
"version": "1.2.3",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "LGPL-3.0-or-later",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"funding": {
|
||||
"url": "https://opencollective.com/libvips"
|
||||
}
|
||||
},
|
||||
"node_modules/node-fetch": {
|
||||
|
||||
@@ -80,7 +80,7 @@
|
||||
"mdast-util-find-and-replace": "^3.0.1",
|
||||
"mime": "^4.1.0",
|
||||
"motion": "^12.29.0",
|
||||
"next": "16.2.3",
|
||||
"next": "16.1.7",
|
||||
"next-themes": "^0.4.4",
|
||||
"postcss": "^8.5.6",
|
||||
"posthog-js": "^1.176.0",
|
||||
|
||||
@@ -136,49 +136,32 @@ const AgentMessage = React.memo(function AgentMessage({
|
||||
finalAnswerComing
|
||||
);
|
||||
|
||||
// Merge streaming citation/document data with chatState props.
|
||||
// NOTE: citationMap and documentMap from usePacketProcessor are mutated in
|
||||
// place (same object reference), so we use citations.length / documentMap.size
|
||||
// as change-detection proxies to bust the memo cache when new data arrives.
|
||||
// Memoize merged citations separately to avoid creating new object when neither source changed
|
||||
const mergedCitations = useMemo(
|
||||
() => ({
|
||||
...chatState.citations,
|
||||
...citationMap,
|
||||
}),
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
[chatState.citations, citationMap, citations.length]
|
||||
[chatState.citations, citationMap]
|
||||
);
|
||||
|
||||
// Merge streaming documentMap into chatState.docs so inline citation chips
|
||||
// can resolve [1] → document even when chatState.docs is empty (multi-model).
|
||||
const mergedDocs = useMemo(() => {
|
||||
const propDocs = chatState.docs ?? [];
|
||||
if (documentMap.size === 0) return propDocs;
|
||||
const seen = new Set(propDocs.map((d) => d.document_id));
|
||||
const extras = Array.from(documentMap.values()).filter(
|
||||
(d) => !seen.has(d.document_id)
|
||||
);
|
||||
return extras.length > 0 ? [...propDocs, ...extras] : propDocs;
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [chatState.docs, documentMap, documentMap.size]);
|
||||
|
||||
// Create a chatState that uses streaming citations and documents for immediate rendering.
|
||||
// Memoized with granular dependencies to prevent cascading re-renders.
|
||||
// Create a chatState that uses streaming citations for immediate rendering
|
||||
// This merges the prop citations with streaming citations, preferring streaming ones
|
||||
// Memoized with granular dependencies to prevent cascading re-renders
|
||||
// Note: chatState object is recreated upstream on every render, so we depend on
|
||||
// individual fields instead of the whole object for proper memoization.
|
||||
// individual fields instead of the whole object for proper memoization
|
||||
const effectiveChatState = useMemo<FullChatState>(
|
||||
() => ({
|
||||
...chatState,
|
||||
citations: mergedCitations,
|
||||
docs: mergedDocs,
|
||||
}),
|
||||
[
|
||||
chatState.agent,
|
||||
chatState.docs,
|
||||
chatState.setPresentingDocument,
|
||||
chatState.overriddenModel,
|
||||
chatState.researchType,
|
||||
mergedCitations,
|
||||
mergedDocs,
|
||||
]
|
||||
);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user