Compare commits

...

33 Commits

Author SHA1 Message Date
Justin Tahara
73f9a47364 fix(xlsx): Openpyxl Formatting Issues (#10230) 2026-04-15 21:22:58 +00:00
Raunak Bhagat
a808445d96 feat: opalify MessageCard (#10223) 2026-04-15 21:11:18 +00:00
Nikolas Garza
c31215197a fix(chat): hide incomplete citation links during streaming (#10224) 2026-04-15 21:10:06 +00:00
Nikolas Garza
9ebd9ebd73 fix(chat): snap typewriter to full content on tab re-focus (#10226) 2026-04-15 21:07:00 +00:00
Nikolas Garza
f0bb0a6bb0 fix(chat): only header click selects preferred in multi-model panels (#10198) 2026-04-15 21:06:19 +00:00
Ben Wu
01bec19d19 feat(canvas): checkpoint logic (3/4) (#9807) 2026-04-15 20:48:16 +00:00
Danelegend
7b40c2cde7 feat(indexing): CSV Chunker - Field-Value Implementation (#10099) 2026-04-15 19:57:50 +00:00
Jamison Lahman
e2c38d2899 chore(devtools): connect databases and github remote to devcontainer (#10222) 2026-04-15 19:50:11 +00:00
Nikolas Garza
24768f9e4f feat(metrics): replace pull-based connector metrics with push-based for multi-tenant (#10189) 2026-04-15 18:15:34 +00:00
Bo-Onyx
aec1c169b6 feat(pruning): pruning grafana dashboard for single tenant (#10208) 2026-04-15 17:50:28 +00:00
Jamison Lahman
5a16ad3473 chore(tests): avoid openapi client import in tests (#10220) 2026-04-15 17:38:24 +00:00
dependabot[bot]
7e28e59f23 chore(deps): bump transformers from 4.53.0 to 5.5.4 (#9987)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-04-15 10:39:50 -07:00
Nikolas Garza
879ae6c02d feat(monitoring): add local Prometheus + Grafana docker-compose stack (#9627) 2026-04-15 17:25:28 +00:00
Nikolas Garza
f84f367eb4 fix(voice): send TTS text in POST body instead of query params (#10213) 2026-04-15 17:20:29 +00:00
Jamison Lahman
d81efe3877 fix(ollama): always include model tag in display name (#10218)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2026-04-15 09:17:37 -07:00
Nikolas Garza
d4619f93c4 feat(indexing): notify admins when connector enters repeated error state (#10207) 2026-04-15 06:10:25 +00:00
Nikolas Garza
70fcfb1d73 feat(indexing): add admin API for failed documents (#10204) 2026-04-15 06:10:06 +00:00
Nikolas Garza
32ba393b32 fix(chat): keep model selector popover open until max models reached (#10203) 2026-04-15 06:09:24 +00:00
Nikolas Garza
f9d2bf78ed fix(chat): disable hover/pointer states on multi-model panels during streaming (#10202) 2026-04-15 06:09:11 +00:00
Nikolas Garza
5567a078fe fix(chat): fix fade gradient missing on last multi-model panel (#10199) 2026-04-15 06:08:48 +00:00
Raunak Bhagat
fc0e8560bc feat: opalify Tooltip component, migrate all consumers (#10210) 2026-04-15 03:42:15 +00:00
Nikolas Garza
60b2701eed feat(indexing): add diagnostic logging to check_for_indexing beat task (#10200) 2026-04-14 20:29:47 -07:00
Jamison Lahman
3682d9844b fix(fe): handle file attachment overflow (#10211) 2026-04-15 02:00:58 +00:00
Raunak Bhagat
a420f9a37c feat: add ref forwarding to input layout components (#10206) 2026-04-15 00:20:50 +00:00
Jamison Lahman
20c5107ba6 chore(devtools): install java runtime into devcontainer (#10197) 2026-04-14 23:10:12 +00:00
Nikolas Garza
357bc91aee feat(indexing): capture swallowed per-doc exceptions in Sentry (#10149) 2026-04-14 23:01:42 +00:00
Nikolas Garza
09653872a2 fix(chat): render inline citation chips in multi-model panels (#10196) 2026-04-14 22:59:10 +00:00
dependabot[bot]
ff01a53f83 chore(deps): bump next from 16.1.7 to 16.2.3 in /web (#10195)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-04-14 22:49:31 +00:00
Danelegend
03ddd5ca9b feat(indexing): Add TabularSection (#10095) 2026-04-14 22:16:35 +00:00
Bo-Onyx
8c49e4573c fix(pruning): Skip Permission Sync During Google Drive Pruning (#10185) 2026-04-14 22:14:09 +00:00
Jamison Lahman
f1696ffa16 chore(deps): upgrade playwright: 1.55.0->1.58.0 (#10194) 2026-04-14 15:12:14 -07:00
Jamison Lahman
a427cb5b0c chore(deps): upgrade python patch version in docker (#10192) 2026-04-14 15:10:00 -07:00
Evan Lohn
f7e4be18dd fix: uploaded files as knowledge source (#10167) 2026-04-14 21:51:00 +00:00
175 changed files with 8691 additions and 2672 deletions

View File

@@ -2,6 +2,7 @@ FROM ubuntu:26.04@sha256:cc925e589b7543b910fea57a240468940003fbfc0515245a495dd0a
RUN apt-get update && apt-get install -y --no-install-recommends \
curl \
default-jre \
fd-find \
fzf \
git \

View File

@@ -1,7 +1,7 @@
{
"name": "Onyx Dev Sandbox",
"image": "onyxdotapp/onyx-devcontainer@sha256:12184169c5bcc9cca0388286d5ffe504b569bc9c37bfa631b76ee8eee2064055",
"runArgs": ["--cap-add=NET_ADMIN", "--cap-add=NET_RAW"],
"image": "onyxdotapp/onyx-devcontainer@sha256:0f02d9299928849c7b15f3b348dcfdcdcb64411ff7a4580cbc026a6ee7aa1554",
"runArgs": ["--cap-add=NET_ADMIN", "--cap-add=NET_RAW", "--network=onyx_default"],
"mounts": [
"source=${localEnv:HOME}/.claude,target=/home/dev/.claude,type=bind",
"source=${localEnv:HOME}/.claude.json,target=/home/dev/.claude.json,type=bind",
@@ -12,10 +12,13 @@
"source=onyx-devcontainer-local,target=/home/dev/.local,type=volume"
],
"containerEnv": {
"SSH_AUTH_SOCK": "/tmp/ssh-agent.sock"
"SSH_AUTH_SOCK": "/tmp/ssh-agent.sock",
"POSTGRES_HOST": "relational_db",
"REDIS_HOST": "cache"
},
"remoteUser": "${localEnv:DEVCONTAINER_REMOTE_USER:dev}",
"updateRemoteUserUID": false,
"initializeCommand": "docker network create onyx_default 2>/dev/null || true",
"workspaceMount": "source=${localWorkspaceFolder},target=/workspace,type=bind,consistency=delegated",
"workspaceFolder": "/workspace",
"postStartCommand": "sudo bash /workspace/.devcontainer/init-dev-user.sh && sudo bash /workspace/.devcontainer/init-firewall.sh",

View File

@@ -4,22 +4,12 @@ set -euo pipefail
echo "Setting up firewall..."
# Preserve docker dns resolution
DOCKER_DNS_RULES=$(iptables-save | grep -E "^-A.*-d 127.0.0.11/32" || true)
# Flush all rules
iptables -t nat -F
iptables -t nat -X
iptables -t mangle -F
iptables -t mangle -X
# Only flush the filter table. The nat and mangle tables are managed by Docker
# (DNS DNAT to 127.0.0.11, container networking, etc.) and must not be touched —
# flushing them breaks Docker's embedded DNS resolver.
iptables -F
iptables -X
# Restore docker dns rules
if [ -n "$DOCKER_DNS_RULES" ]; then
echo "$DOCKER_DNS_RULES" | iptables-restore -n
fi
# Create ipset for allowed destinations
ipset create allowed-domains hash:net || true
ipset flush allowed-domains
@@ -34,6 +24,7 @@ done
# Resolve allowed domains
ALLOWED_DOMAINS=(
"github.com"
"registry.npmjs.org"
"api.anthropic.com"
"api-staging.anthropic.com"
@@ -65,6 +56,14 @@ if [ -n "$DOCKER_GATEWAY" ]; then
fi
fi
# Allow traffic to all attached Docker network subnets so the container can
# reach sibling services (e.g. relational_db, cache) on shared compose networks.
for subnet in $(ip -4 -o addr show scope global | awk '{print $4}'); do
if ! ipset add allowed-domains "$subnet" -exist 2>&1; then
echo "warning: failed to add Docker subnet $subnet to allowlist" >&2
fi
done
# Set default policies to DROP
iptables -P FORWARD DROP
iptables -P INPUT DROP

12
.vscode/launch.json vendored
View File

@@ -475,6 +475,18 @@
"order": 0
}
},
{
"name": "Start Monitoring Stack (Prometheus + Grafana)",
"type": "node",
"request": "launch",
"runtimeExecutable": "docker",
"runtimeArgs": ["compose", "up", "-d"],
"cwd": "${workspaceFolder}/profiling",
"console": "integratedTerminal",
"presentation": {
"group": "3"
}
},
{
"name": "Clear and Restart External Volumes and Containers",
"type": "node",

View File

@@ -1,4 +1,4 @@
FROM python:3.11.7-slim-bookworm
FROM python:3.11-slim-bookworm@sha256:9c6f90801e6b68e772b7c0ca74260cbf7af9f320acec894e26fccdaccfbe3b47
LABEL com.danswer.maintainer="founders@onyx.app"
LABEL com.danswer.description="This image is the web/frontend container of Onyx which \

View File

@@ -1,5 +1,5 @@
# Base stage with dependencies
FROM python:3.11.7-slim-bookworm AS base
FROM python:3.11-slim-bookworm@sha256:9c6f90801e6b68e772b7c0ca74260cbf7af9f320acec894e26fccdaccfbe3b47 AS base
ENV DANSWER_RUNNING_IN_DOCKER="true" \
HF_HOME=/app/.cache/huggingface

View File

@@ -96,11 +96,14 @@ def get_model_app() -> FastAPI:
title="Onyx Model Server", version=__version__, lifespan=lifespan
)
if SENTRY_DSN:
from onyx.configs.sentry import _add_instance_tags
sentry_sdk.init(
dsn=SENTRY_DSN,
integrations=[StarletteIntegration(), FastApiIntegration()],
traces_sample_rate=0.1,
release=__version__,
before_send=_add_instance_tags,
)
logger.info("Sentry initialized")
else:

View File

@@ -63,11 +63,14 @@ logger = setup_logger()
task_logger = get_task_logger(__name__)
if SENTRY_DSN:
from onyx.configs.sentry import _add_instance_tags
sentry_sdk.init(
dsn=SENTRY_DSN,
integrations=[CeleryIntegration()],
traces_sample_rate=0.1,
release=__version__,
before_send=_add_instance_tags,
)
logger.info("Sentry initialized")
else:

View File

@@ -135,10 +135,13 @@ def _docfetching_task(
# Since connector_indexing_proxy_task spawns a new process using this function as
# the entrypoint, we init Sentry here.
if SENTRY_DSN:
from onyx.configs.sentry import _add_instance_tags
sentry_sdk.init(
dsn=SENTRY_DSN,
traces_sample_rate=0.1,
release=__version__,
before_send=_add_instance_tags,
)
logger.info("Sentry initialized")
else:

View File

@@ -3,6 +3,7 @@ import os
import time
import traceback
from collections import defaultdict
from dataclasses import dataclass
from datetime import datetime
from datetime import timedelta
from datetime import timezone
@@ -50,6 +51,7 @@ from onyx.configs.constants import AuthType
from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import CELERY_INDEXING_LOCK_TIMEOUT
from onyx.configs.constants import MilestoneRecordType
from onyx.configs.constants import NotificationType
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
@@ -85,6 +87,8 @@ from onyx.db.indexing_coordination import INDEXING_PROGRESS_TIMEOUT_HOURS
from onyx.db.indexing_coordination import IndexingCoordination
from onyx.db.models import IndexAttempt
from onyx.db.models import SearchSettings
from onyx.db.notification import create_notification
from onyx.db.notification import get_notifications
from onyx.db.search_settings import get_current_search_settings
from onyx.db.search_settings import get_secondary_search_settings
from onyx.db.swap_index import check_and_perform_index_swap
@@ -105,6 +109,9 @@ from onyx.redis.redis_pool import get_redis_replica_client
from onyx.redis.redis_pool import redis_lock_dump
from onyx.redis.redis_pool import SCAN_ITER_COUNT_DEFAULT
from onyx.redis.redis_utils import is_fence
from onyx.server.metrics.connector_health_metrics import on_connector_error_state_change
from onyx.server.metrics.connector_health_metrics import on_connector_indexing_success
from onyx.server.metrics.connector_health_metrics import on_index_attempt_status_change
from onyx.server.runtime.onyx_runtime import OnyxRuntime
from onyx.utils.logger import setup_logger
from onyx.utils.middleware import make_randomized_onyx_request_id
@@ -400,7 +407,6 @@ def check_indexing_completion(
tenant_id: str,
task: Task,
) -> None:
logger.info(
f"Checking for indexing completion: attempt={index_attempt_id} tenant={tenant_id}"
)
@@ -521,13 +527,23 @@ def check_indexing_completion(
# Update CC pair status if successful
cc_pair = get_connector_credential_pair_from_id(
db_session, attempt.connector_credential_pair_id
db_session,
attempt.connector_credential_pair_id,
eager_load_connector=True,
)
if cc_pair is None:
raise RuntimeError(
f"CC pair {attempt.connector_credential_pair_id} not found in database"
)
source = cc_pair.connector.source.value
on_index_attempt_status_change(
tenant_id=tenant_id,
source=source,
cc_pair_id=cc_pair.id,
status=attempt.status.value,
)
if attempt.status.is_successful():
# NOTE: we define the last successful index time as the time the last successful
# attempt finished. This is distinct from the poll_range_end of the last successful
@@ -548,10 +564,39 @@ def check_indexing_completion(
event=MilestoneRecordType.CONNECTOR_SUCCEEDED,
)
on_connector_indexing_success(
tenant_id=tenant_id,
source=source,
cc_pair_id=cc_pair.id,
docs_indexed=attempt.new_docs_indexed or 0,
success_timestamp=attempt.time_updated.timestamp(),
)
# Clear repeated error state on success
if cc_pair.in_repeated_error_state:
cc_pair.in_repeated_error_state = False
# Delete any existing error notification for this CC pair so a
# fresh one is created if the connector fails again later.
for notif in get_notifications(
user=None,
db_session=db_session,
notif_type=NotificationType.CONNECTOR_REPEATED_ERRORS,
include_dismissed=True,
):
if (
notif.additional_data
and notif.additional_data.get("cc_pair_id") == cc_pair.id
):
db_session.delete(notif)
db_session.commit()
on_connector_error_state_change(
tenant_id=tenant_id,
source=source,
cc_pair_id=cc_pair.id,
in_error=False,
)
if attempt.status == IndexingStatus.SUCCESS:
logger.info(
@@ -608,6 +653,27 @@ def active_indexing_attempt(
return bool(active_indexing_attempt)
@dataclass
class _KickoffResult:
"""Tracks diagnostic counts from a _kickoff_indexing_tasks run."""
created: int = 0
skipped_active: int = 0
skipped_not_found: int = 0
skipped_not_indexable: int = 0
failed_to_create: int = 0
@property
def evaluated(self) -> int:
return (
self.created
+ self.skipped_active
+ self.skipped_not_found
+ self.skipped_not_indexable
+ self.failed_to_create
)
def _kickoff_indexing_tasks(
celery_app: Celery,
db_session: Session,
@@ -617,12 +683,12 @@ def _kickoff_indexing_tasks(
redis_client: Redis,
lock_beat: RedisLock,
tenant_id: str,
) -> int:
) -> _KickoffResult:
"""Kick off indexing tasks for the given cc_pair_ids and search_settings.
Returns the number of tasks successfully created.
Returns a _KickoffResult with diagnostic counts.
"""
tasks_created = 0
result = _KickoffResult()
for cc_pair_id in cc_pair_ids:
lock_beat.reacquire()
@@ -633,6 +699,7 @@ def _kickoff_indexing_tasks(
search_settings_id=search_settings.id,
db_session=db_session,
):
result.skipped_active += 1
continue
cc_pair = get_connector_credential_pair_from_id(
@@ -643,6 +710,7 @@ def _kickoff_indexing_tasks(
task_logger.warning(
f"_kickoff_indexing_tasks - CC pair not found: cc_pair={cc_pair_id}"
)
result.skipped_not_found += 1
continue
# Heavyweight check after fetching cc pair
@@ -657,6 +725,7 @@ def _kickoff_indexing_tasks(
f"search_settings={search_settings.id}, "
f"secondary_index_building={secondary_index_building}"
)
result.skipped_not_indexable += 1
continue
task_logger.debug(
@@ -696,13 +765,14 @@ def _kickoff_indexing_tasks(
task_logger.info(
f"Connector indexing queued: index_attempt={attempt_id} cc_pair={cc_pair.id} search_settings={search_settings.id}"
)
tasks_created += 1
result.created += 1
else:
task_logger.error(
f"Failed to create indexing task: cc_pair={cc_pair.id} search_settings={search_settings.id}"
)
result.failed_to_create += 1
return tasks_created
return result
@shared_task(
@@ -728,6 +798,8 @@ def check_for_indexing(self: Task, *, tenant_id: str) -> int | None:
task_logger.warning("check_for_indexing - Starting")
tasks_created = 0
primary_result = _KickoffResult()
secondary_result: _KickoffResult | None = None
locked = False
redis_client = get_redis_client()
redis_client_replica = get_redis_replica_client()
@@ -848,6 +920,39 @@ def check_for_indexing(self: Task, *, tenant_id: str) -> int | None:
cc_pair_id=cc_pair_id,
in_repeated_error_state=True,
)
on_connector_error_state_change(
tenant_id=tenant_id,
source=cc_pair.connector.source.value,
cc_pair_id=cc_pair_id,
in_error=True,
)
connector_name = (
cc_pair.name
or cc_pair.connector.name
or f"CC pair {cc_pair.id}"
)
source = cc_pair.connector.source.value
connector_url = f"/admin/connector/{cc_pair.id}"
create_notification(
user_id=None,
notif_type=NotificationType.CONNECTOR_REPEATED_ERRORS,
db_session=db_session,
title=f"Connector '{connector_name}' has entered repeated error state",
description=(
f"The {source} connector has failed repeatedly and "
f"has been flagged. View indexing history in the "
f"Advanced section: {connector_url}"
),
additional_data={"cc_pair_id": cc_pair.id},
)
task_logger.error(
f"Connector entered repeated error state: "
f"cc_pair={cc_pair.id} "
f"connector={cc_pair.connector.name} "
f"source={source}"
)
# When entering repeated error state, also pause the connector
# to prevent continued indexing retry attempts burning through embedding credits.
# NOTE: only for Cloud, since most self-hosted users use self-hosted embedding
@@ -863,7 +968,7 @@ def check_for_indexing(self: Task, *, tenant_id: str) -> int | None:
# Heavy check, should_index(), is called in _kickoff_indexing_tasks
with get_session_with_current_tenant() as db_session:
# Primary first
tasks_created += _kickoff_indexing_tasks(
primary_result = _kickoff_indexing_tasks(
celery_app=self.app,
db_session=db_session,
search_settings=current_search_settings,
@@ -873,6 +978,7 @@ def check_for_indexing(self: Task, *, tenant_id: str) -> int | None:
lock_beat=lock_beat,
tenant_id=tenant_id,
)
tasks_created += primary_result.created
# Secondary indexing (only if secondary search settings exist and switchover_type is not INSTANT)
if (
@@ -880,7 +986,7 @@ def check_for_indexing(self: Task, *, tenant_id: str) -> int | None:
and secondary_search_settings.switchover_type != SwitchoverType.INSTANT
and secondary_cc_pair_ids
):
tasks_created += _kickoff_indexing_tasks(
secondary_result = _kickoff_indexing_tasks(
celery_app=self.app,
db_session=db_session,
search_settings=secondary_search_settings,
@@ -890,6 +996,7 @@ def check_for_indexing(self: Task, *, tenant_id: str) -> int | None:
lock_beat=lock_beat,
tenant_id=tenant_id,
)
tasks_created += secondary_result.created
elif (
secondary_search_settings
and secondary_search_settings.switchover_type == SwitchoverType.INSTANT
@@ -1002,7 +1109,26 @@ def check_for_indexing(self: Task, *, tenant_id: str) -> int | None:
redis_lock_dump(lock_beat, redis_client)
time_elapsed = time.monotonic() - time_start
task_logger.info(f"check_for_indexing finished: elapsed={time_elapsed:.2f}")
task_logger.info(
f"check_for_indexing finished: "
f"elapsed={time_elapsed:.2f}s "
f"primary=[evaluated={primary_result.evaluated} "
f"created={primary_result.created} "
f"skipped_active={primary_result.skipped_active} "
f"skipped_not_found={primary_result.skipped_not_found} "
f"skipped_not_indexable={primary_result.skipped_not_indexable} "
f"failed={primary_result.failed_to_create}]"
+ (
f" secondary=[evaluated={secondary_result.evaluated} "
f"created={secondary_result.created} "
f"skipped_active={secondary_result.skipped_active} "
f"skipped_not_found={secondary_result.skipped_not_found} "
f"skipped_not_indexable={secondary_result.skipped_not_indexable} "
f"failed={secondary_result.failed_to_create}]"
if secondary_result
else ""
)
)
return tasks_created

View File

@@ -5,6 +5,7 @@ from datetime import datetime
from datetime import timedelta
from datetime import timezone
import sentry_sdk
from celery import Celery
from sqlalchemy.orm import Session
@@ -68,6 +69,7 @@ from onyx.redis.redis_pool import get_redis_client
from onyx.server.features.build.indexing.persistent_document_writer import (
get_persistent_document_writer,
)
from onyx.server.metrics.connector_health_metrics import on_index_attempt_status_change
from onyx.utils.logger import setup_logger
from onyx.utils.middleware import make_randomized_onyx_request_id
from onyx.utils.postgres_sanitization import sanitize_document_for_postgres
@@ -267,6 +269,13 @@ def run_docfetching_entrypoint(
)
credential_id = attempt.connector_credential_pair.credential_id
on_index_attempt_status_change(
tenant_id=tenant_id,
source=attempt.connector_credential_pair.connector.source.value,
cc_pair_id=connector_credential_pair_id,
status="in_progress",
)
logger.info(
f"Docfetching starting{tenant_str}: "
f"connector='{connector_name}' "
@@ -556,6 +565,27 @@ def connector_document_extraction(
# save record of any failures at the connector level
if failure is not None:
if failure.exception is not None:
with sentry_sdk.new_scope() as scope:
scope.set_tag("stage", "connector_fetch")
scope.set_tag("connector_source", db_connector.source.value)
scope.set_tag("cc_pair_id", str(cc_pair_id))
scope.set_tag("index_attempt_id", str(index_attempt_id))
scope.set_tag("tenant_id", tenant_id)
if failure.failed_document:
scope.set_tag(
"doc_id", failure.failed_document.document_id
)
if failure.failed_entity:
scope.set_tag(
"entity_id", failure.failed_entity.entity_id
)
scope.fingerprint = [
"connector-fetch-failure",
db_connector.source.value,
type(failure.exception).__name__,
]
sentry_sdk.capture_exception(failure.exception)
total_failures += 1
with get_session_with_current_tenant() as db_session:
create_index_attempt_error(

View File

@@ -283,6 +283,7 @@ class NotificationType(str, Enum):
RELEASE_NOTES = "release_notes"
ASSISTANT_FILES_READY = "assistant_files_ready"
FEATURE_ANNOUNCEMENT = "feature_announcement"
CONNECTOR_REPEATED_ERRORS = "connector_repeated_errors"
class BlobType(str, Enum):

View File

@@ -0,0 +1,48 @@
from typing import Any
from sentry_sdk.types import Event
from onyx.utils.logger import setup_logger
logger = setup_logger()
_instance_id_resolved = False
def _add_instance_tags(
event: Event,
hint: dict[str, Any], # noqa: ARG001
) -> Event | None:
"""Sentry before_send hook that lazily attaches instance identification tags.
On the first event, resolves the instance UUID from the KV store (requires DB)
and sets it as a global Sentry tag. Subsequent events pick it up automatically.
"""
global _instance_id_resolved
if _instance_id_resolved:
return event
try:
import sentry_sdk
from shared_configs.configs import MULTI_TENANT
if MULTI_TENANT:
instance_id = "multi-tenant-cloud"
else:
from onyx.utils.telemetry import get_or_generate_uuid
instance_id = get_or_generate_uuid()
sentry_sdk.set_tag("instance_id", instance_id)
# Also set on this event since set_tag won't retroactively apply
event.setdefault("tags", {})["instance_id"] = instance_id
# Only mark resolved after success — if DB wasn't ready, retry next event
_instance_id_resolved = True
except Exception:
logger.debug("Failed to resolve instance_id for Sentry tagging")
return event

View File

@@ -27,16 +27,19 @@ _STATUS_TO_ERROR_CODE: dict[int, OnyxErrorCode] = {
401: OnyxErrorCode.CREDENTIAL_EXPIRED,
403: OnyxErrorCode.INSUFFICIENT_PERMISSIONS,
404: OnyxErrorCode.BAD_GATEWAY,
429: OnyxErrorCode.RATE_LIMITED,
}
def _error_code_for_status(status_code: int) -> OnyxErrorCode:
"""Map an HTTP status code to the appropriate OnyxErrorCode.
Expects a >= 400 status code. Known codes (401, 403, 404, 429) are
Expects a >= 400 status code. Known codes (401, 403, 404) are
mapped to specific error codes; all other codes (unrecognised 4xx
and 5xx) map to BAD_GATEWAY as unexpected upstream errors.
Note: 429 is intentionally omitted — the rl_requests wrapper
handles rate limits transparently at the HTTP layer, so 429
responses never reach this function.
"""
if status_code in _STATUS_TO_ERROR_CODE:
return _STATUS_TO_ERROR_CODE[status_code]

View File

@@ -1,10 +1,9 @@
from datetime import datetime
from datetime import timezone
from enum import StrEnum
from typing import Any
from typing import cast
from typing import Literal
from typing import NoReturn
from typing import TypeAlias
from pydantic import BaseModel
from retry import retry
@@ -25,8 +24,11 @@ from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.models import ConnectorCheckpoint
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import DocumentFailure
from onyx.connectors.models import EntityFailure
from onyx.connectors.models import ImageSection
from onyx.connectors.models import TextSection
from onyx.error_handling.exceptions import OnyxError
@@ -47,10 +49,6 @@ def _handle_canvas_api_error(e: OnyxError) -> NoReturn:
raise InsufficientPermissionsError(
"Canvas API token does not have sufficient permissions (HTTP 403)."
)
elif e.status_code == 429:
raise ConnectorValidationError(
"Canvas rate-limit exceeded (HTTP 429). Please try again later."
)
elif e.status_code >= 500:
raise UnexpectedValidationError(
f"Unexpected Canvas HTTP error (status={e.status_code}): {e}"
@@ -61,6 +59,60 @@ def _handle_canvas_api_error(e: OnyxError) -> NoReturn:
)
class CanvasStage(StrEnum):
PAGES = "pages"
ASSIGNMENTS = "assignments"
ANNOUNCEMENTS = "announcements"
_STAGE_CONFIG: dict[CanvasStage, dict[str, Any]] = {
CanvasStage.PAGES: {
"endpoint": "courses/{course_id}/pages",
"params": {
"per_page": "100",
"include[]": "body",
"published": "true",
"sort": "updated_at",
"order": "desc",
},
},
CanvasStage.ASSIGNMENTS: {
"endpoint": "courses/{course_id}/assignments",
"params": {"per_page": "100", "published": "true"},
},
CanvasStage.ANNOUNCEMENTS: {
"endpoint": "announcements",
"params": {
"per_page": "100",
"context_codes[]": "course_{course_id}",
"active_only": "true",
},
},
}
def _parse_canvas_dt(timestamp_str: str) -> datetime:
"""Parse a Canvas ISO-8601 timestamp (e.g. '2025-06-15T12:00:00Z')
into a timezone-aware UTC datetime.
Canvas returns timestamps with a trailing 'Z' instead of '+00:00',
so we normalise before parsing.
"""
return datetime.fromisoformat(timestamp_str.replace("Z", "+00:00")).astimezone(
timezone.utc
)
def _unix_to_canvas_time(epoch: float) -> str:
"""Convert a Unix timestamp to Canvas ISO-8601 format (e.g. '2025-06-15T12:00:00Z')."""
return datetime.fromtimestamp(epoch, tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
def _in_time_window(timestamp_str: str, start: float, end: float) -> bool:
"""Check whether a Canvas ISO-8601 timestamp falls within (start, end]."""
return start < _parse_canvas_dt(timestamp_str).timestamp() <= end
class CanvasCourse(BaseModel):
id: int
name: str | None = None
@@ -145,9 +197,6 @@ class CanvasAnnouncement(BaseModel):
)
CanvasStage: TypeAlias = Literal["pages", "assignments", "announcements"]
class CanvasConnectorCheckpoint(ConnectorCheckpoint):
"""Checkpoint state for resumable Canvas indexing.
@@ -165,15 +214,30 @@ class CanvasConnectorCheckpoint(ConnectorCheckpoint):
course_ids: list[int] = []
current_course_index: int = 0
stage: CanvasStage = "pages"
stage: CanvasStage = CanvasStage.PAGES
next_url: str | None = None
def advance_course(self) -> None:
"""Move to the next course and reset within-course state."""
self.current_course_index += 1
self.stage = "pages"
self.stage = CanvasStage.PAGES
self.next_url = None
def advance_stage(self) -> None:
"""Advance past the current stage.
Moves to the next stage within the same course, or to the next
course if the current stage is the last one. Resets next_url so
the next call starts fresh on the new stage.
"""
self.next_url = None
stages: list[CanvasStage] = list(CanvasStage)
next_idx = stages.index(self.stage) + 1
if next_idx < len(stages):
self.stage = stages[next_idx]
else:
self.advance_course()
class CanvasConnector(
CheckpointedConnectorWithPermSync[CanvasConnectorCheckpoint],
@@ -295,13 +359,7 @@ class CanvasConnector(
if body_text:
text_parts.append(body_text)
doc_updated_at = (
datetime.fromisoformat(page.updated_at.replace("Z", "+00:00")).astimezone(
timezone.utc
)
if page.updated_at
else None
)
doc_updated_at = _parse_canvas_dt(page.updated_at) if page.updated_at else None
document = self._build_document(
doc_id=f"canvas-page-{page.course_id}-{page.page_id}",
@@ -325,17 +383,11 @@ class CanvasConnector(
if desc_text:
text_parts.append(desc_text)
if assignment.due_at:
due_dt = datetime.fromisoformat(
assignment.due_at.replace("Z", "+00:00")
).astimezone(timezone.utc)
due_dt = _parse_canvas_dt(assignment.due_at)
text_parts.append(f"Due: {due_dt.strftime('%B %d, %Y %H:%M UTC')}")
doc_updated_at = (
datetime.fromisoformat(
assignment.updated_at.replace("Z", "+00:00")
).astimezone(timezone.utc)
if assignment.updated_at
else None
_parse_canvas_dt(assignment.updated_at) if assignment.updated_at else None
)
document = self._build_document(
@@ -361,11 +413,7 @@ class CanvasConnector(
text_parts.append(msg_text)
doc_updated_at = (
datetime.fromisoformat(
announcement.posted_at.replace("Z", "+00:00")
).astimezone(timezone.utc)
if announcement.posted_at
else None
_parse_canvas_dt(announcement.posted_at) if announcement.posted_at else None
)
document = self._build_document(
@@ -400,6 +448,314 @@ class CanvasConnector(
self._canvas_client = client
return None
def _fetch_stage_page(
self,
next_url: str | None,
endpoint: str,
params: dict[str, Any],
) -> tuple[list[Any], str | None]:
"""Fetch one page of API results for the current stage.
Returns (items, next_url). All error handling is done by the
caller (_load_from_checkpoint).
"""
if next_url:
# Resuming mid-pagination: the next_url from Canvas's
# Link header already contains endpoint + query params.
response, result_next_url = self.canvas_client.get(full_url=next_url)
else:
# First request for this stage: build from endpoint + params.
response, result_next_url = self.canvas_client.get(
endpoint=endpoint, params=params
)
return response or [], result_next_url
def _process_items(
self,
response: list[Any],
stage: CanvasStage,
course_id: int,
start: float,
end: float,
include_permissions: bool,
) -> tuple[list[Document | ConnectorFailure], bool]:
"""Process a page of API results into documents.
Returns (docs, early_exit). early_exit is True when pages
(sorted desc by updated_at) hit an item older than start,
signaling that pagination should stop.
"""
results: list[Document | ConnectorFailure] = []
early_exit = False
for item in response:
try:
if stage == CanvasStage.PAGES:
page = CanvasPage.from_api(item, course_id=course_id)
if not page.updated_at:
continue
# Pages are sorted by updated_at desc — once we see
# an item at or before `start`, all remaining items
# on this and subsequent pages are older too.
if not _in_time_window(page.updated_at, start, end):
if _parse_canvas_dt(page.updated_at).timestamp() <= start:
early_exit = True
break
# ts > end: page is newer than our window, skip it
continue
doc = self._convert_page_to_document(page)
results.append(
self._maybe_attach_permissions(
doc, course_id, include_permissions
)
)
elif stage == CanvasStage.ASSIGNMENTS:
assignment = CanvasAssignment.from_api(item, course_id=course_id)
if not assignment.updated_at or not _in_time_window(
assignment.updated_at, start, end
):
continue
doc = self._convert_assignment_to_document(assignment)
results.append(
self._maybe_attach_permissions(
doc, course_id, include_permissions
)
)
elif stage == CanvasStage.ANNOUNCEMENTS:
announcement = CanvasAnnouncement.from_api(
item, course_id=course_id
)
if not announcement.posted_at:
logger.debug(
f"Skipping announcement {announcement.id} in "
f"course {course_id}: no posted_at"
)
continue
if not _in_time_window(announcement.posted_at, start, end):
continue
doc = self._convert_announcement_to_document(announcement)
results.append(
self._maybe_attach_permissions(
doc, course_id, include_permissions
)
)
except Exception as e:
item_id = item.get("id") or item.get("page_id", "unknown")
if stage == CanvasStage.PAGES:
doc_link = (
f"{self.canvas_base_url}/courses/{course_id}"
f"/pages/{item.get('url', '')}"
)
else:
doc_link = item.get("html_url", "")
results.append(
ConnectorFailure(
failed_document=DocumentFailure(
document_id=f"canvas-{stage.removesuffix('s')}-{course_id}-{item_id}",
document_link=doc_link,
),
failure_message=f"Failed to process {stage.removesuffix('s')}: {e}",
exception=e,
)
)
return results, early_exit
def _maybe_attach_permissions(
    self,
    document: Document,
    course_id: int,
    include_permissions: bool,
) -> Document:
    """Attach course-level external access to the document when requested.

    Returns the same document instance, mutated in place when
    include_permissions is True.
    """
    if not include_permissions:
        return document
    document.external_access = self._get_course_permissions(course_id)
    return document
def _load_from_checkpoint(
    self,
    start: SecondsSinceUnixEpoch,
    end: SecondsSinceUnixEpoch,
    checkpoint: CanvasConnectorCheckpoint,
    include_permissions: bool = False,
) -> CheckpointOutput[CanvasConnectorCheckpoint]:
    """Shared implementation for load_from_checkpoint and load_from_checkpoint_with_perm_sync.

    NOTE: this is a generator — it yields Documents/ConnectorFailures and
    ends with `return new_checkpoint`, which becomes the generator's final
    value. Each invocation fetches at most one page of one stage of one
    course, so progress is persisted at fine granularity.
    """
    new_checkpoint = checkpoint.model_copy(deep=True)
    # First call: materialize the list of course IDs.
    # On failure, let the exception propagate so the framework fails the
    # attempt cleanly. Swallowing errors here would leave the checkpoint
    # state unchanged and cause an infinite retry loop.
    if not new_checkpoint.course_ids:
        try:
            courses = self._list_courses()
        except OnyxError as e:
            if e.status_code in (401, 403):
                _handle_canvas_api_error(e)  # NoReturn — always raises
            raise
        new_checkpoint.course_ids = [c.id for c in courses]
        logger.info(f"Found {len(courses)} Canvas courses to process")
        new_checkpoint.has_more = len(new_checkpoint.course_ids) > 0
        return new_checkpoint
    # All courses done.
    if new_checkpoint.current_course_index >= len(new_checkpoint.course_ids):
        new_checkpoint.has_more = False
        return new_checkpoint
    # Resume at the course the previous invocation stopped on.
    course_id = new_checkpoint.course_ids[new_checkpoint.current_course_index]
    try:
        stage = CanvasStage(new_checkpoint.stage)
    except ValueError as e:
        # A corrupt/legacy checkpoint stage value is unrecoverable here.
        raise ValueError(
            f"Invalid checkpoint stage: {new_checkpoint.stage!r}. "
            f"Valid stages: {[s.value for s in CanvasStage]}"
        ) from e
    # Build endpoint + params from the static template.
    config = _STAGE_CONFIG[stage]
    endpoint = config["endpoint"].format(course_id=course_id)
    params = {k: v.format(course_id=course_id) for k, v in config["params"].items()}
    # Only the announcements API supports server-side date filtering
    # (start_date/end_date). Pages support server-side sorting
    # (sort=updated_at desc) enabling early exit, but not date
    # filtering. Assignments support neither. Both are filtered
    # client-side via _in_time_window after fetching.
    if stage == CanvasStage.ANNOUNCEMENTS:
        params["start_date"] = _unix_to_canvas_time(start)
        params["end_date"] = _unix_to_canvas_time(end)
    try:
        response, result_next_url = self._fetch_stage_page(
            next_url=new_checkpoint.next_url,
            endpoint=endpoint,
            params=params,
        )
    except OnyxError as oe:
        # Security errors from _parse_next_link (host/scheme
        # mismatch on pagination URLs) have no status code override
        # and must not be silenced.
        is_api_error = oe._status_code_override is not None
        if not is_api_error:
            raise
        if oe.status_code in (401, 403):
            _handle_canvas_api_error(oe)  # NoReturn — always raises
        # 404 means the course itself is gone or inaccessible. The
        # other stages on this course will hit the same 404, so skip
        # the whole course rather than burning API calls on each stage.
        if oe.status_code == 404:
            logger.warning(
                f"Canvas course {course_id} not found while fetching "
                f"{stage} (HTTP 404). Skipping course."
            )
            yield ConnectorFailure(
                failed_entity=EntityFailure(
                    entity_id=f"canvas-course-{course_id}",
                ),
                failure_message=(f"Canvas course {course_id} not found: {oe}"),
                exception=oe,
            )
            new_checkpoint.advance_course()
        else:
            # Any other API error: record a failure for this stage only.
            logger.warning(
                f"Failed to fetch {stage} for course {course_id}: {oe}. "
                f"Skipping remainder of this stage."
            )
            yield ConnectorFailure(
                failed_entity=EntityFailure(
                    entity_id=f"canvas-{stage}-{course_id}",
                ),
                failure_message=(
                    f"Failed to fetch {stage} for course {course_id}: {oe}"
                ),
                exception=oe,
            )
            new_checkpoint.advance_stage()
        new_checkpoint.has_more = new_checkpoint.current_course_index < len(
            new_checkpoint.course_ids
        )
        return new_checkpoint
    except Exception as e:
        # Unknown error — skip the stage and try to continue.
        logger.warning(
            f"Failed to fetch {stage} for course {course_id}: {e}. "
            f"Skipping remainder of this stage."
        )
        yield ConnectorFailure(
            failed_entity=EntityFailure(
                entity_id=f"canvas-{stage}-{course_id}",
            ),
            failure_message=(
                f"Failed to fetch {stage} for course {course_id}: {e}"
            ),
            exception=e,
        )
        new_checkpoint.advance_stage()
        new_checkpoint.has_more = new_checkpoint.current_course_index < len(
            new_checkpoint.course_ids
        )
        return new_checkpoint
    # Process fetched items
    results, early_exit = self._process_items(
        response, stage, course_id, start, end, include_permissions
    )
    for result in results:
        yield result
    # If we hit an item older than our window (pages sorted desc),
    # skip remaining pagination and advance to the next stage.
    if early_exit:
        result_next_url = None
    # If there are more pages, save the cursor and return
    if result_next_url:
        new_checkpoint.next_url = result_next_url
    else:
        # Stage complete — advance to next stage (or next course if last).
        new_checkpoint.advance_stage()
    new_checkpoint.has_more = new_checkpoint.current_course_index < len(
        new_checkpoint.course_ids
    )
    return new_checkpoint
@override
def load_from_checkpoint(
    self,
    start: SecondsSinceUnixEpoch,
    end: SecondsSinceUnixEpoch,
    checkpoint: CanvasConnectorCheckpoint,
) -> CheckpointOutput[CanvasConnectorCheckpoint]:
    """Load documents from checkpoint without permission information."""
    return self._load_from_checkpoint(
        start=start,
        end=end,
        checkpoint=checkpoint,
        include_permissions=False,
    )
@override
def load_from_checkpoint_with_perm_sync(
    self,
    start: SecondsSinceUnixEpoch,
    end: SecondsSinceUnixEpoch,
    checkpoint: CanvasConnectorCheckpoint,
) -> CheckpointOutput[CanvasConnectorCheckpoint]:
    """Load documents from checkpoint with permission information included."""
    return self._load_from_checkpoint(
        start=start,
        end=end,
        checkpoint=checkpoint,
        include_permissions=True,
    )
@override
def build_dummy_checkpoint(self) -> CanvasConnectorCheckpoint:
    """Return the initial checkpoint used to kick off an indexing run."""
    initial_checkpoint = CanvasConnectorCheckpoint(has_more=True)
    return initial_checkpoint
@override
def validate_checkpoint_json(
    self, checkpoint_json: str
) -> CanvasConnectorCheckpoint:
    """Deserialize and validate a JSON checkpoint for this connector."""
    parsed = CanvasConnectorCheckpoint.model_validate_json(checkpoint_json)
    return parsed
@override
def validate_connector_settings(self) -> None:
"""Validate Canvas connector settings by testing API access."""
@@ -415,38 +771,6 @@ class CanvasConnector(
f"Unexpected error during Canvas settings validation: {exc}"
)
@override
def load_from_checkpoint(
self,
start: SecondsSinceUnixEpoch,
end: SecondsSinceUnixEpoch,
checkpoint: CanvasConnectorCheckpoint,
) -> CheckpointOutput[CanvasConnectorCheckpoint]:
# TODO(benwu408): implemented in PR3 (checkpoint)
raise NotImplementedError
@override
def load_from_checkpoint_with_perm_sync(
self,
start: SecondsSinceUnixEpoch,
end: SecondsSinceUnixEpoch,
checkpoint: CanvasConnectorCheckpoint,
) -> CheckpointOutput[CanvasConnectorCheckpoint]:
# TODO(benwu408): implemented in PR3 (checkpoint)
raise NotImplementedError
@override
def build_dummy_checkpoint(self) -> CanvasConnectorCheckpoint:
# TODO(benwu408): implemented in PR3 (checkpoint)
raise NotImplementedError
@override
def validate_checkpoint_json(
self, checkpoint_json: str
) -> CanvasConnectorCheckpoint:
# TODO(benwu408): implemented in PR3 (checkpoint)
raise NotImplementedError
@override
def retrieve_all_slim_docs_perm_sync(
self,

View File

@@ -171,7 +171,10 @@ class ClickupConnector(LoadConnector, PollConnector):
document.metadata[extra_field] = task[extra_field]
if self.retrieve_task_comments:
document.sections.extend(self._get_task_comments(task["id"]))
document.sections = [
*document.sections,
*self._get_task_comments(task["id"]),
]
doc_batch.append(document)

View File

@@ -0,0 +1,65 @@
import csv
import io
from typing import IO
from onyx.connectors.models import TabularSection
from onyx.file_processing.extract_file_text import file_io_to_text
from onyx.file_processing.extract_file_text import xlsx_sheet_extraction
from onyx.file_processing.file_types import OnyxFileExtensions
from onyx.utils.logger import setup_logger
logger = setup_logger()
def is_tabular_file(file_name: str) -> bool:
    """Return True when the filename's extension (case-insensitive) is one
    of the supported tabular extensions."""
    return file_name.lower().endswith(tuple(OnyxFileExtensions.TABULAR_EXTENSIONS))
def _tsv_to_csv(tsv_text: str) -> str:
"""Re-serialize tab-separated text as CSV so downstream parsers that
assume the default Excel dialect read the columns correctly."""
out = io.StringIO()
csv.writer(out, lineterminator="\n").writerows(
csv.reader(io.StringIO(tsv_text), dialect="excel-tab")
)
return out.getvalue().rstrip("\n")
def tabular_file_to_sections(
    file: IO[bytes],
    file_name: str,
    link: str = "",
) -> list[TabularSection]:
    """Convert a tabular file into one or more TabularSections.

    - .xlsx → one TabularSection per non-empty sheet.
    - .csv / .tsv → a single TabularSection containing the full decoded
      file.
    Returns an empty list when the file yields no extractable content.
    """
    name_lower = file_name.lower()
    if name_lower.endswith(".xlsx"):
        sections: list[TabularSection] = []
        for csv_text, sheet_title in xlsx_sheet_extraction(file, file_name=file_name):
            sections.append(
                TabularSection(link=f"{file_name} :: {sheet_title}", text=csv_text)
            )
        return sections
    if not name_lower.endswith((".csv", ".tsv")):
        raise ValueError(f"{file_name!r} is not a tabular file")
    try:
        text = file_io_to_text(file).strip()
    except Exception:
        logger.exception(f"Failure decoding {file_name}")
        raise
    if not text:
        return []
    if name_lower.endswith(".tsv"):
        # Normalize tab-delimited content to the default CSV dialect.
        text = _tsv_to_csv(text)
    return [TabularSection(link=link or file_name, text=text)]

View File

@@ -75,6 +75,7 @@ from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import NormalizationResult
from onyx.connectors.interfaces import Resolver
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import ConnectorMissingCredentialError
@@ -207,6 +208,7 @@ class DriveIdStatus(Enum):
class GoogleDriveConnector(
SlimConnector,
SlimConnectorWithPermSync,
CheckpointedConnectorWithPermSync[GoogleDriveCheckpoint],
Resolver,
@@ -1754,6 +1756,7 @@ class GoogleDriveConnector(
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
callback: IndexingHeartbeatInterface | None = None,
include_permissions: bool = True,
) -> GenerateSlimDocumentOutput:
files_batch: list[RetrievedDriveFile] = []
slim_batch: list[SlimDocument | HierarchyNode] = []
@@ -1763,9 +1766,13 @@ class GoogleDriveConnector(
nonlocal files_batch, slim_batch
# Get new ancestor hierarchy nodes first
permission_sync_context = PermissionSyncContext(
primary_admin_email=self.primary_admin_email,
google_domain=self.google_domain,
permission_sync_context = (
PermissionSyncContext(
primary_admin_email=self.primary_admin_email,
google_domain=self.google_domain,
)
if include_permissions
else None
)
new_ancestors = self._get_new_ancestors_for_files(
files=files_batch,
@@ -1779,10 +1786,7 @@ class GoogleDriveConnector(
if doc := build_slim_document(
self.creds,
file.drive_file,
PermissionSyncContext(
primary_admin_email=self.primary_admin_email,
google_domain=self.google_domain,
),
permission_sync_context,
retriever_email=file.user_email,
):
slim_batch.append(doc)
@@ -1822,11 +1826,12 @@ class GoogleDriveConnector(
if files_batch:
yield _yield_slim_batch()
def retrieve_all_slim_docs_perm_sync(
def _retrieve_all_slim_docs_impl(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
callback: IndexingHeartbeatInterface | None = None,
include_permissions: bool = True,
) -> GenerateSlimDocumentOutput:
try:
checkpoint = self.build_dummy_checkpoint()
@@ -1836,13 +1841,34 @@ class GoogleDriveConnector(
start=start,
end=end,
callback=callback,
include_permissions=include_permissions,
)
logger.info("Drive perm sync: Slim doc retrieval complete")
logger.info("Drive slim doc retrieval complete")
except Exception as e:
if MISSING_SCOPES_ERROR_STR in str(e):
raise PermissionError(ONYX_SCOPE_INSTRUCTIONS) from e
raise e
raise
@override
def retrieve_all_slim_docs(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
callback: IndexingHeartbeatInterface | None = None,
) -> GenerateSlimDocumentOutput:
return self._retrieve_all_slim_docs_impl(
start=start, end=end, callback=callback, include_permissions=False
)
def retrieve_all_slim_docs_perm_sync(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
callback: IndexingHeartbeatInterface | None = None,
) -> GenerateSlimDocumentOutput:
return self._retrieve_all_slim_docs_impl(
start=start, end=end, callback=callback, include_permissions=True
)
def validate_connector_settings(self) -> None:
if self._creds is None:

View File

@@ -123,6 +123,9 @@ class SlimConnector(BaseConnector):
@abc.abstractmethod
def retrieve_all_slim_docs(
    self,
    start: SecondsSinceUnixEpoch | None = None,
    end: SecondsSinceUnixEpoch | None = None,
    callback: IndexingHeartbeatInterface | None = None,
) -> GenerateSlimDocumentOutput:
    """Yield batches of slim documents, optionally bounded by [start, end].

    Abstract — concrete connectors supply the actual retrieval logic.
    # NOTE(review): `callback` is presumably a heartbeat hook for long
    # runs — confirm against implementing connectors.
    """
    raise NotImplementedError

View File

@@ -1,4 +1,5 @@
import sys
from collections.abc import Sequence
from datetime import datetime
from enum import Enum
from typing import Any
@@ -39,6 +40,7 @@ class SectionType(str, Enum):
TEXT = "text"
IMAGE = "image"
TABULAR = "tabular"
class Section(BaseModel):
@@ -70,6 +72,18 @@ class ImageSection(Section):
return sys.getsizeof(self.image_file_id) + sys.getsizeof(self.link)
class TabularSection(Section):
"""Section containing tabular data (csv/tsv content, or one sheet of
an xlsx workbook rendered as CSV)."""
type: Literal[SectionType.TABULAR] = SectionType.TABULAR
text: str # CSV representation in a string
link: str
def __sizeof__(self) -> int:
return sys.getsizeof(self.text) + sys.getsizeof(self.link)
class BasicExpertInfo(BaseModel):
"""Basic Information for the owner of a document, any of the fields can be left as None
Display fallback goes as follows:
@@ -171,7 +185,7 @@ class DocumentBase(BaseModel):
"""Used for Onyx ingestion api, the ID is inferred before use if not provided"""
id: str | None = None
sections: list[TextSection | ImageSection]
sections: Sequence[TextSection | ImageSection | TabularSection]
source: DocumentSource | None = None
semantic_identifier: str # displayed in the UI as the main identifier for the doc
# TODO(andrei): Ideally we could improve this to where each value is just a
@@ -381,12 +395,9 @@ class IndexingDocument(Document):
)
else:
section_len = sum(
(
len(section.text)
if isinstance(section, TextSection) and section.text is not None
else 0
)
len(section.text) if section.text is not None else 0
for section in self.sections
if isinstance(section, (TextSection, TabularSection))
)
return title_len + section_len

View File

@@ -750,31 +750,3 @@ def resync_cc_pair(
)
db_session.commit()
# ── Metrics query helpers ──────────────────────────────────────────────
def get_connector_health_for_metrics(
    db_session: Session,
) -> list:  # Returns list of Row tuples
    """Return connector health data for Prometheus metrics.

    Each row is (cc_pair_id, status, in_repeated_error_state,
    last_successful_index_time, name, source).
    """
    return (
        db_session.query(
            ConnectorCredentialPair.id,
            ConnectorCredentialPair.status,
            ConnectorCredentialPair.in_repeated_error_state,
            ConnectorCredentialPair.last_successful_index_time,
            ConnectorCredentialPair.name,
            Connector.source,
        )
        # Inner join: cc_pairs whose connector row is missing are excluded.
        .join(
            Connector,
            ConnectorCredentialPair.connector_id == Connector.id,
        )
        .all()
    )

View File

@@ -2,8 +2,6 @@ from collections.abc import Sequence
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from typing import NamedTuple
from typing import TYPE_CHECKING
from typing import TypeVarTuple
from sqlalchemy import and_
@@ -30,17 +28,6 @@ from onyx.utils.logger import setup_logger
from onyx.utils.telemetry import optional_telemetry
from onyx.utils.telemetry import RecordType
if TYPE_CHECKING:
from onyx.configs.constants import DocumentSource
# from sqlalchemy.sql.selectable import Select
# Comment out unused imports that cause mypy errors
# from onyx.auth.models import UserRole
# from onyx.configs.constants import MAX_LAST_VALID_CHECKPOINT_AGE_SECONDS
# from onyx.db.connector_credential_pair import ConnectorCredentialPairIdentifier
# from onyx.db.engine import async_query_for_dms
logger = setup_logger()
@@ -981,104 +968,48 @@ def get_index_attempt_errors_for_cc_pair(
return list(db_session.scalars(stmt).all())
# ── Metrics query helpers ──────────────────────────────────────────────
class ActiveIndexAttemptMetric(NamedTuple):
"""Row returned by get_active_index_attempts_for_metrics."""
status: IndexingStatus
source: "DocumentSource"
cc_pair_id: int
cc_pair_name: str | None
attempt_count: int
def get_active_index_attempts_for_metrics(
def get_index_attempt_errors_across_connectors(
db_session: Session,
) -> list[ActiveIndexAttemptMetric]:
"""Return non-terminal index attempts grouped by status, source, and connector.
cc_pair_id: int | None = None,
error_type: str | None = None,
start_time: datetime | None = None,
end_time: datetime | None = None,
unresolved_only: bool = True,
page: int = 0,
page_size: int = 25,
) -> tuple[list[IndexAttemptError], int]:
"""Query index attempt errors across all connectors with optional filters.
Each row is (status, source, cc_pair_id, cc_pair_name, attempt_count).
Returns (errors, total_count) for pagination.
"""
from onyx.db.models import Connector
stmt = select(IndexAttemptError)
count_stmt = select(func.count()).select_from(IndexAttemptError)
terminal_statuses = [s for s in IndexingStatus if s.is_terminal()]
rows = (
db_session.query(
IndexAttempt.status,
Connector.source,
ConnectorCredentialPair.id,
ConnectorCredentialPair.name,
func.count(),
if cc_pair_id is not None:
stmt = stmt.where(IndexAttemptError.connector_credential_pair_id == cc_pair_id)
count_stmt = count_stmt.where(
IndexAttemptError.connector_credential_pair_id == cc_pair_id
)
.join(
ConnectorCredentialPair,
IndexAttempt.connector_credential_pair_id == ConnectorCredentialPair.id,
)
.join(
Connector,
ConnectorCredentialPair.connector_id == Connector.id,
)
.filter(IndexAttempt.status.notin_(terminal_statuses))
.group_by(
IndexAttempt.status,
Connector.source,
ConnectorCredentialPair.id,
ConnectorCredentialPair.name,
)
.all()
)
return [ActiveIndexAttemptMetric(*row) for row in rows]
if error_type is not None:
stmt = stmt.where(IndexAttemptError.error_type == error_type)
count_stmt = count_stmt.where(IndexAttemptError.error_type == error_type)
def get_failed_attempt_counts_by_cc_pair(
db_session: Session,
since: datetime | None = None,
) -> dict[int, int]:
"""Return {cc_pair_id: failed_attempt_count} for all connectors.
if unresolved_only:
stmt = stmt.where(IndexAttemptError.is_resolved.is_(False))
count_stmt = count_stmt.where(IndexAttemptError.is_resolved.is_(False))
When ``since`` is provided, only attempts created after that timestamp
are counted. Defaults to the last 90 days to avoid unbounded historical
aggregation.
"""
if since is None:
since = datetime.now(timezone.utc) - timedelta(days=90)
if start_time is not None:
stmt = stmt.where(IndexAttemptError.time_created >= start_time)
count_stmt = count_stmt.where(IndexAttemptError.time_created >= start_time)
rows = (
db_session.query(
IndexAttempt.connector_credential_pair_id,
func.count(),
)
.filter(IndexAttempt.status == IndexingStatus.FAILED)
.filter(IndexAttempt.time_created >= since)
.group_by(IndexAttempt.connector_credential_pair_id)
.all()
)
return {cc_id: count for cc_id, count in rows}
if end_time is not None:
stmt = stmt.where(IndexAttemptError.time_created <= end_time)
count_stmt = count_stmt.where(IndexAttemptError.time_created <= end_time)
stmt = stmt.order_by(desc(IndexAttemptError.time_created))
stmt = stmt.offset(page * page_size).limit(page_size)
def get_docs_indexed_by_cc_pair(
db_session: Session,
since: datetime | None = None,
) -> dict[int, int]:
"""Return {cc_pair_id: total_new_docs_indexed} across successful attempts.
Only counts attempts with status SUCCESS to avoid inflating counts with
partial results from failed attempts. When ``since`` is provided, only
attempts created after that timestamp are included.
"""
if since is None:
since = datetime.now(timezone.utc) - timedelta(days=90)
query = (
db_session.query(
IndexAttempt.connector_credential_pair_id,
func.sum(func.coalesce(IndexAttempt.new_docs_indexed, 0)),
)
.filter(IndexAttempt.status == IndexingStatus.SUCCESS)
.filter(IndexAttempt.time_created >= since)
.group_by(IndexAttempt.connector_credential_pair_id)
)
rows = query.all()
return {cc_id: int(total or 0) for cc_id, total in rows}
total = db_session.scalar(count_stmt) or 0
errors = list(db_session.scalars(stmt).all())
return errors, total

View File

@@ -379,13 +379,25 @@ def _worksheet_to_matrix(
worksheet: Worksheet,
) -> list[list[str]]:
"""
Converts a singular worksheet to a matrix of values
Converts a singular worksheet to a matrix of values.
Rows are padded to a uniform width. In openpyxl's read_only mode,
iter_rows can yield rows of differing lengths (trailing empty cells
are sometimes omitted), and downstream column cleanup assumes a
rectangular matrix.
"""
rows: list[list[str]] = []
max_len = 0
for worksheet_row in worksheet.iter_rows(min_row=1, values_only=True):
row = ["" if cell is None else str(cell) for cell in worksheet_row]
if len(row) > max_len:
max_len = len(row)
rows.append(row)
for row in rows:
if len(row) < max_len:
row.extend([""] * (max_len - len(row)))
return rows
@@ -463,29 +475,13 @@ def _remove_empty_runs(
return result
def xlsx_to_text(file: IO[Any], file_name: str = "") -> str:
# TODO: switch back to this approach in a few months when markitdown
# fixes their handling of excel files
def xlsx_sheet_extraction(file: IO[Any], file_name: str = "") -> list[tuple[str, str]]:
"""
Converts each sheet in the excel file to a csv condensed string.
Returns a string and the worksheet title for each worksheet
# md = get_markitdown_converter()
# stream_info = StreamInfo(
# mimetype=SPREADSHEET_MIME_TYPE, filename=file_name or None, extension=".xlsx"
# )
# try:
# workbook = md.convert(to_bytesio(file), stream_info=stream_info)
# except (
# BadZipFile,
# ValueError,
# FileConversionException,
# UnsupportedFormatException,
# ) as e:
# error_str = f"Failed to extract text from {file_name or 'xlsx file'}: {e}"
# if file_name.startswith("~"):
# logger.debug(error_str + " (this is expected for files with ~)")
# else:
# logger.warning(error_str)
# return ""
# return workbook.markdown
Returns a list of (csv_text, sheet)
"""
try:
workbook = openpyxl.load_workbook(file, read_only=True)
except BadZipFile as e:
@@ -494,23 +490,30 @@ def xlsx_to_text(file: IO[Any], file_name: str = "") -> str:
logger.debug(error_str + " (this is expected for files with ~)")
else:
logger.warning(error_str)
return ""
return []
except Exception as e:
if any(s in str(e) for s in KNOWN_OPENPYXL_BUGS):
logger.error(
f"Failed to extract text from {file_name or 'xlsx file'}. This happens due to a bug in openpyxl. {e}"
)
return ""
return []
raise
text_content = []
sheets: list[tuple[str, str]] = []
for sheet in workbook.worksheets:
sheet_matrix = _clean_worksheet_matrix(_worksheet_to_matrix(sheet))
buf = io.StringIO()
writer = csv.writer(buf, lineterminator="\n")
writer.writerows(sheet_matrix)
text_content.append(buf.getvalue().rstrip("\n"))
return TEXT_SECTION_SEPARATOR.join(text_content)
csv_text = buf.getvalue().rstrip("\n")
if csv_text.strip():
sheets.append((csv_text, sheet.title))
return sheets
def xlsx_to_text(file: IO[Any], file_name: str = "") -> str:
sheets = xlsx_sheet_extraction(file, file_name)
return TEXT_SECTION_SEPARATOR.join(csv_text for csv_text, _title in sheets)
def eml_to_text(file: IO[Any]) -> str:

View File

@@ -7,6 +7,7 @@ from onyx.indexing.chunking.image_section_chunker import ImageChunker
from onyx.indexing.chunking.section_chunker import AccumulatorState
from onyx.indexing.chunking.section_chunker import ChunkPayload
from onyx.indexing.chunking.section_chunker import SectionChunker
from onyx.indexing.chunking.tabular_section_chunker import TabularChunker
from onyx.indexing.chunking.text_section_chunker import TextChunker
from onyx.indexing.models import DocAwareChunk
from onyx.natural_language_processing.utils import BaseTokenizer
@@ -38,6 +39,7 @@ class DocumentChunker:
chunk_splitter=chunk_splitter,
),
SectionType.IMAGE: ImageChunker(),
SectionType.TABULAR: TabularChunker(tokenizer=tokenizer),
}
def chunk(
@@ -99,7 +101,9 @@ class DocumentChunker:
payloads.extend(result.payloads)
accumulator = result.accumulator
# Final flush — any leftover buffered text becomes one last payload.
payloads.extend(accumulator.flush_to_list())
return payloads
def _select_chunker(self, section: Section) -> SectionChunker:

View File

@@ -0,0 +1,272 @@
import csv
import io
from collections.abc import Iterable
from pydantic import BaseModel
from onyx.connectors.models import Section
from onyx.indexing.chunking.section_chunker import AccumulatorState
from onyx.indexing.chunking.section_chunker import ChunkPayload
from onyx.indexing.chunking.section_chunker import SectionChunker
from onyx.indexing.chunking.section_chunker import SectionChunkerOutput
from onyx.natural_language_processing.utils import BaseTokenizer
from onyx.natural_language_processing.utils import count_tokens
from onyx.natural_language_processing.utils import split_text_by_tokens
from onyx.utils.logger import setup_logger
logger = setup_logger()
# Prefix for the synthesized line that names a sheet's columns.
COLUMNS_MARKER = "Columns:"
# Separator between field=value pairs within one formatted row.
FIELD_VALUE_SEPARATOR = ", "
# Separator between formatted rows within a chunk.
ROW_JOIN = "\n"
# Token cost charged for each joining newline (additive approximation).
NEWLINE_TOKENS = 1
class _ParsedRow(BaseModel):
    # One CSV data row paired with the header row of its sheet.
    header: list[str]
    row: list[str]
class _TokenizedText(BaseModel):
    # A text fragment together with its precomputed token count.
    text: str
    token_count: int
def format_row(header: list[str], row: list[str]) -> str:
    """
    A header-row combination is formatted like this:
    field1=value1, field2=value2, field3=value3
    (empty-valued fields are dropped by _row_to_pairs)
    """
    return FIELD_VALUE_SEPARATOR.join(
        f"{field}={value}" for field, value in _row_to_pairs(header, row)
    )
def format_columns_header(headers: list[str]) -> str:
    """
    Format the column header line. Underscored headers get a
    space-substituted friendly alias in parens.

    Example:
        headers = ["id", "MTTR_hours"]
        => "Columns: id, MTTR_hours (MTTR hours)"
    """
    rendered = [
        f'{name} ({name.replace("_", " ")})' if "_" in name else name
        for name in headers
    ]
    return f"{COLUMNS_MARKER} " + FIELD_VALUE_SEPARATOR.join(rendered)
def parse_section(section: Section) -> list[_ParsedRow]:
    """Parse CSV into headers + rows. First non-empty row is the header;
    blank rows are skipped."""
    raw_text = section.text or ""
    if not raw_text.strip():
        return []
    kept_rows: list[list[str]] = []
    for parsed in csv.reader(io.StringIO(raw_text)):
        # Keep only rows with at least one non-whitespace cell.
        if any(cell.strip() for cell in parsed):
            kept_rows.append(parsed)
    if not kept_rows:
        return []
    header = kept_rows[0]
    return [_ParsedRow(header=header, row=data) for data in kept_rows[1:]]
def _row_to_pairs(headers: list[str], row: list[str]) -> list[tuple[str, str]]:
return [(h, v) for h, v in zip(headers, row) if v.strip()]
def pack_chunk(chunk: str, new_row: str) -> str:
    """Append a formatted row to an existing chunk on a new line."""
    return "\n".join((chunk, new_row))
def _split_row_by_pairs(
    pairs: list[tuple[str, str]],
    tokenizer: BaseTokenizer,
    max_tokens: int,
) -> list[_TokenizedText]:
    """Greedily pack pairs into max-sized pieces. Any single pair that
    itself exceeds ``max_tokens`` is token-split at id boundaries.
    No headers."""
    separator_tokens = count_tokens(FIELD_VALUE_SEPARATOR, tokenizer)
    pieces: list[_TokenizedText] = []
    current_parts: list[str] = []
    current_tokens = 0
    for pair in pairs:
        pair_str = f"{pair[0]}={pair[1]}"
        pair_tokens = count_tokens(pair_str, tokenizer)
        # First pair in a piece pays no separator cost.
        increment = pair_tokens if not current_parts else separator_tokens + pair_tokens
        if current_tokens + increment <= max_tokens:
            current_parts.append(pair_str)
            current_tokens += increment
            continue
        # Pair doesn't fit — close out the piece built so far.
        if current_parts:
            pieces.append(
                _TokenizedText(
                    text=FIELD_VALUE_SEPARATOR.join(current_parts),
                    token_count=current_tokens,
                )
            )
            current_parts = []
            current_tokens = 0
        if pair_tokens > max_tokens:
            # Oversized single pair: hard-split by tokens. Each fragment
            # becomes its own piece; nothing carries into the next loop turn.
            for split_text in split_text_by_tokens(pair_str, tokenizer, max_tokens):
                pieces.append(
                    _TokenizedText(
                        text=split_text,
                        token_count=count_tokens(split_text, tokenizer),
                    )
                )
        else:
            # Start a fresh piece with the pair that didn't fit.
            current_parts = [pair_str]
            current_tokens = pair_tokens
    # Flush the final partially-filled piece.
    if current_parts:
        pieces.append(
            _TokenizedText(
                text=FIELD_VALUE_SEPARATOR.join(current_parts),
                token_count=current_tokens,
            )
        )
    return pieces
def _build_chunk_from_scratch(
    pairs: list[tuple[str, str]],
    formatted_row: str,
    row_tokens: int,
    column_header: str,
    column_header_tokens: int,
    sheet_header: str,
    sheet_header_tokens: int,
    tokenizer: BaseTokenizer,
    max_tokens: int,
) -> list[_TokenizedText]:
    """Turn one formatted row into chunk piece(s), prepending the column
    header and then the sheet header when each still fits in ``max_tokens``.
    Header token counts are passed in precomputed to avoid re-tokenizing
    per row."""
    # 1. Row alone is too large — split by pairs, no headers.
    if row_tokens > max_tokens:
        return _split_row_by_pairs(pairs, tokenizer, max_tokens)
    chunk = formatted_row
    chunk_tokens = row_tokens
    # 2. Attempt to add column header
    candidate_tokens = column_header_tokens + NEWLINE_TOKENS + chunk_tokens
    if candidate_tokens <= max_tokens:
        chunk = column_header + ROW_JOIN + chunk
        chunk_tokens = candidate_tokens
    # 3. Attempt to add sheet header
    if sheet_header:
        candidate_tokens = sheet_header_tokens + NEWLINE_TOKENS + chunk_tokens
        if candidate_tokens <= max_tokens:
            chunk = sheet_header + ROW_JOIN + chunk
            chunk_tokens = candidate_tokens
    return [_TokenizedText(text=chunk, token_count=chunk_tokens)]
def parse_to_chunks(
    rows: Iterable[_ParsedRow],
    sheet_header: str,
    tokenizer: BaseTokenizer,
    max_tokens: int,
) -> list[str]:
    """Pack formatted rows into newline-joined chunk strings, each kept
    within ``max_tokens`` using additive token-count approximations."""
    rows_list = list(rows)
    if not rows_list:
        return []
    # Header metadata comes from the first row; rows from one CSV section
    # carry the same header (see parse_section).
    column_header = format_columns_header(rows_list[0].header)
    column_header_tokens = count_tokens(column_header, tokenizer)
    sheet_header_tokens = count_tokens(sheet_header, tokenizer) if sheet_header else 0
    chunks: list[str] = []
    current_chunk = ""
    current_chunk_tokens = 0
    for row in rows_list:
        pairs: list[tuple[str, str]] = _row_to_pairs(row.header, row.row)
        formatted = format_row(row.header, row.row)
        row_tokens = count_tokens(formatted, tokenizer)
        if current_chunk:
            # Attempt to pack it in (additive approximation)
            if current_chunk_tokens + NEWLINE_TOKENS + row_tokens <= max_tokens:
                current_chunk = pack_chunk(current_chunk, formatted)
                current_chunk_tokens += NEWLINE_TOKENS + row_tokens
                continue
            # Doesn't fit — flush and start new
            chunks.append(current_chunk)
            current_chunk = ""
            current_chunk_tokens = 0
        # Build chunk from scratch
        for piece in _build_chunk_from_scratch(
            pairs=pairs,
            formatted_row=formatted,
            row_tokens=row_tokens,
            column_header=column_header,
            column_header_tokens=column_header_tokens,
            sheet_header=sheet_header,
            sheet_header_tokens=sheet_header_tokens,
            tokenizer=tokenizer,
            max_tokens=max_tokens,
        ):
            # Multiple pieces only occur for oversized rows; flush the
            # open chunk before adopting each new piece as current.
            if current_chunk:
                chunks.append(current_chunk)
            current_chunk = piece.text
            current_chunk_tokens = piece.token_count
    # Flush remaining
    if current_chunk:
        chunks.append(current_chunk)
    return chunks
class TabularChunker(SectionChunker):
    """Chunker for tabular sections: parses CSV text, renders rows as
    field=value lines, and packs them into token-limited chunk payloads."""

    def __init__(self, tokenizer: BaseTokenizer) -> None:
        # Tokenizer used to budget chunk sizes against the token limit.
        self.tokenizer = tokenizer

    def chunk_section(
        self,
        section: Section,
        accumulator: AccumulatorState,
        content_token_limit: int,
    ) -> SectionChunkerOutput:
        # Start from whatever payloads the accumulator already holds, so
        # previously buffered content is emitted ahead of tabular chunks.
        payloads = accumulator.flush_to_list()
        parsed_rows = parse_section(section)
        if not parsed_rows:
            logger.warning(
                f"TabularChunker: skipping unparseable section (link={section.link})"
            )
            return SectionChunkerOutput(
                payloads=payloads, accumulator=AccumulatorState()
            )
        # The section link doubles as the sheet header prepended to chunks
        # (e.g. "workbook.xlsx :: Sheet1" — see tabular_file_to_sections).
        sheet_header = section.link or ""
        chunk_texts = parse_to_chunks(
            rows=parsed_rows,
            sheet_header=sheet_header,
            tokenizer=self.tokenizer,
            max_tokens=content_token_limit,
        )
        for i, text in enumerate(chunk_texts):
            payloads.append(
                ChunkPayload(
                    text=text,
                    links={0: section.link or ""},
                    # Chunks after the first continue the same section.
                    is_continuation=(i > 0),
                )
            )
        # Always hand back a fresh accumulator: tabular content never
        # merges with text from following sections.
        return SectionChunkerOutput(payloads=payloads, accumulator=AccumulatorState())

View File

@@ -10,6 +10,7 @@ from onyx.indexing.chunking.section_chunker import SectionChunker
from onyx.indexing.chunking.section_chunker import SectionChunkerOutput
from onyx.natural_language_processing.utils import BaseTokenizer
from onyx.natural_language_processing.utils import count_tokens
from onyx.natural_language_processing.utils import split_text_by_tokens
from onyx.utils.text_processing import clean_text
from onyx.utils.text_processing import shared_precompare_cleanup
from shared_configs.configs import STRICT_CHUNK_TOKEN_LIMIT
@@ -90,8 +91,8 @@ class TextChunker(SectionChunker):
STRICT_CHUNK_TOKEN_LIMIT
and count_tokens(split_text, self.tokenizer) > content_token_limit
):
smaller_chunks = self._split_oversized_chunk(
split_text, content_token_limit
smaller_chunks = split_text_by_tokens(
split_text, self.tokenizer, content_token_limit
)
for j, small_chunk in enumerate(smaller_chunks):
payloads.append(
@@ -114,16 +115,3 @@ class TextChunker(SectionChunker):
payloads=payloads,
accumulator=AccumulatorState(),
)
def _split_oversized_chunk(self, text: str, content_token_limit: int) -> list[str]:
tokens = self.tokenizer.tokenize(text)
chunks: list[str] = []
start = 0
total_tokens = len(tokens)
while start < total_tokens:
end = min(start + content_token_limit, total_tokens)
token_chunk = tokens[start:end]
chunk_text = " ".join(token_chunk)
chunks.append(chunk_text)
start = end
return chunks

View File

@@ -3,6 +3,8 @@ from abc import ABC
from abc import abstractmethod
from collections import defaultdict
import sentry_sdk
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import ConnectorStopSignal
from onyx.connectors.models import DocumentFailure
@@ -291,6 +293,13 @@ def embed_chunks_with_failure_handling(
)
embedded_chunks.extend(doc_embedded_chunks)
except Exception as e:
with sentry_sdk.new_scope() as scope:
scope.set_tag("stage", "embedding")
scope.set_tag("doc_id", doc_id)
if tenant_id:
scope.set_tag("tenant_id", tenant_id)
scope.fingerprint = ["embedding-failure", type(e).__name__]
sentry_sdk.capture_exception(e)
logger.exception(f"Failed to embed chunks for document '{doc_id}'")
failures.append(
ConnectorFailure(

View File

@@ -5,6 +5,7 @@ from collections.abc import Iterator
from contextlib import contextmanager
from typing import Protocol
import sentry_sdk
from pydantic import BaseModel
from pydantic import ConfigDict
from sqlalchemy.orm import Session
@@ -332,6 +333,13 @@ def index_doc_batch_with_handler(
except Exception as e:
# don't log the batch directly, it's too much text
document_ids = [doc.id for doc in document_batch]
with sentry_sdk.new_scope() as scope:
scope.set_tag("stage", "indexing_pipeline")
scope.set_tag("tenant_id", tenant_id)
scope.set_tag("batch_size", str(len(document_batch)))
scope.set_extra("document_ids", document_ids)
scope.fingerprint = ["indexing-pipeline-failure", type(e).__name__]
sentry_sdk.capture_exception(e)
logger.exception(f"Failed to index document batch: {document_ids}")
index_pipeline_result = IndexingPipelineResult(

View File

@@ -6,6 +6,7 @@ from itertools import chain
from itertools import groupby
import httpx
import sentry_sdk
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import DocumentFailure
@@ -88,6 +89,12 @@ def write_chunks_to_vector_db_with_backoff(
)
)
except Exception as e:
with sentry_sdk.new_scope() as scope:
scope.set_tag("stage", "vector_db_write")
scope.set_tag("doc_id", doc_id)
scope.set_tag("tenant_id", index_batch_params.tenant_id)
scope.fingerprint = ["vector-db-write-failure", type(e).__name__]
sentry_sdk.capture_exception(e)
logger.exception(
f"Failed to write document chunks for '{doc_id}' to vector db"
)

View File

@@ -434,11 +434,14 @@ def get_application(lifespan_override: Lifespan | None = None) -> FastAPI:
lifespan=lifespan_override or lifespan,
)
if SENTRY_DSN:
from onyx.configs.sentry import _add_instance_tags
sentry_sdk.init(
dsn=SENTRY_DSN,
integrations=[StarletteIntegration(), FastApiIntegration()],
traces_sample_rate=0.1,
release=__version__,
before_send=_add_instance_tags,
)
logger.info("Sentry initialized")
else:

View File

@@ -201,6 +201,33 @@ def count_tokens(
return total
def split_text_by_tokens(
    text: str,
    tokenizer: BaseTokenizer,
    max_tokens: int,
) -> list[str]:
    """Best-effort split of ``text`` into pieces of ≤ ``max_tokens`` tokens.

    The text is encoded (in fixed-size character windows to bound per-call
    cost) and the resulting token ids are decoded back in ``max_tokens``-sized
    windows.

    Caveat: pieces are not strictly guaranteed to re-tokenize to
    ≤ ``max_tokens`` — BPE merges at window boundaries may drift by a few
    tokens, and a cut landing mid multi-byte UTF-8 character yields
    replacement characters on decode. Suitable for best-effort splitting
    of oversized content, not hard limit enforcement.
    """
    if not text:
        return []

    # Encode the text window-by-window and accumulate all token ids.
    all_ids: list[int] = []
    for offset in range(0, len(text), _ENCODE_CHUNK_SIZE):
        window = text[offset : offset + _ENCODE_CHUNK_SIZE]
        all_ids.extend(tokenizer.encode(window))

    # Decode fixed-size token-id windows back into text pieces.
    pieces: list[str] = []
    cursor = 0
    while cursor < len(all_ids):
        pieces.append(tokenizer.decode(all_ids[cursor : cursor + max_tokens]))
        cursor += max_tokens
    return pieces
def tokenizer_trim_content(
content: str, desired_length: int, tokenizer: BaseTokenizer
) -> str:

View File

@@ -185,6 +185,10 @@ class MinimalPersonaSnapshot(BaseModel):
for doc_set in persona.document_sets:
for cc_pair in doc_set.connector_credential_pairs:
sources.add(cc_pair.connector.source)
for fed_ds in doc_set.federated_connectors:
non_fed = fed_ds.federated_connector.source.to_non_federated_source()
if non_fed is not None:
sources.add(non_fed)
# Sources from hierarchy nodes
for node in persona.hierarchy_nodes:
@@ -195,6 +199,9 @@ class MinimalPersonaSnapshot(BaseModel):
if doc.parent_hierarchy_node:
sources.add(doc.parent_hierarchy_node.source)
if persona.user_files:
sources.add(DocumentSource.USER_FILE)
return MinimalPersonaSnapshot(
# Core fields actually used by ChatPage
id=persona.id,

View File

@@ -11,6 +11,7 @@ from sqlalchemy.orm import Session
from onyx.auth.permissions import require_permission
from onyx.auth.users import current_curator_or_admin_user
from onyx.background.celery.versioned_apps.client import app as client_app
from onyx.background.indexing.models import IndexAttemptErrorPydantic
from onyx.configs.app_configs import GENERATIVE_MODEL_ACCESS_CHECK_FREQ
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import KV_GEN_AI_KEY_CHECK_TIME
@@ -28,6 +29,7 @@ from onyx.db.feedback import fetch_docs_ranked_by_boost_for_user
from onyx.db.feedback import update_document_boost_for_user
from onyx.db.feedback import update_document_hidden_for_user
from onyx.db.index_attempt import cancel_indexing_attempts_for_ccpair
from onyx.db.index_attempt import get_index_attempt_errors_across_connectors
from onyx.db.models import User
from onyx.file_store.file_store import get_default_file_store
from onyx.key_value_store.factory import get_kv_store
@@ -35,6 +37,7 @@ from onyx.key_value_store.interface import KvKeyNotFoundError
from onyx.llm.factory import get_default_llm
from onyx.llm.utils import test_llm
from onyx.server.documents.models import ConnectorCredentialPairIdentifier
from onyx.server.documents.models import PaginatedReturn
from onyx.server.manage.models import BoostDoc
from onyx.server.manage.models import BoostUpdateRequest
from onyx.server.manage.models import HiddenUpdateRequest
@@ -206,3 +209,40 @@ def create_deletion_attempt_for_connector_id(
file_store = get_default_file_store()
for file_id in connector.connector_specific_config.get("file_locations", []):
file_store.delete_file(file_id)
@router.get("/admin/indexing/failed-documents")
def get_failed_documents(
cc_pair_id: int | None = None,
error_type: str | None = None,
start_time: datetime | None = None,
end_time: datetime | None = None,
include_resolved: bool = False,
page_num: int = 0,
page_size: int = 25,
_: User = Depends(require_permission(Permission.FULL_ADMIN_PANEL_ACCESS)),
db_session: Session = Depends(get_session),
) -> PaginatedReturn[IndexAttemptErrorPydantic]:
"""Get indexing errors across all connectors with optional filters.
Provides a cross-connector view of document indexing failures.
Defaults to last 30 days if no start_time is provided to avoid
unbounded count queries.
"""
if start_time is None:
start_time = datetime.now(tz=timezone.utc) - timedelta(days=30)
errors, total = get_index_attempt_errors_across_connectors(
db_session=db_session,
cc_pair_id=cc_pair_id,
error_type=error_type,
start_time=start_time,
end_time=end_time,
unresolved_only=not include_resolved,
page=page_num,
page_size=page_size,
)
return PaginatedReturn(
items=[IndexAttemptErrorPydantic.from_model(e) for e in errors],
total_items=total,
)

View File

@@ -183,6 +183,9 @@ def generate_ollama_display_name(model_name: str) -> str:
"qwen2.5:7b""Qwen 2.5 7B"
"mistral:latest""Mistral"
"deepseek-r1:14b""DeepSeek R1 14B"
"gemma4:e4b""Gemma 4 E4B"
"deepseek-v3.1:671b-cloud""DeepSeek V3.1 671B Cloud"
"qwen3-vl:235b-instruct-cloud""Qwen 3-vl 235B Instruct Cloud"
"""
# Split into base name and tag
if ":" in model_name:
@@ -209,13 +212,24 @@ def generate_ollama_display_name(model_name: str) -> str:
# Default: Title case with dashes converted to spaces
display_name = base.replace("-", " ").title()
# Process tag to extract size info (skip "latest")
# Process tag (skip "latest")
if tag and tag.lower() != "latest":
# Extract size like "7b", "70b", "14b"
size_match = re.match(r"^(\d+(?:\.\d+)?[bBmM])", tag)
# Check for size prefix like "7b", "70b", optionally followed by modifiers
size_match = re.match(r"^(\d+(?:\.\d+)?[bBmM])(-.+)?$", tag)
if size_match:
size = size_match.group(1).upper()
display_name = f"{display_name} {size}"
remainder = size_match.group(2)
if remainder:
# Format modifiers like "-cloud", "-instruct-cloud"
modifiers = " ".join(
p.title() for p in remainder.strip("-").split("-") if p
)
display_name = f"{display_name} {size} {modifiers}"
else:
display_name = f"{display_name} {size}"
else:
# Non-size tags like "e4b", "q4_0", "fp16", "cloud"
display_name = f"{display_name} {tag.upper()}"
return display_name

View File

@@ -1,13 +1,14 @@
import json
import secrets
from collections.abc import AsyncIterator
from fastapi import APIRouter
from fastapi import Depends
from fastapi import File
from fastapi import Query
from fastapi import UploadFile
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from pydantic import Field
from sqlalchemy.orm import Session
from onyx.auth.permissions import require_permission
@@ -113,28 +114,47 @@ async def transcribe_audio(
) from exc
def _extract_provider_error(exc: Exception) -> str:
"""Extract a human-readable message from a provider exception.
Provider errors often embed JSON from upstream APIs (e.g. ElevenLabs).
This tries to parse a readable ``message`` field out of common JSON
error shapes; falls back to ``str(exc)`` if nothing better is found.
"""
raw = str(exc)
try:
# Many providers embed JSON after a prefix like "ElevenLabs TTS failed: {...}"
json_start = raw.find("{")
if json_start == -1:
return raw
parsed = json.loads(raw[json_start:])
# Shape: {"detail": {"message": "..."}} (ElevenLabs)
detail = parsed.get("detail", parsed)
if isinstance(detail, dict):
return detail.get("message") or detail.get("error") or raw
if isinstance(detail, str):
return detail
except (json.JSONDecodeError, AttributeError, TypeError):
pass
return raw
class SynthesizeRequest(BaseModel):
    """Request body for the /synthesize TTS endpoint."""
    # Text to synthesize; validation rejects empty strings.
    text: str = Field(..., min_length=1)
    # Optional voice ID; when omitted the provider's default voice is used.
    voice: str | None = None
    # Optional playback speed multiplier; must be within [0.5, 2.0].
    speed: float | None = Field(default=None, ge=0.5, le=2.0)
@router.post("/synthesize")
async def synthesize_speech(
text: str | None = Query(
default=None, description="Text to synthesize", max_length=4096
),
voice: str | None = Query(default=None, description="Voice ID to use"),
speed: float | None = Query(
default=None, description="Playback speed (0.5-2.0)", ge=0.5, le=2.0
),
body: SynthesizeRequest,
user: User = Depends(require_permission(Permission.BASIC_ACCESS)),
) -> StreamingResponse:
"""
Synthesize text to speech using the default TTS provider.
Accepts parameters via query string for streaming compatibility.
"""
logger.info(
f"TTS request: text length={len(text) if text else 0}, voice={voice}, speed={speed}"
)
if not text:
raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, "Text is required")
"""Synthesize text to speech using the default TTS provider."""
text = body.text
voice = body.voice
speed = body.speed
logger.info(f"TTS request: text length={len(text)}, voice={voice}, speed={speed}")
# Use short-lived session to fetch provider config, then release connection
# before starting the long-running streaming response
@@ -177,31 +197,36 @@ async def synthesize_speech(
logger.error(f"Failed to get voice provider: {exc}")
raise OnyxError(OnyxErrorCode.INTERNAL_ERROR, str(exc)) from exc
# Session is now closed - streaming response won't hold DB connection
# Pull the first chunk before returning the StreamingResponse. If the
# provider rejects the request (e.g. text too long), the error surfaces
# as a proper HTTP error instead of a broken audio stream.
stream_iter = provider.synthesize_stream(
text=text, voice=final_voice, speed=final_speed
)
try:
first_chunk = await stream_iter.__anext__()
except StopAsyncIteration:
raise OnyxError(OnyxErrorCode.INTERNAL_ERROR, "TTS provider returned no audio")
except Exception as exc:
raise OnyxError(
OnyxErrorCode.BAD_GATEWAY, _extract_provider_error(exc)
) from exc
async def audio_stream() -> AsyncIterator[bytes]:
try:
chunk_count = 0
async for chunk in provider.synthesize_stream(
text=text, voice=final_voice, speed=final_speed
):
chunk_count += 1
yield chunk
logger.info(f"TTS streaming complete: {chunk_count} chunks sent")
except NotImplementedError as exc:
logger.error(f"TTS not implemented: {exc}")
raise
except Exception as exc:
logger.error(f"Synthesis failed: {exc}")
raise
yield first_chunk
chunk_count = 1
async for chunk in stream_iter:
chunk_count += 1
yield chunk
logger.info(f"TTS streaming complete: {chunk_count} chunks sent")
return StreamingResponse(
audio_stream(),
media_type="audio/mpeg",
headers={
"Content-Disposition": "inline; filename=speech.mp3",
# Allow streaming by not setting content-length
"Cache-Control": "no-cache",
"X-Accel-Buffering": "no", # Disable nginx buffering
"X-Accel-Buffering": "no",
},
)

View File

@@ -0,0 +1,110 @@
"""Prometheus metrics for connector health and index attempts.
Emitted by docfetching and docprocessing workers when connector or
index attempt state changes. All functions silently catch exceptions
to avoid disrupting the caller's business logic.
Gauge metrics (error state, last success timestamp) are per-process.
With multiple worker pods, use max() aggregation in PromQL to get the
correct value across instances, e.g.:
max by (cc_pair_id) (onyx_connector_in_error_state)
"""
from prometheus_client import Counter
from prometheus_client import Gauge
from onyx.utils.logger import setup_logger
logger = setup_logger()
# --- Index attempt lifecycle ---
# Counter incremented once per index-attempt status transition.
INDEX_ATTEMPT_STATUS = Counter(
"onyx_index_attempt_transitions_total",
"Index attempt status transitions",
["tenant_id", "source", "cc_pair_id", "status"],
)
# --- Connector health ---
# Per-process gauge (see module docstring: aggregate with max() in PromQL).
CONNECTOR_IN_ERROR_STATE = Gauge(
"onyx_connector_in_error_state",
"Whether the connector is in a repeated error state (1=yes, 0=no)",
["tenant_id", "source", "cc_pair_id"],
)
# Per-process gauge of the last successful indexing time (unix seconds).
CONNECTOR_LAST_SUCCESS_TIMESTAMP = Gauge(
"onyx_connector_last_success_timestamp_seconds",
"Unix timestamp of last successful indexing for this connector",
["tenant_id", "source", "cc_pair_id"],
)
# Monotonic counter of documents indexed; incremented on successful runs.
CONNECTOR_DOCS_INDEXED = Counter(
"onyx_connector_docs_indexed_total",
"Total documents indexed per connector (monotonic)",
["tenant_id", "source", "cc_pair_id"],
)
# Monotonic counter of failed index attempts; incremented on "failed" status.
CONNECTOR_INDEXING_ERRORS = Counter(
"onyx_connector_indexing_errors_total",
"Total failed index attempts per connector (monotonic)",
["tenant_id", "source", "cc_pair_id"],
)
def on_index_attempt_status_change(
    tenant_id: str,
    source: str,
    cc_pair_id: int,
    status: str,
) -> None:
    """Record an index attempt status transition.

    A "failed" status additionally bumps the per-connector error counter.
    Never raises — metric errors are logged at debug level so the caller's
    business logic is unaffected.
    """
    try:
        base_labels = dict(
            tenant_id=tenant_id,
            source=source,
            cc_pair_id=str(cc_pair_id),
        )
        INDEX_ATTEMPT_STATUS.labels(status=status, **base_labels).inc()
        if status == "failed":
            CONNECTOR_INDEXING_ERRORS.labels(**base_labels).inc()
    except Exception:
        logger.debug("Failed to record index attempt status metric", exc_info=True)
def on_connector_error_state_change(
    tenant_id: str,
    source: str,
    cc_pair_id: int,
    in_error: bool,
) -> None:
    """Update the error-state gauge when a connector's
    ``in_repeated_error_state`` flips.

    Never raises — metric errors are logged at debug level only.
    """
    gauge_value = 1.0 if in_error else 0.0
    try:
        CONNECTOR_IN_ERROR_STATE.labels(
            tenant_id=tenant_id,
            source=source,
            cc_pair_id=str(cc_pair_id),
        ).set(gauge_value)
    except Exception:
        logger.debug("Failed to record connector error state metric", exc_info=True)
def on_connector_indexing_success(
    tenant_id: str,
    source: str,
    cc_pair_id: int,
    docs_indexed: int,
    success_timestamp: float,
) -> None:
    """Record a successful indexing run.

    Sets the last-success timestamp gauge and, when any documents were
    indexed, increments the monotonic docs-indexed counter. Never raises —
    metric errors are logged at debug level only.
    """
    try:
        label_values = {
            "tenant_id": tenant_id,
            "source": source,
            "cc_pair_id": str(cc_pair_id),
        }
        CONNECTOR_LAST_SUCCESS_TIMESTAMP.labels(**label_values).set(success_timestamp)
        if docs_indexed > 0:
            CONNECTOR_DOCS_INDEXED.labels(**label_values).inc(docs_indexed)
    except Exception:
        logger.debug("Failed to record connector success metric", exc_info=True)

View File

@@ -1,25 +1,30 @@
"""Prometheus collectors for Celery queue depths and indexing pipeline state.
"""Prometheus collectors for Celery queue depths and infrastructure health.
These collectors query Redis and Postgres at scrape time (the Collector pattern),
These collectors query Redis at scrape time (the Collector pattern),
so metrics are always fresh when Prometheus scrapes /metrics. They run inside the
monitoring celery worker which already has Redis and DB access.
monitoring celery worker which already has Redis access.
To avoid hammering Redis/Postgres on every 15s scrape, results are cached with
To avoid hammering Redis on every 15s scrape, results are cached with
a configurable TTL (default 30s). This means metrics may be up to TTL seconds
stale, which is fine for monitoring dashboards.
Note: connector health and index attempt metrics are push-based (emitted by
workers at state-change time) and live in connector_health_metrics.py.
"""
from __future__ import annotations
import concurrent.futures
import json
import threading
import time
from datetime import datetime
from datetime import timezone
from typing import Any
from prometheus_client.core import GaugeMetricFamily
from prometheus_client.registry import Collector
from redis import Redis
from onyx.background.celery.celery_redis import celery_get_broker_client
from onyx.background.celery.celery_redis import celery_get_queue_length
from onyx.background.celery.celery_redis import celery_get_unacked_task_ids
from onyx.configs.constants import OnyxCeleryQueues
@@ -31,6 +36,11 @@ logger = setup_logger()
# the previous result without re-querying Redis/Postgres.
_DEFAULT_CACHE_TTL = 30.0
# Maximum time (seconds) a single _collect_fresh() call may take before
# the collector gives up and returns stale/empty results. Prevents the
# /metrics endpoint from hanging indefinitely when a DB or Redis query stalls.
_DEFAULT_COLLECT_TIMEOUT = 120.0
_QUEUE_LABEL_MAP: dict[str, str] = {
OnyxCeleryQueues.PRIMARY: "primary",
OnyxCeleryQueues.DOCPROCESSING: "docprocessing",
@@ -62,18 +72,32 @@ _UNACKED_QUEUES: list[str] = [
class _CachedCollector(Collector):
"""Base collector with TTL-based caching.
"""Base collector with TTL-based caching and timeout protection.
Subclasses implement ``_collect_fresh()`` to query the actual data source.
The base ``collect()`` returns cached results if the TTL hasn't expired,
avoiding repeated queries when Prometheus scrapes frequently.
A per-collection timeout prevents a slow DB or Redis query from blocking
the /metrics endpoint indefinitely. If _collect_fresh() exceeds the
timeout, stale cached results are returned instead.
"""
def __init__(self, cache_ttl: float = _DEFAULT_CACHE_TTL) -> None:
def __init__(
self,
cache_ttl: float = _DEFAULT_CACHE_TTL,
collect_timeout: float = _DEFAULT_COLLECT_TIMEOUT,
) -> None:
self._cache_ttl = cache_ttl
self._collect_timeout = collect_timeout
self._cached_result: list[GaugeMetricFamily] | None = None
self._last_collect_time: float = 0.0
self._lock = threading.Lock()
self._executor = concurrent.futures.ThreadPoolExecutor(
max_workers=1,
thread_name_prefix=type(self).__name__,
)
self._inflight: concurrent.futures.Future | None = None
def collect(self) -> list[GaugeMetricFamily]:
with self._lock:
@@ -84,12 +108,28 @@ class _CachedCollector(Collector):
):
return self._cached_result
# If a previous _collect_fresh() is still running, wait on it
# rather than queuing another. This prevents unbounded task
# accumulation in the executor during extended DB outages.
if self._inflight is not None and not self._inflight.done():
future = self._inflight
else:
future = self._executor.submit(self._collect_fresh)
self._inflight = future
try:
result = self._collect_fresh()
result = future.result(timeout=self._collect_timeout)
self._inflight = None
self._cached_result = result
self._last_collect_time = now
return result
except concurrent.futures.TimeoutError:
logger.warning(
f"{type(self).__name__}._collect_fresh() timed out after {self._collect_timeout}s, returning stale cache"
)
return self._cached_result if self._cached_result is not None else []
except Exception:
self._inflight = None
logger.exception(f"Error in {type(self).__name__}.collect()")
# Return stale cache on error rather than nothing — avoids
# metrics disappearing during transient failures.
@@ -117,8 +157,6 @@ class QueueDepthCollector(_CachedCollector):
if self._celery_app is None:
return []
from onyx.background.celery.celery_redis import celery_get_broker_client
redis_client = celery_get_broker_client(self._celery_app)
depth = GaugeMetricFamily(
@@ -194,208 +232,6 @@ class QueueDepthCollector(_CachedCollector):
return None
class IndexAttemptCollector(_CachedCollector):
"""Queries Postgres for index attempt state on each scrape."""
def __init__(self, cache_ttl: float = _DEFAULT_CACHE_TTL) -> None:
super().__init__(cache_ttl)
# Collection is a no-op until configure() is called after DB init.
self._configured: bool = False
self._terminal_statuses: list = []
def configure(self) -> None:
"""Call once DB engine is initialized."""
# Imported here (not module level) — presumably to defer DB-adjacent
# imports until the engine is ready; TODO confirm.
from onyx.db.enums import IndexingStatus
self._terminal_statuses = [s for s in IndexingStatus if s.is_terminal()]
self._configured = True
def _collect_fresh(self) -> list[GaugeMetricFamily]:
# Returning [] before configure() avoids touching the DB too early.
if not self._configured:
return []
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.engine.tenant_utils import get_all_tenant_ids
from onyx.db.index_attempt import get_active_index_attempts_for_metrics
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
attempts_gauge = GaugeMetricFamily(
"onyx_index_attempts_active",
"Number of non-terminal index attempts",
labels=[
"status",
"source",
"tenant_id",
"connector_name",
"cc_pair_id",
],
)
tenant_ids = get_all_tenant_ids()
for tid in tenant_ids:
# Defensive guard — get_all_tenant_ids() should never yield None,
# but we guard here for API stability in case the contract changes.
if tid is None:
continue
# Set the tenant contextvar so the session below targets this
# tenant; always reset in the finally to avoid leaking it.
token = CURRENT_TENANT_ID_CONTEXTVAR.set(tid)
try:
with get_session_with_current_tenant() as session:
rows = get_active_index_attempts_for_metrics(session)
for status, source, cc_id, cc_name, count in rows:
# Fall back to a synthetic name when the cc pair is unnamed.
name_val = cc_name or f"cc_pair_{cc_id}"
attempts_gauge.add_metric(
[
status.value,
source.value,
tid,
name_val,
str(cc_id),
],
count,
)
finally:
CURRENT_TENANT_ID_CONTEXTVAR.reset(token)
return [attempts_gauge]
class ConnectorHealthCollector(_CachedCollector):
"""Queries Postgres for connector health state on each scrape."""
def __init__(self, cache_ttl: float = _DEFAULT_CACHE_TTL) -> None:
super().__init__(cache_ttl)
# Collection is a no-op until configure() is called after DB init.
self._configured: bool = False
def configure(self) -> None:
"""Call once DB engine is initialized."""
self._configured = True
def _collect_fresh(self) -> list[GaugeMetricFamily]:
# Returning [] before configure() avoids touching the DB too early.
if not self._configured:
return []
# Imported here (not module level) — presumably to defer DB-adjacent
# imports until the engine is ready; TODO confirm.
from onyx.db.connector_credential_pair import (
get_connector_health_for_metrics,
)
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.engine.tenant_utils import get_all_tenant_ids
from onyx.db.index_attempt import get_docs_indexed_by_cc_pair
from onyx.db.index_attempt import get_failed_attempt_counts_by_cc_pair
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
# Per-connector gauges emitted once per scrape.
staleness_gauge = GaugeMetricFamily(
"onyx_connector_last_success_age_seconds",
"Seconds since last successful index for this connector",
labels=["tenant_id", "source", "cc_pair_id", "connector_name"],
)
error_state_gauge = GaugeMetricFamily(
"onyx_connector_in_error_state",
"Whether the connector is in a repeated error state (1=yes, 0=no)",
labels=["tenant_id", "source", "cc_pair_id", "connector_name"],
)
# Per-tenant aggregate gauges.
by_status_gauge = GaugeMetricFamily(
"onyx_connectors_by_status",
"Number of connectors grouped by status",
labels=["tenant_id", "status"],
)
error_total_gauge = GaugeMetricFamily(
"onyx_connectors_in_error_total",
"Total number of connectors in repeated error state",
labels=["tenant_id"],
)
per_connector_labels = [
"tenant_id",
"source",
"cc_pair_id",
"connector_name",
]
docs_success_gauge = GaugeMetricFamily(
"onyx_connector_docs_indexed",
"Total new documents indexed (90-day rolling sum) per connector",
labels=per_connector_labels,
)
docs_error_gauge = GaugeMetricFamily(
"onyx_connector_error_count",
"Total number of failed index attempts per connector",
labels=per_connector_labels,
)
# Single timestamp per scrape so all staleness ages share a reference.
now = datetime.now(tz=timezone.utc)
tenant_ids = get_all_tenant_ids()
for tid in tenant_ids:
# Defensive guard — get_all_tenant_ids() should never yield None,
# but we guard here for API stability in case the contract changes.
if tid is None:
continue
# Set the tenant contextvar so the session below targets this
# tenant; always reset in the finally to avoid leaking it.
token = CURRENT_TENANT_ID_CONTEXTVAR.set(tid)
try:
with get_session_with_current_tenant() as session:
pairs = get_connector_health_for_metrics(session)
error_counts_by_cc = get_failed_attempt_counts_by_cc_pair(session)
docs_by_cc = get_docs_indexed_by_cc_pair(session)
status_counts: dict[str, int] = {}
error_count = 0
for (
cc_id,
status,
in_error,
last_success,
cc_name,
source,
) in pairs:
cc_id_str = str(cc_id)
source_val = source.value
# Fall back to a synthetic name when the cc pair is unnamed.
name_val = cc_name or f"cc_pair_{cc_id}"
label_vals = [tid, source_val, cc_id_str, name_val]
# Connectors that never succeeded simply omit the staleness metric.
if last_success is not None:
# Both `now` and `last_success` are timezone-aware
# (the DB column uses DateTime(timezone=True)),
# so subtraction is safe.
age = (now - last_success).total_seconds()
staleness_gauge.add_metric(label_vals, age)
error_state_gauge.add_metric(
label_vals,
1.0 if in_error else 0.0,
)
if in_error:
error_count += 1
# Missing entries default to 0 rather than omitting the series.
docs_success_gauge.add_metric(
label_vals,
docs_by_cc.get(cc_id, 0),
)
docs_error_gauge.add_metric(
label_vals,
error_counts_by_cc.get(cc_id, 0),
)
status_val = status.value
status_counts[status_val] = status_counts.get(status_val, 0) + 1
# Emit per-tenant aggregates after scanning all pairs.
for status_val, count in status_counts.items():
by_status_gauge.add_metric([tid, status_val], count)
error_total_gauge.add_metric([tid], error_count)
finally:
CURRENT_TENANT_ID_CONTEXTVAR.reset(token)
return [
staleness_gauge,
error_state_gauge,
by_status_gauge,
error_total_gauge,
docs_success_gauge,
docs_error_gauge,
]
class RedisHealthCollector(_CachedCollector):
"""Collects Redis server health metrics (memory, clients, etc.)."""
@@ -411,8 +247,6 @@ class RedisHealthCollector(_CachedCollector):
if self._celery_app is None:
return []
from onyx.background.celery.celery_redis import celery_get_broker_client
redis_client = celery_get_broker_client(self._celery_app)
memory_used = GaugeMetricFamily(
@@ -495,7 +329,9 @@ class WorkerHeartbeatMonitor:
},
)
recv.capture(
limit=None, timeout=self._HEARTBEAT_TIMEOUT_SECONDS, wakeup=True
limit=None,
timeout=self._HEARTBEAT_TIMEOUT_SECONDS,
wakeup=True,
)
except Exception:
if self._running:

View File

@@ -6,8 +6,6 @@ Called once by the monitoring celery worker after Redis and DB are ready.
from celery import Celery
from prometheus_client.registry import REGISTRY
from onyx.server.metrics.indexing_pipeline import ConnectorHealthCollector
from onyx.server.metrics.indexing_pipeline import IndexAttemptCollector
from onyx.server.metrics.indexing_pipeline import QueueDepthCollector
from onyx.server.metrics.indexing_pipeline import RedisHealthCollector
from onyx.server.metrics.indexing_pipeline import WorkerHealthCollector
@@ -21,8 +19,6 @@ logger = setup_logger()
# module level ensures they survive the lifetime of the worker process and are
# only registered with the Prometheus registry once.
_queue_collector = QueueDepthCollector()
_attempt_collector = IndexAttemptCollector()
_connector_collector = ConnectorHealthCollector()
_redis_health_collector = RedisHealthCollector()
_worker_health_collector = WorkerHealthCollector()
_heartbeat_monitor: WorkerHeartbeatMonitor | None = None
@@ -34,6 +30,9 @@ def setup_indexing_pipeline_metrics(celery_app: Celery) -> None:
Args:
celery_app: The Celery application instance. Used to obtain a
broker Redis client on each scrape for queue depth metrics.
Note: connector health and index attempt metrics are push-based
(see connector_health_metrics.py) and do not use collectors.
"""
_queue_collector.set_celery_app(celery_app)
_redis_health_collector.set_celery_app(celery_app)
@@ -47,13 +46,8 @@ def setup_indexing_pipeline_metrics(celery_app: Celery) -> None:
_heartbeat_monitor.start()
_worker_health_collector.set_monitor(_heartbeat_monitor)
_attempt_collector.configure()
_connector_collector.configure()
for collector in (
_queue_collector,
_attempt_collector,
_connector_collector,
_redis_health_collector,
_worker_health_collector,
):

View File

@@ -299,7 +299,7 @@ h11==0.16.0
# uvicorn
h2==4.3.0
# via httpx
hf-xet==1.2.0 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
hf-xet==1.4.3 ; platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
# via huggingface-hub
hpack==4.1.0
# via h2
@@ -322,6 +322,7 @@ httpx==0.28.1
# fastmcp
# google-genai
# httpx-oauth
# huggingface-hub
# langfuse
# langsmith
# litellm
@@ -334,7 +335,7 @@ httpx-sse==0.4.3
# cohere
# mcp
hubspot-api-client==11.1.0
huggingface-hub==0.35.3
huggingface-hub==1.10.2
# via tokenizers
humanfriendly==10.0
# via coloredlogs
@@ -589,7 +590,7 @@ platformdirs==4.5.0
# via
# fastmcp
# zeep
playwright==1.55.0
playwright==1.58.0
# via pytest-playwright
pluggy==1.6.0
# via pytest
@@ -784,7 +785,6 @@ requests==2.33.0
# google-api-core
# google-genai
# hubspot-api-client
# huggingface-hub
# jira
# jsonschema-path
# kubernetes
@@ -911,7 +911,7 @@ tiktoken==0.7.0
timeago==1.0.16
tld==0.13.1
# via courlan
tokenizers==0.21.4
tokenizers==0.22.2
# via
# chonkie
# cohere
@@ -933,7 +933,9 @@ tqdm==4.67.1
# unstructured
trafilatura==1.12.2
typer==0.20.0
# via mcp
# via
# huggingface-hub
# mcp
types-awscrt==0.28.4
# via botocore-stubs
types-openpyxl==3.0.4.7

View File

@@ -82,6 +82,7 @@ click==8.3.1
# via
# black
# litellm
# typer
# uvicorn
cohere==5.6.1
# via onyx
@@ -153,7 +154,7 @@ h11==0.16.0
# httpcore
# uvicorn
hatchling==1.28.0
hf-xet==1.2.0 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
hf-xet==1.4.3 ; platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
# via huggingface-hub
httpcore==1.0.9
# via httpx
@@ -161,6 +162,7 @@ httpx==0.28.1
# via
# cohere
# google-genai
# huggingface-hub
# litellm
# mcp
# openai
@@ -168,7 +170,7 @@ httpx-sse==0.4.3
# via
# cohere
# mcp
huggingface-hub==0.35.3
huggingface-hub==1.10.2
# via tokenizers
identify==2.6.15
# via pre-commit
@@ -219,6 +221,8 @@ litellm==1.81.6
mako==1.2.4
# via alembic
manygo==0.2.0
markdown-it-py==4.0.0
# via rich
markupsafe==3.0.3
# via
# jinja2
@@ -230,6 +234,8 @@ matplotlib-inline==0.2.1
# ipython
mcp==1.26.0
# via claude-agent-sdk
mdurl==0.1.2
# via markdown-it-py
multidict==6.7.0
# via
# aiobotocore
@@ -340,6 +346,7 @@ pygments==2.20.0
# ipython
# ipython-pygments-lexers
# pytest
# rich
pyjwt==2.12.0
# via mcp
pyparsing==3.2.5
@@ -395,7 +402,6 @@ requests==2.33.0
# via
# cohere
# google-genai
# huggingface-hub
# kubernetes
# requests-oauthlib
# tiktoken
@@ -404,6 +410,8 @@ requests-oauthlib==1.3.1
# via kubernetes
retry==0.9.2
# via onyx
rich==14.2.0
# via typer
rpds-py==0.29.0
# via
# jsonschema
@@ -415,6 +423,8 @@ s3transfer==0.13.1
# via boto3
sentry-sdk==2.14.0
# via onyx
shellingham==1.5.4
# via typer
six==1.17.0
# via
# kubernetes
@@ -442,7 +452,7 @@ tenacity==9.1.2
# voyageai
tiktoken==0.7.0
# via litellm
tokenizers==0.21.4
tokenizers==0.22.2
# via
# cohere
# litellm
@@ -463,6 +473,8 @@ traitlets==5.14.3
# matplotlib-inline
trove-classifiers==2025.12.1.14
# via hatchling
typer==0.20.0
# via huggingface-hub
types-beautifulsoup4==4.12.0.3
types-html5lib==1.1.11.13
# via types-beautifulsoup4
@@ -500,6 +512,7 @@ typing-extensions==4.15.0
# referencing
# sqlalchemy
# starlette
# typer
# typing-inspection
typing-inspection==0.4.2
# via

View File

@@ -69,6 +69,7 @@ claude-agent-sdk==0.1.19
click==8.3.1
# via
# litellm
# typer
# uvicorn
cohere==5.6.1
# via onyx
@@ -112,7 +113,7 @@ h11==0.16.0
# via
# httpcore
# uvicorn
hf-xet==1.2.0 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
hf-xet==1.4.3 ; platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
# via huggingface-hub
httpcore==1.0.9
# via httpx
@@ -120,6 +121,7 @@ httpx==0.28.1
# via
# cohere
# google-genai
# huggingface-hub
# litellm
# mcp
# openai
@@ -127,7 +129,7 @@ httpx-sse==0.4.3
# via
# cohere
# mcp
huggingface-hub==0.35.3
huggingface-hub==1.10.2
# via tokenizers
idna==3.11
# via
@@ -156,10 +158,14 @@ kubernetes==31.0.0
# via onyx
litellm==1.81.6
# via onyx
markdown-it-py==4.0.0
# via rich
markupsafe==3.0.3
# via jinja2
mcp==1.26.0
# via claude-agent-sdk
mdurl==0.1.2
# via markdown-it-py
monotonic==1.6
# via posthog
multidict==6.7.0
@@ -217,6 +223,8 @@ pydantic-core==2.33.2
# via pydantic
pydantic-settings==2.12.0
# via mcp
pygments==2.20.0
# via rich
pyjwt==2.12.0
# via mcp
python-dateutil==2.8.2
@@ -247,7 +255,6 @@ requests==2.33.0
# via
# cohere
# google-genai
# huggingface-hub
# kubernetes
# posthog
# requests-oauthlib
@@ -257,6 +264,8 @@ requests-oauthlib==1.3.1
# via kubernetes
retry==0.9.2
# via onyx
rich==14.2.0
# via typer
rpds-py==0.29.0
# via
# jsonschema
@@ -267,6 +276,8 @@ s3transfer==0.13.1
# via boto3
sentry-sdk==2.14.0
# via onyx
shellingham==1.5.4
# via typer
six==1.17.0
# via
# kubernetes
@@ -289,7 +300,7 @@ tenacity==9.1.2
# voyageai
tiktoken==0.7.0
# via litellm
tokenizers==0.21.4
tokenizers==0.22.2
# via
# cohere
# litellm
@@ -297,6 +308,8 @@ tqdm==4.67.1
# via
# huggingface-hub
# openai
typer==0.20.0
# via huggingface-hub
types-requests==2.32.0.20250328
# via cohere
typing-extensions==4.15.0
@@ -313,6 +326,7 @@ typing-extensions==4.15.0
# pydantic-core
# referencing
# starlette
# typer
# typing-inspection
typing-inspection==0.4.2
# via

View File

@@ -78,6 +78,7 @@ click==8.3.1
# click-plugins
# click-repl
# litellm
# typer
# uvicorn
click-didyoumean==0.3.1
# via celery
@@ -116,7 +117,6 @@ filelock==3.20.3
# via
# huggingface-hub
# torch
# transformers
frozenlist==1.8.0
# via
# aiohttp
@@ -135,7 +135,7 @@ h11==0.16.0
# via
# httpcore
# uvicorn
hf-xet==1.2.0 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
hf-xet==1.4.3 ; platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
# via huggingface-hub
httpcore==1.0.9
# via httpx
@@ -143,6 +143,7 @@ httpx==0.28.1
# via
# cohere
# google-genai
# huggingface-hub
# litellm
# mcp
# openai
@@ -150,7 +151,7 @@ httpx-sse==0.4.3
# via
# cohere
# mcp
huggingface-hub==0.35.3
huggingface-hub==1.10.2
# via
# accelerate
# sentence-transformers
@@ -189,10 +190,14 @@ kubernetes==31.0.0
# via onyx
litellm==1.81.6
# via onyx
markdown-it-py==4.0.0
# via rich
markupsafe==3.0.3
# via jinja2
mcp==1.26.0
# via claude-agent-sdk
mdurl==0.1.2
# via markdown-it-py
mpmath==1.3.0
# via sympy
multidict==6.7.0
@@ -207,6 +212,7 @@ numpy==2.4.1
# accelerate
# scikit-learn
# scipy
# sentence-transformers
# transformers
# voyageai
nvidia-cublas-cu12==12.8.4.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
@@ -264,8 +270,6 @@ packaging==24.2
# transformers
parameterized==0.9.0
# via cohere
pillow==12.2.0
# via sentence-transformers
prometheus-client==0.23.1
# via
# onyx
@@ -305,6 +309,8 @@ pydantic-core==2.33.2
# via pydantic
pydantic-settings==2.12.0
# via mcp
pygments==2.20.0
# via rich
pyjwt==2.12.0
# via mcp
python-dateutil==2.8.2
@@ -339,16 +345,16 @@ requests==2.33.0
# via
# cohere
# google-genai
# huggingface-hub
# kubernetes
# requests-oauthlib
# tiktoken
# transformers
# voyageai
requests-oauthlib==1.3.1
# via kubernetes
retry==0.9.2
# via onyx
rich==14.2.0
# via typer
rpds-py==0.29.0
# via
# jsonschema
@@ -367,11 +373,13 @@ scipy==1.16.3
# via
# scikit-learn
# sentence-transformers
sentence-transformers==4.0.2
sentence-transformers==5.4.1
sentry-sdk==2.14.0
# via onyx
setuptools==80.9.0 ; python_full_version >= '3.12'
# via torch
shellingham==1.5.4
# via typer
six==1.17.0
# via
# kubernetes
@@ -398,7 +406,7 @@ threadpoolctl==3.6.0
# via scikit-learn
tiktoken==0.7.0
# via litellm
tokenizers==0.21.4
tokenizers==0.22.2
# via
# cohere
# litellm
@@ -413,10 +421,14 @@ tqdm==4.67.1
# openai
# sentence-transformers
# transformers
transformers==4.53.0
transformers==5.5.4
# via sentence-transformers
triton==3.5.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
typer==0.20.0
# via
# huggingface-hub
# transformers
types-requests==2.32.0.20250328
# via cohere
typing-extensions==4.15.0
@@ -435,6 +447,7 @@ typing-extensions==4.15.0
# sentence-transformers
# starlette
# torch
# typer
# typing-inspection
typing-inspection==0.4.2
# via

View File

@@ -4,6 +4,7 @@ from unittest.mock import patch
from urllib.parse import urlparse
from onyx.connectors.google_drive.connector import GoogleDriveConnector
from onyx.connectors.google_utils.google_utils import execute_paginated_retrieval
from tests.daily.connectors.google_drive.consts_and_utils import _pick
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_EMAIL
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_FILE_IDS
@@ -699,3 +700,43 @@ def test_specific_user_email_shared_with_me(
doc_titles = set(doc.semantic_identifier for doc in output.documents)
assert doc_titles == set(expected)
@patch(
"onyx.file_processing.extract_file_text.get_unstructured_api_key",
return_value=None,
)
def test_slim_retrieval_does_not_call_permissions_list(
mock_get_api_key: MagicMock, # noqa: ARG001
google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
"""retrieve_all_slim_docs() must not call permissions().list for any file.
Pruning only needs file IDs — fetching permissions per file causes O(N) API
calls that time out for tenants with large numbers of externally-owned files.
"""
connector = google_drive_service_acct_connector_factory(
primary_admin_email=ADMIN_EMAIL,
include_shared_drives=True,
include_my_drives=True,
include_files_shared_with_me=False,
shared_folder_urls=None,
shared_drive_urls=None,
my_drive_emails=None,
)
with patch(
"onyx.connectors.google_drive.connector.execute_paginated_retrieval",
wraps=execute_paginated_retrieval,
) as mock_paginated:
for batch in connector.retrieve_all_slim_docs():
pass
permissions_calls = [
c
for c in mock_paginated.call_args_list
if "permissions" in str(c.kwargs.get("retrieval_function", ""))
]
assert (
len(permissions_calls) == 0
), f"permissions().list was called {len(permissions_calls)} time(s) during pruning"

View File

@@ -12,6 +12,7 @@ from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import ImageSection
from onyx.connectors.models import TabularSection
from onyx.connectors.models import TextSection
_ITERATION_LIMIT = 100_000
@@ -141,13 +142,15 @@ def load_all_from_connector(
def to_sections(
documents: list[Document],
) -> Iterator[TextSection | ImageSection]:
) -> Iterator[TextSection | ImageSection | TabularSection]:
for doc in documents:
for section in doc.sections:
yield section
def to_text_sections(sections: Iterator[TextSection | ImageSection]) -> Iterator[str]:
def to_text_sections(
sections: Iterator[TextSection | ImageSection | TabularSection],
) -> Iterator[str]:
for section in sections:
if isinstance(section, TextSection):
yield section.text

View File

@@ -12,7 +12,7 @@ from onyx.db.models import DocumentByConnectorCredentialPair
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.constants import NUM_DOCS
from tests.integration.common_utils.managers.api_key import DATestAPIKey
from tests.integration.common_utils.managers.cc_pair import DATestCCPair
from tests.integration.common_utils.test_models import DATestCCPair
from tests.integration.common_utils.test_models import DATestUser
from tests.integration.common_utils.test_models import SimpleTestDocument
from tests.integration.common_utils.vespa import vespa_fixture

View File

@@ -14,7 +14,6 @@ from onyx.db.search_settings import get_current_search_settings
from tests.integration.common_utils.constants import ADMIN_USER_NAME
from tests.integration.common_utils.constants import GENERAL_HEADERS
from tests.integration.common_utils.managers.api_key import APIKeyManager
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.document import DocumentManager
from tests.integration.common_utils.managers.image_generation import (
ImageGenerationConfigManager,
@@ -196,6 +195,9 @@ def image_generation_config(
@pytest.fixture
def document_builder(admin_user: DATestUser) -> DocumentBuilderType:
# HACK: Avoid importing generated OpenAPI client modules unless this fixture is used.
from tests.integration.common_utils.managers.cc_pair import CCPairManager
api_key: DATestAPIKey = APIKeyManager.create(
user_performing_action=admin_user,
)

View File

@@ -1,4 +1,4 @@
FROM python:3.11.7-slim-bookworm
FROM python:3.11-slim-bookworm@sha256:9c6f90801e6b68e772b7c0ca74260cbf7af9f320acec894e26fccdaccfbe3b47
WORKDIR /app

View File

@@ -108,12 +108,12 @@ def current_head_rev() -> str:
["alembic", "heads", "--resolve-dependencies"],
cwd=_BACKEND_DIR,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
stderr=subprocess.PIPE,
text=True,
)
assert (
result.returncode == 0
), f"alembic heads failed (exit {result.returncode}):\n{result.stdout}"
), f"alembic heads failed (exit {result.returncode}):\n{result.stdout}\n{result.stderr}"
# Output looks like "d5c86e2c6dc6 (head)\n"
rev = result.stdout.strip().split()[0]
assert len(rev) > 0

View File

@@ -0,0 +1,83 @@
"""
Integration tests verifying the knowledge_sources field on MinimalPersonaSnapshot.
The GET /persona endpoint returns MinimalPersonaSnapshot, which includes a
knowledge_sources list derived from the persona's document sets, hierarchy
nodes, attached documents, and user files. These tests verify that the
field is populated correctly.
"""
import requests
from onyx.configs.constants import DocumentSource
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.managers.file import FileManager
from tests.integration.common_utils.managers.persona import PersonaManager
from tests.integration.common_utils.test_file_utils import create_test_text_file
from tests.integration.common_utils.test_models import DATestLLMProvider
from tests.integration.common_utils.test_models import DATestUser
def _get_minimal_persona(
persona_id: int,
user: DATestUser,
) -> dict:
"""Fetch personas from the list endpoint and find the one with the given id."""
response = requests.get(
f"{API_SERVER_URL}/persona",
params={"persona_ids": persona_id},
headers=user.headers,
)
response.raise_for_status()
personas = response.json()
matches = [p for p in personas if p["id"] == persona_id]
assert (
len(matches) == 1
), f"Expected 1 persona with id={persona_id}, got {len(matches)}"
return matches[0]
def test_persona_with_user_files_includes_user_file_source(
admin_user: DATestUser,
llm_provider: DATestLLMProvider, # noqa: ARG001
) -> None:
"""When a persona has user files attached, knowledge_sources includes 'user_file'."""
text_file = create_test_text_file("test content for knowledge source verification")
file_descriptors, error = FileManager.upload_files(
files=[("test_ks.txt", text_file)],
user_performing_action=admin_user,
)
assert not error, f"File upload failed: {error}"
user_file_id = file_descriptors[0]["user_file_id"] or ""
persona = PersonaManager.create(
user_performing_action=admin_user,
name="KS User File Agent",
description="Agent with user files for knowledge_sources test",
system_prompt="You are a helpful assistant.",
user_file_ids=[user_file_id],
)
minimal = _get_minimal_persona(persona.id, admin_user)
assert (
DocumentSource.USER_FILE.value in minimal["knowledge_sources"]
), f"Expected 'user_file' in knowledge_sources, got: {minimal['knowledge_sources']}"
def test_persona_without_user_files_excludes_user_file_source(
admin_user: DATestUser,
llm_provider: DATestLLMProvider, # noqa: ARG001
) -> None:
"""When a persona has no user files, knowledge_sources should not contain 'user_file'."""
persona = PersonaManager.create(
user_performing_action=admin_user,
name="KS No Files Agent",
description="Agent without files for knowledge_sources test",
system_prompt="You are a helpful assistant.",
)
minimal = _get_minimal_persona(persona.id, admin_user)
assert (
DocumentSource.USER_FILE.value not in minimal["knowledge_sources"]
), f"Unexpected 'user_file' in knowledge_sources: {minimal['knowledge_sources']}"

View File

@@ -0,0 +1,88 @@
from typing import cast
from unittest.mock import MagicMock
from unittest.mock import patch
from sentry_sdk.types import Event
import onyx.configs.sentry as sentry_module
from onyx.configs.sentry import _add_instance_tags
def _event(data: dict) -> Event:
"""Helper to create a Sentry Event from a plain dict for testing."""
return cast(Event, data)
def _reset_state() -> None:
"""Reset the module-level resolved flag between tests."""
sentry_module._instance_id_resolved = False
class TestAddInstanceTags:
def setup_method(self) -> None:
_reset_state()
@patch("onyx.utils.telemetry.get_or_generate_uuid", return_value="test-uuid-1234")
@patch("sentry_sdk.set_tag")
def test_first_event_sets_instance_id(
self, mock_set_tag: MagicMock, mock_uuid: MagicMock
) -> None:
result = _add_instance_tags(_event({"message": "test error"}), {})
assert result is not None
assert result["tags"]["instance_id"] == "test-uuid-1234"
mock_set_tag.assert_called_once_with("instance_id", "test-uuid-1234")
mock_uuid.assert_called_once()
@patch("onyx.utils.telemetry.get_or_generate_uuid", return_value="test-uuid-1234")
@patch("sentry_sdk.set_tag")
def test_second_event_skips_resolution(
self, _mock_set_tag: MagicMock, mock_uuid: MagicMock
) -> None:
_add_instance_tags(_event({"message": "first"}), {})
result = _add_instance_tags(_event({"message": "second"}), {})
assert result is not None
assert "tags" not in result # second event not modified
mock_uuid.assert_called_once() # only resolved once
@patch(
"onyx.utils.telemetry.get_or_generate_uuid",
side_effect=Exception("DB unavailable"),
)
@patch("sentry_sdk.set_tag")
def test_resolution_failure_still_returns_event(
self, _mock_set_tag: MagicMock, _mock_uuid: MagicMock
) -> None:
result = _add_instance_tags(_event({"message": "test error"}), {})
assert result is not None
assert result["message"] == "test error"
assert "tags" not in result or "instance_id" not in result.get("tags", {})
@patch(
"onyx.utils.telemetry.get_or_generate_uuid",
side_effect=Exception("DB unavailable"),
)
@patch("sentry_sdk.set_tag")
def test_resolution_failure_retries_on_next_event(
self, _mock_set_tag: MagicMock, mock_uuid: MagicMock
) -> None:
"""If resolution fails (e.g. DB not ready), retry on the next event."""
_add_instance_tags(_event({"message": "first"}), {})
_add_instance_tags(_event({"message": "second"}), {})
assert mock_uuid.call_count == 2 # retried on second event
@patch("onyx.utils.telemetry.get_or_generate_uuid", return_value="test-uuid-1234")
@patch("sentry_sdk.set_tag")
def test_preserves_existing_tags(
self, _mock_set_tag: MagicMock, _mock_uuid: MagicMock
) -> None:
result = _add_instance_tags(
_event({"message": "test", "tags": {"existing": "tag"}}), {}
)
assert result is not None
assert result["tags"]["existing"] == "tag"
assert result["tags"]["instance_id"] == "test-uuid-1234"

View File

@@ -8,14 +8,23 @@ from unittest.mock import patch
import pytest
from onyx.access.models import ExternalAccess
from onyx.configs.constants import DocumentSource
from onyx.connectors.canvas.client import CanvasApiClient
from onyx.connectors.canvas.connector import _in_time_window
from onyx.connectors.canvas.connector import _parse_canvas_dt
from onyx.connectors.canvas.connector import _unix_to_canvas_time
from onyx.connectors.canvas.connector import CanvasConnector
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.canvas.connector import CanvasConnectorCheckpoint
from onyx.connectors.canvas.connector import CanvasStage
from onyx.connectors.exceptions import CredentialExpiredError
from onyx.connectors.exceptions import InsufficientPermissionsError
from onyx.connectors.exceptions import UnexpectedValidationError
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
# ---------------------------------------------------------------------------
@@ -111,6 +120,56 @@ def _mock_response(
return resp
def _make_url_dispatcher(
courses: list[dict[str, Any]] | None = None,
pages: list[dict[str, Any]] | None = None,
assignments: list[dict[str, Any]] | None = None,
announcements: list[dict[str, Any]] | None = None,
page_error: bool = False,
) -> Any:
"""Return a callable that dispatches mock responses based on the request URL.
Meant to be assigned to ``mock_requests.get.side_effect``.
"""
api_prefix = f"{FAKE_BASE_URL}/api/v1"
def _dispatcher(url: str, **_kwargs: Any) -> MagicMock:
if page_error:
return _mock_response(500, {})
if url == f"{api_prefix}/courses":
return _mock_response(json_data=courses or [])
if "/pages" in url:
return _mock_response(json_data=pages or [])
if "/assignments" in url:
return _mock_response(json_data=assignments or [])
if "announcements" in url:
return _mock_response(json_data=announcements or [])
return _mock_response(json_data=[])
return _dispatcher
def _run_checkpoint(
connector: CanvasConnector,
checkpoint: CanvasConnectorCheckpoint,
start: float = 0.0,
end: float = datetime(2099, 1, 1, tzinfo=timezone.utc).timestamp(),
) -> tuple[
list[Document | HierarchyNode | ConnectorFailure], CanvasConnectorCheckpoint
]:
"""Run load_from_checkpoint once and collect yielded items + returned checkpoint."""
gen = connector.load_from_checkpoint(start, end, checkpoint)
items: list[Document | HierarchyNode | ConnectorFailure] = []
new_checkpoint: CanvasConnectorCheckpoint | None = None
try:
while True:
items.append(next(gen))
except StopIteration as e:
new_checkpoint = e.value
assert new_checkpoint is not None
return items, new_checkpoint
# ---------------------------------------------------------------------------
# CanvasApiClient.__init__ tests
# ---------------------------------------------------------------------------
@@ -269,15 +328,6 @@ class TestGet:
assert exc_info.value.status_code == 404
@patch("onyx.connectors.canvas.client.rl_requests")
def test_raises_on_429(self, mock_requests: MagicMock) -> None:
mock_requests.get.return_value = _mock_response(429, {})
with pytest.raises(OnyxError) as exc_info:
self.client.get("courses")
assert exc_info.value.status_code == 429
@patch("onyx.connectors.canvas.client.rl_requests")
def test_skips_params_when_using_full_url(self, mock_requests: MagicMock) -> None:
mock_requests.get.return_value = _mock_response(json_data=[])
@@ -454,6 +504,149 @@ class TestPaginate:
assert pages == []
@patch("onyx.connectors.canvas.client.rl_requests")
def test_error_extracts_message_from_error_dict(
self, mock_requests: MagicMock
) -> None:
"""Shape 1: {"error": {"message": "Not authorized"}}"""
mock_requests.get.return_value = _mock_response(
403, {"error": {"message": "Not authorized"}}
)
client = CanvasApiClient(
bearer_token=FAKE_TOKEN,
canvas_base_url=FAKE_BASE_URL,
)
with pytest.raises(OnyxError) as exc_info:
client.get("courses")
result = exc_info.value.detail
expected = "Not authorized"
assert result == expected
@patch("onyx.connectors.canvas.client.rl_requests")
def test_error_extracts_message_from_error_string(
self, mock_requests: MagicMock
) -> None:
"""Shape 2: {"error": "Invalid access token"}"""
mock_requests.get.return_value = _mock_response(
401, {"error": "Invalid access token"}
)
client = CanvasApiClient(
bearer_token=FAKE_TOKEN,
canvas_base_url=FAKE_BASE_URL,
)
with pytest.raises(OnyxError) as exc_info:
client.get("courses")
result = exc_info.value.detail
expected = "Invalid access token"
assert result == expected
@patch("onyx.connectors.canvas.client.rl_requests")
def test_error_extracts_message_from_errors_list(
self, mock_requests: MagicMock
) -> None:
"""Shape 3: {"errors": [{"message": "Invalid query"}]}"""
mock_requests.get.return_value = _mock_response(
400, {"errors": [{"message": "Invalid query"}]}
)
client = CanvasApiClient(
bearer_token=FAKE_TOKEN,
canvas_base_url=FAKE_BASE_URL,
)
with pytest.raises(OnyxError) as exc_info:
client.get("courses")
result = exc_info.value.detail
expected = "Invalid query"
assert result == expected
@patch("onyx.connectors.canvas.client.rl_requests")
def test_error_dict_takes_priority_over_errors_list(
self, mock_requests: MagicMock
) -> None:
"""When both error shapes are present, error dict wins."""
mock_requests.get.return_value = _mock_response(
403, {"error": "Specific error", "errors": [{"message": "Generic"}]}
)
client = CanvasApiClient(
bearer_token=FAKE_TOKEN,
canvas_base_url=FAKE_BASE_URL,
)
with pytest.raises(OnyxError) as exc_info:
client.get("courses")
result = exc_info.value.detail
expected = "Specific error"
assert result == expected
@patch("onyx.connectors.canvas.client.rl_requests")
def test_error_falls_back_to_reason_when_no_json_message(
self, mock_requests: MagicMock
) -> None:
"""Empty error body falls back to response.reason."""
mock_requests.get.return_value = _mock_response(500, {})
client = CanvasApiClient(
bearer_token=FAKE_TOKEN,
canvas_base_url=FAKE_BASE_URL,
)
with pytest.raises(OnyxError) as exc_info:
client.get("courses")
result = exc_info.value.detail
expected = "Error" # from _mock_response's reason for >= 300
assert result == expected
@patch("onyx.connectors.canvas.client.rl_requests")
def test_invalid_json_on_success_raises(self, mock_requests: MagicMock) -> None:
"""Invalid JSON on a 2xx response raises OnyxError."""
resp = MagicMock()
resp.status_code = 200
resp.json.side_effect = ValueError("No JSON")
resp.headers = {"Link": ""}
mock_requests.get.return_value = resp
client = CanvasApiClient(
bearer_token=FAKE_TOKEN,
canvas_base_url=FAKE_BASE_URL,
)
with pytest.raises(OnyxError, match="Invalid JSON"):
client.get("courses")
@patch("onyx.connectors.canvas.client.rl_requests")
def test_invalid_json_on_error_falls_back_to_reason(
self, mock_requests: MagicMock
) -> None:
"""Invalid JSON on a 4xx response falls back to response.reason."""
resp = MagicMock()
resp.status_code = 500
resp.reason = "Internal Server Error"
resp.json.side_effect = ValueError("No JSON")
resp.headers = {"Link": ""}
mock_requests.get.return_value = resp
client = CanvasApiClient(
bearer_token=FAKE_TOKEN,
canvas_base_url=FAKE_BASE_URL,
)
with pytest.raises(OnyxError) as exc_info:
client.get("courses")
result = exc_info.value.detail
expected = "Internal Server Error"
assert result == expected
# ---------------------------------------------------------------------------
# CanvasApiClient._parse_next_link tests
@@ -588,6 +781,16 @@ class TestConnectorUrlNormalization:
assert result == expected
@patch("onyx.connectors.canvas.client.rl_requests")
def test_load_credentials_insufficient_permissions(
self, mock_requests: MagicMock
) -> None:
mock_requests.get.return_value = _mock_response(403, {})
connector = CanvasConnector(canvas_base_url=FAKE_BASE_URL)
with pytest.raises(InsufficientPermissionsError):
connector.load_credentials({"canvas_access_token": FAKE_TOKEN})
# ---------------------------------------------------------------------------
# CanvasConnector — document conversion
@@ -766,10 +969,6 @@ class TestValidateConnectorSettings:
def test_validate_insufficient_permissions(self, mock_requests: MagicMock) -> None:
self._assert_validate_raises(403, InsufficientPermissionsError, mock_requests)
@patch("onyx.connectors.canvas.client.rl_requests")
def test_validate_rate_limited(self, mock_requests: MagicMock) -> None:
self._assert_validate_raises(429, ConnectorValidationError, mock_requests)
@patch("onyx.connectors.canvas.client.rl_requests")
def test_validate_unexpected_error(self, mock_requests: MagicMock) -> None:
self._assert_validate_raises(500, UnexpectedValidationError, mock_requests)
@@ -874,3 +1073,652 @@ class TestListAnnouncements:
result = connector._list_announcements(course_id=1)
assert result == []
class TestCheckpoint:
def test_build_dummy_checkpoint(self) -> None:
connector = _build_connector()
cp = connector.build_dummy_checkpoint()
assert cp.has_more is True
assert cp.course_ids == []
assert cp.current_course_index == 0
assert cp.stage == CanvasStage.PAGES
def test_validate_checkpoint_json(self) -> None:
connector = _build_connector()
cp = CanvasConnectorCheckpoint(
has_more=True,
course_ids=[1, 2],
current_course_index=1,
stage=CanvasStage.ASSIGNMENTS,
)
json_str = cp.model_dump_json()
restored = connector.validate_checkpoint_json(json_str)
assert restored.course_ids == [1, 2]
assert restored.current_course_index == 1
assert restored.stage == CanvasStage.ASSIGNMENTS
assert restored.has_more is True
# ---------------------------------------------------------------------------
# load_from_checkpoint tests
# ---------------------------------------------------------------------------
class TestLoadFromCheckpoint:
@patch("onyx.connectors.canvas.client.rl_requests")
def test_first_call_materializes_courses(self, mock_requests: MagicMock) -> None:
"""First call should populate course_ids and yield no documents."""
mock_requests.get.side_effect = _make_url_dispatcher(
courses=[_mock_course(1), _mock_course(2, "Data Structures", "CS201")]
)
connector = _build_connector()
cp = connector.build_dummy_checkpoint()
items, new_cp = _run_checkpoint(connector, cp)
assert items == []
assert new_cp.course_ids == [1, 2]
assert new_cp.current_course_index == 0
assert new_cp.stage == CanvasStage.PAGES
assert new_cp.has_more is True
@patch("onyx.connectors.canvas.client.rl_requests")
def test_processes_pages_stage(self, mock_requests: MagicMock) -> None:
"""Pages stage yields page documents within the time window."""
mock_requests.get.side_effect = _make_url_dispatcher(
pages=[_mock_page(10, "Syllabus", "2025-06-15T12:00:00Z")]
)
connector = _build_connector()
cp = CanvasConnectorCheckpoint(
has_more=True,
course_ids=[1],
current_course_index=0,
stage=CanvasStage.PAGES,
)
start = datetime(2025, 6, 1, 0, 0, tzinfo=timezone.utc).timestamp()
end = datetime(2025, 6, 30, 0, 0, tzinfo=timezone.utc).timestamp()
items, new_cp = _run_checkpoint(connector, cp, start, end)
expected_count = 1
expected_id = "canvas-page-1-10"
assert len(items) == expected_count
assert isinstance(items[0], Document)
assert items[0].id == expected_id
assert new_cp.stage == CanvasStage.ASSIGNMENTS
@patch("onyx.connectors.canvas.client.rl_requests")
def test_advances_through_all_stages(self, mock_requests: MagicMock) -> None:
"""Calling checkpoint 3 times advances pages -> assignments -> announcements -> next course."""
page = _mock_page(10, updated_at="2025-06-15T12:00:00Z")
assignment = _mock_assignment(20, updated_at="2025-06-15T12:00:00Z")
announcement = _mock_announcement(30, posted_at="2025-06-15T12:00:00Z")
mock_requests.get.side_effect = _make_url_dispatcher(
pages=[page], assignments=[assignment], announcements=[announcement]
)
connector = _build_connector()
start = datetime(2025, 6, 1, tzinfo=timezone.utc).timestamp()
end = datetime(2025, 6, 30, tzinfo=timezone.utc).timestamp()
cp = CanvasConnectorCheckpoint(
has_more=True,
course_ids=[1],
current_course_index=0,
stage=CanvasStage.PAGES,
)
# Stage 1: pages
items1, cp = _run_checkpoint(connector, cp, start, end)
assert cp.stage == CanvasStage.ASSIGNMENTS
assert len(items1) == 1
# Stage 2: assignments
mock_requests.get.side_effect = _make_url_dispatcher(assignments=[assignment])
items2, cp = _run_checkpoint(connector, cp, start, end)
assert cp.stage == CanvasStage.ANNOUNCEMENTS
assert len(items2) == 1
# Stage 3: announcements -> advances course index
mock_requests.get.side_effect = _make_url_dispatcher(
announcements=[announcement]
)
items3, cp = _run_checkpoint(connector, cp, start, end)
assert cp.current_course_index == 1
assert cp.stage == CanvasStage.PAGES
assert cp.has_more is False
@patch("onyx.connectors.canvas.client.rl_requests")
def test_filters_by_time_window(self, mock_requests: MagicMock) -> None:
"""Only documents within (start, end] are yielded."""
old_page = _mock_page(10, updated_at="2025-01-01T00:00:00Z")
new_page = _mock_page(11, title="New Page", updated_at="2025-06-15T12:00:00Z")
mock_requests.get.side_effect = _make_url_dispatcher(pages=[new_page, old_page])
connector = _build_connector()
cp = CanvasConnectorCheckpoint(
has_more=True,
course_ids=[1],
current_course_index=0,
stage=CanvasStage.PAGES,
)
start = datetime(2025, 6, 1, tzinfo=timezone.utc).timestamp()
end = datetime(2025, 6, 30, tzinfo=timezone.utc).timestamp()
items, _ = _run_checkpoint(connector, cp, start, end)
expected_count = 1
expected_id = "canvas-page-1-11"
assert len(items) == expected_count
assert isinstance(items[0], Document)
assert items[0].id == expected_id
@patch("onyx.connectors.canvas.client.rl_requests")
def test_skips_announcement_without_posted_at(
self, mock_requests: MagicMock
) -> None:
announcement = _mock_announcement()
announcement["posted_at"] = None
mock_requests.get.side_effect = _make_url_dispatcher(
announcements=[announcement]
)
connector = _build_connector()
cp = CanvasConnectorCheckpoint(
has_more=True,
course_ids=[1],
current_course_index=0,
stage=CanvasStage.ANNOUNCEMENTS,
)
items, _ = _run_checkpoint(connector, cp)
assert len(items) == 0
def test_stage_failure_advances_stage_and_yields_failure(self) -> None:
"""A 500 on a stage fetch yields a stage-level ConnectorFailure and
advances to the next stage, so the framework doesn't loop on the
same failing state forever."""
connector = _build_connector()
cp = CanvasConnectorCheckpoint(
has_more=True,
course_ids=[1, 2],
current_course_index=0,
stage=CanvasStage.PAGES,
)
with patch.object(
connector,
"_fetch_stage_page",
side_effect=OnyxError(
OnyxErrorCode.INTERNAL_ERROR,
"boom",
status_code_override=500,
),
):
items, new_cp = _run_checkpoint(connector, cp)
expected_entity_id = "canvas-pages-1"
assert len(items) == 1
assert isinstance(items[0], ConnectorFailure)
assert items[0].failed_entity is not None
assert items[0].failed_entity.entity_id == expected_entity_id
assert new_cp.stage == CanvasStage.ASSIGNMENTS
assert new_cp.current_course_index == 0
assert new_cp.next_url is None
assert new_cp.has_more is True
def test_course_404_advances_course_and_yields_failure(self) -> None:
    """A 404 on a stage fetch means the whole course is inaccessible —
    yield a course-level ConnectorFailure and skip to the next course
    instead of burning API calls on every stage of a missing course."""
    connector = _build_connector()
    cp = CanvasConnectorCheckpoint(
        has_more=True,
        course_ids=[1, 2],
        current_course_index=0,
        stage=CanvasStage.PAGES,
    )
    # Any stage fetch for the course reports NOT_FOUND (404).
    with patch.object(
        connector,
        "_fetch_stage_page",
        side_effect=OnyxError(
            OnyxErrorCode.NOT_FOUND,
            "course gone",
            status_code_override=404,
        ),
    ):
        items, new_cp = _run_checkpoint(connector, cp)
    expected_entity_id = "canvas-course-1"
    expected_next_course_index = 1
    # A single course-level failure: the entity id names the course,
    # not an individual stage.
    assert len(items) == 1
    assert isinstance(items[0], ConnectorFailure)
    assert items[0].failed_entity is not None
    assert items[0].failed_entity.entity_id == expected_entity_id
    # Checkpoint jumps to the next course and resets to the first stage.
    assert new_cp.current_course_index == expected_next_course_index
    assert new_cp.stage == CanvasStage.PAGES
    assert new_cp.next_url is None
    assert new_cp.has_more is True
def test_fatal_auth_failure_during_stage_fetch_propagates(self) -> None:
    """A 401 from the Canvas API surfaces as CredentialExpiredError."""
    connector = _build_connector()
    checkpoint = CanvasConnectorCheckpoint(
        has_more=True,
        course_ids=[1],
        current_course_index=0,
        stage=CanvasStage.PAGES,
    )
    with patch("onyx.connectors.canvas.client.rl_requests") as mock_requests:
        mock_requests.get.return_value = _mock_response(401, {})
        with pytest.raises(CredentialExpiredError):
            _run_checkpoint(connector, checkpoint)
def test_security_failure_during_stage_fetch_propagates(self) -> None:
    """An OnyxError with no special handling propagates unchanged."""
    connector = _build_connector()
    checkpoint = CanvasConnectorCheckpoint(
        has_more=True,
        course_ids=[1],
        current_course_index=0,
        stage=CanvasStage.PAGES,
    )
    failure = OnyxError(OnyxErrorCode.BAD_GATEWAY, "bad next link")
    with patch.object(connector, "_fetch_stage_page", side_effect=failure):
        with pytest.raises(OnyxError, match="bad next link"):
            _run_checkpoint(connector, checkpoint)
@patch("onyx.connectors.canvas.client.rl_requests")
def test_per_document_conversion_failure_yields_connector_failure(
    self, mock_requests: MagicMock
) -> None:
    """Bad data for one page yields ConnectorFailure, doesn't stop processing."""
    # "bad-date" in updated_at makes per-document conversion blow up
    # after the page itself was fetched successfully.
    bad_page = {
        "page_id": 10,
        "url": "test",
        "title": "Test",
        "body": None,
        "created_at": "2025-06-15T12:00:00Z",
        "updated_at": "bad-date",
    }
    mock_requests.get.side_effect = _make_url_dispatcher(pages=[bad_page])
    connector = _build_connector()
    cp = CanvasConnectorCheckpoint(
        has_more=True,
        course_ids=[1],
        current_course_index=0,
        stage=CanvasStage.PAGES,
    )
    items, new_cp = _run_checkpoint(connector, cp)
    # The broken document becomes a failure item, and the checkpoint
    # still advances to the next stage.
    assert len(items) == 1
    assert isinstance(items[0], ConnectorFailure)
    assert new_cp.stage == CanvasStage.ASSIGNMENTS
@patch("onyx.connectors.canvas.client.rl_requests")
def test_all_courses_done_sets_has_more_false(
    self, mock_requests: MagicMock
) -> None:
    """Once the course index walks past the last course, the run ends
    with no items and has_more flipped to False."""
    mock_requests.get.side_effect = _make_url_dispatcher()
    connector = _build_connector()
    exhausted = CanvasConnectorCheckpoint(
        has_more=True, course_ids=[1], current_course_index=1
    )
    produced, final_cp = _run_checkpoint(connector, exhausted)
    assert produced == []
    assert final_cp.has_more is False
def test_invalid_stage_raises_value_error(self) -> None:
    """A checkpoint carrying an unknown stage value is rejected loudly."""
    connector = _build_connector()
    checkpoint = CanvasConnectorCheckpoint(
        has_more=True,
        course_ids=[1],
        current_course_index=0,
        stage=CanvasStage.PAGES,
    )
    checkpoint.stage = "invalid"  # type: ignore[assignment]
    with pytest.raises(ValueError, match="Invalid checkpoint stage"):
        _run_checkpoint(connector, checkpoint)
# ---------------------------------------------------------------------------
# load_from_checkpoint_with_perm_sync tests
# ---------------------------------------------------------------------------
class TestLoadFromCheckpointWithPermSync:
    """Tests for the permission-syncing checkpoint load path."""

    @patch("onyx.connectors.canvas.connector.get_course_permissions")
    @patch("onyx.connectors.canvas.client.rl_requests")
    def test_documents_have_external_access(
        self, mock_requests: MagicMock, mock_perms: MagicMock
    ) -> None:
        """load_from_checkpoint_with_perm_sync attaches ExternalAccess to documents."""
        expected_access = ExternalAccess(
            external_user_emails={"student@school.edu"},
            external_user_group_ids=set(),
            is_public=False,
        )
        mock_perms.return_value = expected_access
        mock_requests.get.side_effect = _make_url_dispatcher(
            pages=[_mock_page(10, "Syllabus", "2025-06-15T12:00:00Z")]
        )
        connector = _build_connector()
        cp = CanvasConnectorCheckpoint(
            has_more=True,
            course_ids=[1],
            current_course_index=0,
            stage=CanvasStage.PAGES,
        )
        start = datetime(2025, 6, 1, tzinfo=timezone.utc).timestamp()
        end = datetime(2025, 6, 30, tzinfo=timezone.utc).timestamp()
        gen = connector.load_from_checkpoint_with_perm_sync(start, end, cp)
        items: list[Document | HierarchyNode | ConnectorFailure] = []
        new_cp: CanvasConnectorCheckpoint | None = None
        # Drain the generator manually: the next checkpoint is returned
        # via StopIteration.value, which a plain `list(gen)` would drop.
        try:
            while True:
                items.append(next(gen))
        except StopIteration as e:
            new_cp = e.value
        assert new_cp is not None
        assert len(items) == 1
        assert isinstance(items[0], Document)
        # The permissions fetched for the course are attached verbatim.
        assert items[0].external_access == expected_access
        assert new_cp.stage == CanvasStage.ASSIGNMENTS
        mock_perms.assert_called_once()
# ---------------------------------------------------------------------------
# Helper function tests
# ---------------------------------------------------------------------------
class TestParseCanvasDt:
    """_parse_canvas_dt turns Canvas API timestamps into aware UTC datetimes."""

    def test_z_suffix_parsed_as_utc(self) -> None:
        parsed = _parse_canvas_dt("2025-06-15T12:00:00Z")
        assert parsed == datetime(2025, 6, 15, 12, 0, 0, tzinfo=timezone.utc)

    def test_plus_offset_parsed_as_utc(self) -> None:
        parsed = _parse_canvas_dt("2025-06-15T12:00:00+00:00")
        assert parsed == datetime(2025, 6, 15, 12, 0, 0, tzinfo=timezone.utc)

    def test_result_is_timezone_aware(self) -> None:
        assert _parse_canvas_dt("2025-01-01T00:00:00Z").tzinfo is not None
class TestUnixToCanvasTime:
    """_unix_to_canvas_time renders epoch seconds as a Z-suffixed ISO string."""

    def test_known_epoch_produces_expected_string(self) -> None:
        moment = datetime(2025, 6, 15, 12, 0, 0, tzinfo=timezone.utc)
        assert _unix_to_canvas_time(moment.timestamp()) == "2025-06-15T12:00:00Z"

    def test_round_trips_with_parse_canvas_dt(self) -> None:
        moment = datetime(2025, 3, 10, 8, 30, 0, tzinfo=timezone.utc)
        round_tripped = _parse_canvas_dt(_unix_to_canvas_time(moment.timestamp()))
        assert round_tripped == moment
class TestInTimeWindow:
    """_in_time_window: start boundary is exclusive, end boundary inclusive."""

    @staticmethod
    def _june_window() -> tuple[float, float]:
        # Shared June 2025 window used by every case below.
        return (
            datetime(2025, 6, 1, tzinfo=timezone.utc).timestamp(),
            datetime(2025, 6, 30, tzinfo=timezone.utc).timestamp(),
        )

    def test_inside_window(self) -> None:
        start, end = self._june_window()
        assert _in_time_window("2025-06-15T12:00:00Z", start, end) is True

    def test_before_window(self) -> None:
        start, end = self._june_window()
        assert _in_time_window("2025-05-01T12:00:00Z", start, end) is False

    def test_after_window(self) -> None:
        start, end = self._june_window()
        assert _in_time_window("2025-07-15T12:00:00Z", start, end) is False

    def test_start_boundary_is_exclusive(self) -> None:
        start, end = self._june_window()
        assert _in_time_window("2025-06-01T00:00:00Z", start, end) is False

    def test_end_boundary_is_inclusive(self) -> None:
        start, end = self._june_window()
        assert _in_time_window("2025-06-30T00:00:00Z", start, end) is True
class TestFetchStagePage:
    """_fetch_stage_page prefers a pagination next_url over endpoint+params."""

    def test_uses_full_url_when_next_url_set(self) -> None:
        """When next_url is present, the client is called with that full URL
        and the endpoint/params arguments are ignored."""
        connector = _build_connector()
        with patch.object(
            connector.canvas_client, "get", return_value=([{"id": 1}], None)
        ) as mock_get:
            result, next_url = connector._fetch_stage_page(
                next_url="https://myschool.instructure.com/api/v1/courses?page=2",
                endpoint="courses/1/pages",
                params={"per_page": "100"},
            )
        mock_get.assert_called_once_with(
            full_url="https://myschool.instructure.com/api/v1/courses?page=2"
        )
        assert result == [{"id": 1}]

    def test_uses_endpoint_and_params_when_no_next_url(self) -> None:
        """Without a next_url, the first-page endpoint and params are used."""
        connector = _build_connector()
        with patch.object(
            connector.canvas_client, "get", return_value=([{"id": 1}], None)
        ) as mock_get:
            result, next_url = connector._fetch_stage_page(
                next_url=None,
                endpoint="courses/1/pages",
                params={"per_page": "100"},
            )
        mock_get.assert_called_once_with(
            endpoint="courses/1/pages", params={"per_page": "100"}
        )

    def test_returns_empty_list_for_none_response(self) -> None:
        """A None body from the client is normalized to an empty list."""
        connector = _build_connector()
        with patch.object(connector.canvas_client, "get", return_value=(None, None)):
            result, next_url = connector._fetch_stage_page(
                next_url=None,
                endpoint="courses/1/pages",
                params={},
            )
        assert result == []
        assert next_url is None
class TestProcessItems:
    """_process_items converts raw Canvas payloads into Documents,
    filters by the time window, and reports early exit for sorted feeds."""

    def test_pages_in_window_converted(self) -> None:
        """A page updated inside [start, end] becomes a Document."""
        connector = _build_connector()
        start = datetime(2025, 6, 1, tzinfo=timezone.utc).timestamp()
        end = datetime(2025, 6, 30, tzinfo=timezone.utc).timestamp()
        results, early_exit = connector._process_items(
            response=[_mock_page(10, "Syllabus", "2025-06-15T12:00:00Z")],
            stage=CanvasStage.PAGES,
            course_id=1,
            start=start,
            end=end,
            include_permissions=False,
        )
        assert len(results) == 1
        assert isinstance(results[0], Document)
        assert early_exit is False

    def test_pages_outside_window_skipped(self) -> None:
        """A page older than the window is dropped and triggers early exit."""
        connector = _build_connector()
        start = datetime(2025, 6, 1, tzinfo=timezone.utc).timestamp()
        end = datetime(2025, 6, 30, tzinfo=timezone.utc).timestamp()
        results, early_exit = connector._process_items(
            response=[_mock_page(10, "Old", "2025-01-01T12:00:00Z")],
            stage=CanvasStage.PAGES,
            course_id=1,
            start=start,
            end=end,
            include_permissions=False,
        )
        assert results == []
        assert early_exit is True

    def test_assignments_in_window_converted(self) -> None:
        """An in-window assignment becomes a Document."""
        connector = _build_connector()
        start = datetime(2025, 6, 1, tzinfo=timezone.utc).timestamp()
        end = datetime(2025, 6, 30, tzinfo=timezone.utc).timestamp()
        results, early_exit = connector._process_items(
            response=[_mock_assignment(20, "HW1", 1, "2025-06-15T12:00:00Z")],
            stage=CanvasStage.ASSIGNMENTS,
            course_id=1,
            start=start,
            end=end,
            include_permissions=False,
        )
        assert len(results) == 1
        assert isinstance(results[0], Document)
        assert early_exit is False

    def test_announcements_in_window_converted(self) -> None:
        """An in-window announcement becomes a Document."""
        connector = _build_connector()
        start = datetime(2025, 6, 1, tzinfo=timezone.utc).timestamp()
        end = datetime(2025, 6, 30, tzinfo=timezone.utc).timestamp()
        results, early_exit = connector._process_items(
            response=[_mock_announcement(30, "News", 1, "2025-06-15T12:00:00Z")],
            stage=CanvasStage.ANNOUNCEMENTS,
            course_id=1,
            start=start,
            end=end,
            include_permissions=False,
        )
        assert len(results) == 1
        assert isinstance(results[0], Document)
        assert early_exit is False

    def test_bad_item_yields_connector_failure(self) -> None:
        """An unparseable item is reported as a ConnectorFailure, not raised."""
        connector = _build_connector()
        start = datetime(2025, 6, 1, tzinfo=timezone.utc).timestamp()
        end = datetime(2025, 6, 30, tzinfo=timezone.utc).timestamp()
        # "bad-date" in updated_at breaks per-item conversion.
        bad_page = {
            "page_id": 10,
            "url": "test",
            "title": "Test",
            "body": None,
            "created_at": "2025-06-15T12:00:00Z",
            "updated_at": "bad-date",
        }
        results, early_exit = connector._process_items(
            response=[bad_page],
            stage=CanvasStage.PAGES,
            course_id=1,
            start=start,
            end=end,
            include_permissions=False,
        )
        assert len(results) == 1
        assert isinstance(results[0], ConnectorFailure)

    def test_page_early_exit_on_old_item(self) -> None:
        """Pages sorted desc — item before start triggers early exit."""
        connector = _build_connector()
        start = datetime(2025, 6, 1, tzinfo=timezone.utc).timestamp()
        end = datetime(2025, 6, 30, tzinfo=timezone.utc).timestamp()
        results, early_exit = connector._process_items(
            response=[
                _mock_page(10, "New", "2025-06-15T12:00:00Z"),
                _mock_page(11, "Old", "2025-05-01T12:00:00Z"),
                _mock_page(12, "Older", "2025-04-01T12:00:00Z"),
            ],
            stage=CanvasStage.PAGES,
            course_id=1,
            start=start,
            end=end,
            include_permissions=False,
        )
        # Only the in-window page is converted; the first too-old page
        # stops processing of the remaining (even older) items.
        assert len(results) == 1
        assert early_exit is True
class TestMaybeAttachPermissions:
    """_maybe_attach_permissions only fetches/attaches access when asked."""

    def test_attaches_permissions_when_true(self) -> None:
        connector = _build_connector()
        document = MagicMock(spec=Document)
        document.external_access = None
        granted = ExternalAccess(
            external_user_emails={"student@school.edu"},
            external_user_group_ids=set(),
            is_public=False,
        )
        with patch.object(
            connector, "_get_course_permissions", return_value=granted
        ):
            updated = connector._maybe_attach_permissions(
                document, course_id=1, include_permissions=True
            )
        assert updated.external_access == granted

    def test_no_op_when_false(self) -> None:
        connector = _build_connector()
        document = MagicMock(spec=Document)
        document.external_access = None
        updated = connector._maybe_attach_permissions(
            document, course_id=1, include_permissions=False
        )
        assert updated.external_access is None

View File

@@ -0,0 +1,200 @@
"""Unit tests for GoogleDriveConnector slim retrieval routing.
Verifies that:
- GoogleDriveConnector implements SlimConnector so pruning takes the ID-only path
- retrieve_all_slim_docs() calls _extract_slim_docs_from_google_drive with include_permissions=False
- retrieve_all_slim_docs_perm_sync() calls _extract_slim_docs_from_google_drive with include_permissions=True
- celery_utils routing picks retrieve_all_slim_docs() for GoogleDriveConnector
"""
from unittest.mock import MagicMock
from unittest.mock import patch
from onyx.background.celery.celery_utils import extract_ids_from_runnable_connector
from onyx.connectors.google_drive.connector import GoogleDriveConnector
from onyx.connectors.google_drive.models import DriveRetrievalStage
from onyx.connectors.google_drive.models import GoogleDriveCheckpoint
from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.models import SlimDocument
from onyx.utils.threadpool_concurrency import ThreadSafeDict
def _make_done_checkpoint() -> GoogleDriveCheckpoint:
    """Checkpoint already at the DONE stage with nothing retrieved."""
    return GoogleDriveCheckpoint(
        completion_stage=DriveRetrievalStage.DONE,
        retrieved_folder_and_drive_ids=set(),
        completion_map=ThreadSafeDict(),
        all_retrieved_file_ids=set(),
        has_more=False,
    )
def _make_connector() -> GoogleDriveConnector:
    """Connector with canned credentials so no real Google auth happens."""
    drive_connector = GoogleDriveConnector(include_my_drives=True)
    drive_connector._creds = MagicMock()
    drive_connector._primary_admin_email = "admin@example.com"
    return drive_connector
class TestGoogleDriveSlimConnectorInterface:
    """GoogleDriveConnector must expose both slim-retrieval interfaces."""

    def test_implements_slim_connector(self) -> None:
        assert isinstance(_make_connector(), SlimConnector)

    def test_implements_slim_connector_with_perm_sync(self) -> None:
        assert isinstance(_make_connector(), SlimConnectorWithPermSync)

    def test_slim_connector_checked_before_perm_sync(self) -> None:
        """SlimConnector must appear before SlimConnectorWithPermSync in MRO
        so celery_utils isinstance check routes to retrieve_all_slim_docs()."""
        resolution_order = list(GoogleDriveConnector.__mro__)
        assert resolution_order.index(SlimConnector) < resolution_order.index(
            SlimConnectorWithPermSync
        )
class TestRetrieveAllSlimDocs:
    """retrieve_all_slim_docs loops until the checkpoint reaches DONE and
    delegates to _extract_slim_docs_from_google_drive without permissions."""

    def test_does_not_call_extract_when_checkpoint_is_done(self) -> None:
        """A checkpoint that starts at DONE short-circuits the loop."""
        connector = _make_connector()
        slim_doc = MagicMock(
            spec=SlimDocument, id="doc1", parent_hierarchy_raw_node_id=None
        )
        with patch.object(
            connector, "build_dummy_checkpoint", return_value=_make_done_checkpoint()
        ):
            with patch.object(
                connector,
                "_extract_slim_docs_from_google_drive",
                return_value=iter([[slim_doc]]),
            ) as mock_extract:
                list(connector.retrieve_all_slim_docs())
        mock_extract.assert_not_called()  # loop exits immediately since checkpoint is DONE

    def test_calls_extract_with_include_permissions_false_non_done_checkpoint(
        self,
    ) -> None:
        """A non-DONE checkpoint triggers one extract call with
        include_permissions=False (the pruning / ID-only path)."""
        connector = _make_connector()
        slim_doc = MagicMock(
            spec=SlimDocument, id="doc1", parent_hierarchy_raw_node_id=None
        )
        # Checkpoint starts at START, _extract advances it to DONE
        with patch.object(connector, "build_dummy_checkpoint") as mock_build:
            start_checkpoint = GoogleDriveCheckpoint(
                retrieved_folder_and_drive_ids=set(),
                completion_stage=DriveRetrievalStage.START,
                completion_map=ThreadSafeDict(),
                all_retrieved_file_ids=set(),
                has_more=False,
            )
            mock_build.return_value = start_checkpoint

            # Generator side effect: mutates the checkpoint to DONE so the
            # retrieval loop terminates after a single batch.
            def _advance_checkpoint(**_kwargs: object) -> object:
                start_checkpoint.completion_stage = DriveRetrievalStage.DONE
                yield [slim_doc]

            with patch.object(
                connector,
                "_extract_slim_docs_from_google_drive",
                side_effect=_advance_checkpoint,
            ) as mock_extract:
                list(connector.retrieve_all_slim_docs())
            mock_extract.assert_called_once()
            _, kwargs = mock_extract.call_args
            assert kwargs.get("include_permissions") is False

    def test_yields_slim_documents(self) -> None:
        """Batches produced by the extract helper are yielded to the caller."""
        connector = _make_connector()
        slim_doc = MagicMock(
            spec=SlimDocument, id="doc1", parent_hierarchy_raw_node_id=None
        )
        start_checkpoint = GoogleDriveCheckpoint(
            retrieved_folder_and_drive_ids=set(),
            completion_stage=DriveRetrievalStage.START,
            completion_map=ThreadSafeDict(),
            all_retrieved_file_ids=set(),
            has_more=False,
        )
        with patch.object(
            connector, "build_dummy_checkpoint", return_value=start_checkpoint
        ):

            # Same advance-to-DONE trick as above so the loop runs once.
            def _advance_and_yield(**_kwargs: object) -> object:
                start_checkpoint.completion_stage = DriveRetrievalStage.DONE
                yield [slim_doc]

            with patch.object(
                connector,
                "_extract_slim_docs_from_google_drive",
                side_effect=_advance_and_yield,
            ):
                batches = list(connector.retrieve_all_slim_docs())
        assert len(batches) == 1
        assert batches[0][0] is slim_doc
class TestRetrieveAllSlimDocsPermSync:
    """retrieve_all_slim_docs_perm_sync must take the permissions path."""

    def test_calls_extract_with_include_permissions_true(self) -> None:
        """The perm-sync variant requests permissions from the extractor."""
        connector = _make_connector()
        slim_doc = MagicMock(
            spec=SlimDocument, id="doc1", parent_hierarchy_raw_node_id=None
        )
        start_checkpoint = GoogleDriveCheckpoint(
            retrieved_folder_and_drive_ids=set(),
            completion_stage=DriveRetrievalStage.START,
            completion_map=ThreadSafeDict(),
            all_retrieved_file_ids=set(),
            has_more=False,
        )
        with patch.object(
            connector, "build_dummy_checkpoint", return_value=start_checkpoint
        ):

            # Advance the checkpoint to DONE so the retrieval loop runs once.
            def _advance_and_yield(**_kwargs: object) -> object:
                start_checkpoint.completion_stage = DriveRetrievalStage.DONE
                yield [slim_doc]

            with patch.object(
                connector,
                "_extract_slim_docs_from_google_drive",
                side_effect=_advance_and_yield,
            ) as mock_extract:
                list(connector.retrieve_all_slim_docs_perm_sync())
            mock_extract.assert_called_once()
            _, kwargs = mock_extract.call_args
            # NOTE(review): this deliberately accepts None as well as True —
            # presumably because the implementation may omit the kwarg and
            # rely on a default of True. If the implementation always passes
            # include_permissions=True explicitly, tighten this to `is True`.
            assert (
                kwargs.get("include_permissions") is None
                or kwargs.get("include_permissions") is True
            )
class TestCeleryUtilsRouting:
    def test_pruning_uses_retrieve_all_slim_docs(self) -> None:
        """extract_ids_from_runnable_connector must call retrieve_all_slim_docs,
        not retrieve_all_slim_docs_perm_sync, for GoogleDriveConnector."""
        connector = _make_connector()
        fake_doc = MagicMock(
            spec=SlimDocument, id="doc1", parent_hierarchy_raw_node_id=None
        )
        slim_patch = patch.object(
            connector, "retrieve_all_slim_docs", return_value=iter([[fake_doc]])
        )
        perm_patch = patch.object(connector, "retrieve_all_slim_docs_perm_sync")
        with slim_patch as mock_slim, perm_patch as mock_perm_sync:
            extract_ids_from_runnable_connector(
                connector, connector_type="google_drive"
            )
        mock_slim.assert_called_once()
        mock_perm_sync.assert_not_called()

View File

@@ -0,0 +1,86 @@
"""Tests for get_index_attempt_errors_across_connectors."""
from datetime import datetime
from datetime import timezone
from unittest.mock import MagicMock
from onyx.db.index_attempt import get_index_attempt_errors_across_connectors
from onyx.db.models import IndexAttemptError
def _make_error(
    id: int = 1,
    cc_pair_id: int = 1,
    error_type: str | None = "TimeoutError",
    is_resolved: bool = False,
) -> IndexAttemptError:
    """Create a mock IndexAttemptError."""
    mock_error = MagicMock(spec=IndexAttemptError)
    mock_error.id = id
    mock_error.connector_credential_pair_id = cc_pair_id
    mock_error.error_type = error_type
    mock_error.is_resolved = is_resolved
    return mock_error
class TestGetIndexAttemptErrorsAcrossConnectors:
    """DB-layer tests using a mocked session: `scalar` backs the count
    query, `scalars(...).all()` backs the paged result query."""

    def test_returns_errors_and_count(self) -> None:
        """Count and rows come back as a (rows, total) pair."""
        mock_session = MagicMock()
        mock_errors = [_make_error(id=1), _make_error(id=2)]
        mock_session.scalar.return_value = 2
        mock_session.scalars.return_value.all.return_value = mock_errors
        errors, total = get_index_attempt_errors_across_connectors(
            db_session=mock_session,
        )
        assert total == 2
        assert len(errors) == 2

    def test_returns_empty_when_no_errors(self) -> None:
        """No matching rows → empty list and zero total."""
        mock_session = MagicMock()
        mock_session.scalar.return_value = 0
        mock_session.scalars.return_value.all.return_value = []
        errors, total = get_index_attempt_errors_across_connectors(
            db_session=mock_session,
        )
        assert total == 0
        assert errors == []

    def test_null_count_returns_zero(self) -> None:
        """A NULL count from the DB is coerced to 0, not returned as None."""
        mock_session = MagicMock()
        mock_session.scalar.return_value = None
        mock_session.scalars.return_value.all.return_value = []
        errors, total = get_index_attempt_errors_across_connectors(
            db_session=mock_session,
        )
        assert total == 0

    def test_passes_filters_to_query(self) -> None:
        """Verify that filter parameters result in .where() calls on the statement."""
        mock_session = MagicMock()
        mock_session.scalar.return_value = 0
        mock_session.scalars.return_value.all.return_value = []
        start = datetime(2026, 1, 1, tzinfo=timezone.utc)
        end = datetime(2026, 12, 31, tzinfo=timezone.utc)
        # Should not raise — just verifying the function accepts all filter params
        get_index_attempt_errors_across_connectors(
            db_session=mock_session,
            cc_pair_id=42,
            error_type="TimeoutError",
            start_time=start,
            end_time=end,
            unresolved_only=True,
            page=2,
            page_size=10,
        )
        # The function should have called scalar (for count) and scalars (for results)
        assert mock_session.scalar.called
        assert mock_session.scalars.called

View File

@@ -1,9 +1,13 @@
import io
from typing import cast
from unittest.mock import MagicMock
import openpyxl
from openpyxl.worksheet.worksheet import Worksheet
from onyx.file_processing.extract_file_text import _clean_worksheet_matrix
from onyx.file_processing.extract_file_text import _worksheet_to_matrix
from onyx.file_processing.extract_file_text import xlsx_sheet_extraction
from onyx.file_processing.extract_file_text import xlsx_to_text
@@ -196,3 +200,182 @@ class TestXlsxToText:
assert "r1c1" in lines[0] and "r1c2" in lines[0]
assert "r2c1" in lines[1] and "r2c2" in lines[1]
assert "r3c1" in lines[2] and "r3c2" in lines[2]
class TestWorksheetToMatrixJaggedRows:
    """openpyxl read_only mode can yield rows of differing widths when
    trailing cells are empty. The matrix must be padded to a rectangle
    so downstream column cleanup can index safely."""

    @staticmethod
    def _fake_worksheet(rows: list[tuple[str, ...]]) -> MagicMock:
        # Stand-in worksheet whose iter_rows() yields the given jagged tuples.
        sheet = MagicMock()
        sheet.iter_rows.return_value = iter(rows)
        return sheet

    def test_pads_shorter_trailing_rows(self) -> None:
        sheet = self._fake_worksheet([("A", "B", "C"), ("X", "Y"), ("P",)])
        assert _worksheet_to_matrix(sheet) == [
            ["A", "B", "C"],
            ["X", "Y", ""],
            ["P", "", ""],
        ]

    def test_pads_when_first_row_is_shorter(self) -> None:
        sheet = self._fake_worksheet([("A",), ("X", "Y", "Z")])
        assert _worksheet_to_matrix(sheet) == [["A", "", ""], ["X", "Y", "Z"]]

    def test_clean_worksheet_matrix_no_index_error_on_jagged_rows(self) -> None:
        """Regression: previously raised IndexError when a later row was
        shorter than the first row and the out-of-range column on the
        first row was empty (so the short-circuit in `all()` did not
        save us)."""
        sheet = self._fake_worksheet([("A", "", "", "B"), ("X", "Y")])
        matrix = _worksheet_to_matrix(sheet)
        # Must not raise.
        assert _clean_worksheet_matrix(matrix) == [
            ["A", "", "", "B"],
            ["X", "Y", "", ""],
        ]
class TestXlsxSheetExtraction:
    """xlsx_sheet_extraction returns one (csv_text, sheet_title) tuple per
    non-empty sheet, in workbook order."""

    def test_one_tuple_per_sheet(self) -> None:
        """Two populated sheets → two tuples, in workbook order."""
        xlsx = _make_xlsx(
            {
                "Revenue": [["Month", "Amount"], ["Jan", "100"]],
                "Expenses": [["Category", "Cost"], ["Rent", "500"]],
            }
        )
        sheets = xlsx_sheet_extraction(xlsx)
        assert len(sheets) == 2
        # Order preserved from workbook sheet order
        titles = [title for _csv, title in sheets]
        assert titles == ["Revenue", "Expenses"]
        # Content present in the right tuple
        revenue_csv, _ = sheets[0]
        expenses_csv, _ = sheets[1]
        assert "Month" in revenue_csv
        assert "Jan" in revenue_csv
        assert "Category" in expenses_csv
        assert "Rent" in expenses_csv

    def test_tuple_structure_is_csv_text_then_title(self) -> None:
        """The tuple order is (csv_text, sheet_title) — pin it so callers
        that unpack positionally don't silently break."""
        xlsx = _make_xlsx({"MySheet": [["a", "b"]]})
        sheets = xlsx_sheet_extraction(xlsx)
        assert len(sheets) == 1
        csv_text, title = sheets[0]
        assert title == "MySheet"
        assert "a" in csv_text
        assert "b" in csv_text

    def test_empty_sheet_is_skipped(self) -> None:
        """A sheet whose CSV output is empty/whitespace-only should NOT
        appear in the result — the `if csv_text.strip():` guard filters
        it out."""
        xlsx = _make_xlsx(
            {
                "Data": [["a", "b"]],
                "Empty": [],
            }
        )
        sheets = xlsx_sheet_extraction(xlsx)
        assert len(sheets) == 1
        assert sheets[0][1] == "Data"

    def test_empty_workbook_returns_empty_list(self) -> None:
        """All sheets empty → empty list (not a list of empty tuples)."""
        xlsx = _make_xlsx({"Sheet1": [], "Sheet2": []})
        sheets = xlsx_sheet_extraction(xlsx)
        assert sheets == []

    def test_single_sheet(self) -> None:
        """A one-sheet workbook yields exactly one tuple."""
        xlsx = _make_xlsx({"Only": [["x", "y"], ["1", "2"]]})
        sheets = xlsx_sheet_extraction(xlsx)
        assert len(sheets) == 1
        csv_text, title = sheets[0]
        assert title == "Only"
        assert "x" in csv_text
        assert "1" in csv_text

    def test_bad_zip_returns_empty_list(self) -> None:
        """A corrupt (non-zip) xlsx payload fails soft with []."""
        bad_file = io.BytesIO(b"not a zip file")
        sheets = xlsx_sheet_extraction(bad_file, file_name="test.xlsx")
        assert sheets == []

    def test_bad_zip_tilde_file_returns_empty_list(self) -> None:
        """`~$`-prefixed files are Excel lock files; failure should log
        at debug (not warning) and still return []."""
        bad_file = io.BytesIO(b"not a zip file")
        sheets = xlsx_sheet_extraction(bad_file, file_name="~$temp.xlsx")
        assert sheets == []

    def test_csv_content_matches_xlsx_to_text_per_sheet(self) -> None:
        """For a single-sheet workbook, xlsx_to_text output should equal
        the csv_text from xlsx_sheet_extraction — they share the same
        per-sheet CSV-ification logic."""
        single_sheet_data = [["Name", "Age"], ["Alice", "30"]]
        expected_text = xlsx_to_text(_make_xlsx({"People": single_sheet_data}))
        sheets = xlsx_sheet_extraction(_make_xlsx({"People": single_sheet_data}))
        assert len(sheets) == 1
        csv_text, title = sheets[0]
        assert title == "People"
        assert csv_text.strip() == expected_text.strip()

    def test_commas_in_cells_are_quoted(self) -> None:
        """Cell values containing commas are CSV-quoted, not split."""
        xlsx = _make_xlsx({"S1": [["hello, world", "normal"]]})
        sheets = xlsx_sheet_extraction(xlsx)
        assert len(sheets) == 1
        csv_text, _ = sheets[0]
        assert '"hello, world"' in csv_text

    def test_long_empty_row_run_capped_within_sheet(self) -> None:
        """The matrix cleanup applies per-sheet: >2 empty rows collapse
        to 2, which keeps the sheet non-empty and it still appears in
        the result."""
        xlsx = _make_xlsx(
            {
                "S1": [
                    ["header"],
                    [""],
                    [""],
                    [""],
                    [""],
                    ["data"],
                ]
            }
        )
        sheets = xlsx_sheet_extraction(xlsx)
        assert len(sheets) == 1
        csv_text, _ = sheets[0]
        lines = csv_text.strip().split("\n")
        # header + 2 empty (capped) + data = 4 lines
        assert len(lines) == 4
        assert "header" in lines[0]
        assert "data" in lines[-1]

    def test_sheet_title_with_special_chars_preserved(self) -> None:
        """Spaces, punctuation, unicode in sheet titles are preserved
        verbatim — the title is used as a link anchor downstream."""
        xlsx = _make_xlsx(
            {
                "Q1 Revenue (USD)": [["a", "b"]],
                "Données": [["c", "d"]],
            }
        )
        sheets = xlsx_sheet_extraction(xlsx)
        titles = [title for _csv, title in sheets]
        assert "Q1 Revenue (USD)" in titles
        assert "Données" in titles

View File

@@ -0,0 +1,551 @@
"""End-to-end tests for `TabularChunker.chunk_section`.
Each test is structured as:
INPUT — the CSV text passed to the chunker + token budget + link
EXPECTED — the exact chunk texts the chunker should emit
ACT — a single call to `chunk_section`
ASSERT — literal equality against the expected chunk texts
A character-level tokenizer (1 char == 1 token) is used so token-budget
arithmetic is deterministic and expected chunks can be spelled out
exactly.
"""
from onyx.connectors.models import Section
from onyx.connectors.models import TabularSection
from onyx.indexing.chunking.section_chunker import AccumulatorState
from onyx.indexing.chunking.tabular_section_chunker import TabularChunker
from onyx.natural_language_processing.utils import BaseTokenizer
class CharTokenizer(BaseTokenizer):
    """Tokenizer where every character is exactly one token (its code point),
    making token-budget arithmetic in the tests deterministic."""

    def encode(self, string: str) -> list[int]:
        return list(map(ord, string))

    def tokenize(self, string: str) -> list[str]:
        return [*string]

    def decode(self, tokens: list[int]) -> str:
        return "".join(map(chr, tokens))
def _make_chunker() -> TabularChunker:
    """TabularChunker wired to the 1-char-per-token test tokenizer."""
    return TabularChunker(tokenizer=CharTokenizer())
def _tabular_section(text: str, link: str = "sheet:Test") -> Section:
    """Wrap CSV text in a TabularSection carrying the given source link."""
    return TabularSection(text=text, link=link)
class TestTabularChunkerChunkSection:
def test_simple_csv_all_rows_fit_one_chunk(self) -> None:
    """Everything fits under the budget → a single chunk containing the
    link, a Columns header line, and one field=value line per row."""
    # --- INPUT -----------------------------------------------------
    csv_text = "Name,Age,City\n" "Alice,30,NYC\n" "Bob,25,SF\n"
    link = "sheet:People"
    content_token_limit = 500
    # --- EXPECTED --------------------------------------------------
    expected_texts = [
        (
            "sheet:People\n"
            "Columns: Name, Age, City\n"
            "Name=Alice, Age=30, City=NYC\n"
            "Name=Bob, Age=25, City=SF"
        ),
    ]
    # --- ACT -------------------------------------------------------
    out = _make_chunker().chunk_section(
        _tabular_section(csv_text, link=link),
        AccumulatorState(),
        content_token_limit=content_token_limit,
    )
    # --- ASSERT ----------------------------------------------------
    assert [p.text for p in out.payloads] == expected_texts
    assert [p.is_continuation for p in out.payloads] == [False]
    assert all(p.links == {0: link} for p in out.payloads)
    assert out.accumulator.is_empty()
def test_overflow_splits_into_two_deterministic_chunks(self) -> None:
    """Rows that exceed the token budget split into multiple chunks,
    each re-carrying the link + Columns prelude."""
    # --- INPUT -----------------------------------------------------
    # prelude = "sheet:S\nColumns: col, val" (25 chars = 25 tokens)
    # At content_token_limit=57, row_budget = max(16, 57-31-1) = 25.
    # Each row "col=a, val=1" is 12 tokens; two rows + \n = 25 (fits),
    # three rows + 2×\n = 38 (overflows) → split after 2 rows.
    csv_text = "col,val\n" "a,1\n" "b,2\n" "c,3\n" "d,4\n"
    link = "sheet:S"
    content_token_limit = 57
    # --- EXPECTED --------------------------------------------------
    expected_texts = [
        ("sheet:S\n" "Columns: col, val\n" "col=a, val=1\n" "col=b, val=2"),
        ("sheet:S\n" "Columns: col, val\n" "col=c, val=3\n" "col=d, val=4"),
    ]
    # --- ACT -------------------------------------------------------
    out = _make_chunker().chunk_section(
        _tabular_section(csv_text, link=link),
        AccumulatorState(),
        content_token_limit=content_token_limit,
    )
    # --- ASSERT ----------------------------------------------------
    assert [p.text for p in out.payloads] == expected_texts
    # First chunk is fresh; subsequent chunks mark as continuations.
    assert [p.is_continuation for p in out.payloads] == [False, True]
    # Link carries through every chunk.
    assert all(p.links == {0: link} for p in out.payloads)
# Add back in shortly
# def test_header_only_csv_produces_single_prelude_chunk(self) -> None:
# # --- INPUT -----------------------------------------------------
# csv_text = "col1,col2\n"
# link = "sheet:Headers"
# # --- EXPECTED --------------------------------------------------
# expected_texts = [
# "sheet:Headers\nColumns: col1, col2",
# ]
# # --- ACT -------------------------------------------------------
# out = _make_chunker().chunk_section(
# _tabular_section(csv_text, link=link),
# AccumulatorState(),
# content_token_limit=500,
# )
# # --- ASSERT ----------------------------------------------------
# assert [p.text for p in out.payloads] == expected_texts
def test_empty_cells_dropped_from_chunk_text(self) -> None:
    """Empty cells are omitted entirely rather than emitted as `field=`."""
    # --- INPUT -----------------------------------------------------
    # Alice's Age is empty; Bob's City is empty. Empty cells should
    # not appear as `field=` pairs in the output.
    csv_text = "Name,Age,City\n" "Alice,,NYC\n" "Bob,25,\n"
    link = "sheet:P"
    # --- EXPECTED --------------------------------------------------
    expected_texts = [
        (
            "sheet:P\n"
            "Columns: Name, Age, City\n"
            "Name=Alice, City=NYC\n"
            "Name=Bob, Age=25"
        ),
    ]
    # --- ACT -------------------------------------------------------
    out = _make_chunker().chunk_section(
        _tabular_section(csv_text, link=link),
        AccumulatorState(),
        content_token_limit=500,
    )
    # --- ASSERT ----------------------------------------------------
    assert [p.text for p in out.payloads] == expected_texts
def test_quoted_commas_in_csv_preserved_as_one_field(self) -> None:
    """A quoted value containing a comma stays one field, quotes stripped."""
    raw_csv = 'Name,Notes\nAlice,"Hello, world"\n'
    want = ["sheet:P\nColumns: Name, Notes\nName=Alice, Notes=Hello, world"]
    result = _make_chunker().chunk_section(
        _tabular_section(raw_csv, link="sheet:P"),
        AccumulatorState(),
        content_token_limit=500,
    )
    assert [payload.text for payload in result.payloads] == want
def test_blank_rows_in_csv_are_skipped(self) -> None:
    """Stray blank rows (export artifacts) produce no ghost rows."""
    raw_csv = "A,B\n\n1,2\n\n\n3,4\n"
    want = ["sheet:S\nColumns: A, B\nA=1, B=2\nA=3, B=4"]
    result = _make_chunker().chunk_section(
        _tabular_section(raw_csv, link="sheet:S"),
        AccumulatorState(),
        content_token_limit=500,
    )
    assert [payload.text for payload in result.payloads] == want
def test_accumulator_flushes_before_tabular_chunks(self) -> None:
    """Pending text from a prior section is flushed as its own chunk first."""
    carried_text = "prior paragraph from an earlier text section"
    carried_link = "prev-link"
    sheet_link = "sheet:S"
    want = [carried_text, "sheet:S\nColumns: a, b\na=1, b=2"]
    result = _make_chunker().chunk_section(
        _tabular_section("a,b\n1,2\n", link=sheet_link),
        AccumulatorState(text=carried_text, link_offsets={0: carried_link}),
        content_token_limit=500,
    )
    assert [payload.text for payload in result.payloads] == want
    # Flushed chunk keeps the prior link; the tabular chunk gets its own.
    assert result.payloads[0].links == {0: carried_link}
    assert result.payloads[1].links == {0: sheet_link}
    # Tabular sections are structural boundaries: the accumulator resets.
    assert result.accumulator.is_empty()
def test_multi_row_packing_under_budget_emits_single_chunk(self) -> None:
    """Rows that jointly fit under the budget pack into ONE preluded chunk."""
    rows = ["a" * 18, "b" * 18, "c" * 18]
    raw_csv = "x\n" + "\n".join(rows) + "\n"
    budget = 100
    # sheet(1) + sep + cols(10) + sep + three 20-token rows with seps = 75 <= 100.
    want = ["S\nColumns: x\n" + "\n".join("x=" + row for row in rows)]
    result = _make_chunker().chunk_section(
        _tabular_section(raw_csv, link="S"),
        AccumulatorState(),
        content_token_limit=budget,
    )
    assert [payload.text for payload in result.payloads] == want
    assert [payload.is_continuation for payload in result.payloads] == [False]
    assert all(len(payload.text) <= budget for payload in result.payloads)
def test_packing_reserves_prelude_budget_so_every_chunk_has_full_prelude(
    self,
) -> None:
    """Packing reserves prelude space so every chunk keeps its full prelude."""
    # Prelude 'S\nColumns: x\n' = 13 tokens; each row "x=??" = 4 + 1 separator.
    # 3 rows: 13 + 14 = 27 <= 30; 4 rows: 32 > 30 → chunks of 3 then 2 rows.
    budget = 30
    want = [
        "S\nColumns: x\nx=aa\nx=bb\nx=cc",
        "S\nColumns: x\nx=dd\nx=ee",
    ]
    result = _make_chunker().chunk_section(
        _tabular_section("x\naa\nbb\ncc\ndd\nee\n", link="S"),
        AccumulatorState(),
        content_token_limit=budget,
    )
    assert [payload.text for payload in result.payloads] == want
    # Every chunk fits the budget AND carries the full prelude — the point
    # of this test.
    assert all(len(payload.text) <= budget for payload in result.payloads)
    assert all("Columns: x" in payload.text for payload in result.payloads)
def test_oversized_row_splits_into_field_pieces_no_prelude(self) -> None:
    """Rows over budget split at field boundaries; split pieces get no prelude."""
    # Formatted row is 53 tokens under a 20-token budget → greedy packing of
    # 9-token fields (", " sep = 2) yields three pieces, each <= 20.
    budget = 20
    want = [
        "field 1=1, field 2=2",
        "field 3=3, field 4=4",
        "field 5=5",
    ]
    result = _make_chunker().chunk_section(
        _tabular_section(
            "field 1,field 2,field 3,field 4,field 5\n1,2,3,4,5\n", link="S"
        ),
        AccumulatorState(),
        content_token_limit=budget,
    )
    assert [payload.text for payload in result.payloads] == want
    # Invariant: no chunk exceeds the token budget.
    assert all(len(payload.text) <= budget for payload in result.payloads)
    # First piece is fresh; the rest are continuations.
    assert [payload.is_continuation for payload in result.payloads] == [
        False,
        True,
        True,
    ]
def test_empty_tabular_section_flushes_accumulator_and_resets_it(
    self,
) -> None:
    """Even an empty tabular section flushes pending text and resets state."""
    carried_text = "prior paragraph"
    result = _make_chunker().chunk_section(
        _tabular_section("", link="sheet:Empty"),
        AccumulatorState(text=carried_text, link_offsets={0: "prev-link"}),
        content_token_limit=500,
    )
    assert [payload.text for payload in result.payloads] == [carried_text]
    assert result.accumulator.is_empty()
def test_single_oversized_field_token_splits_at_id_boundaries(self) -> None:
    """One `field=value` pair over budget falls back to token-window slicing."""
    # "x=" + 50 a's = 52 tokens; 10-token windows → 5 full pieces + 2-char tail.
    raw_csv = "x\n" + "a" * 50 + "\n"
    budget = 10
    want = ["x=" + "a" * 8] + ["a" * 10] * 4 + ["aa"]
    result = _make_chunker().chunk_section(
        _tabular_section(raw_csv, link="S"),
        AccumulatorState(),
        content_token_limit=budget,
    )
    assert [payload.text for payload in result.payloads] == want
    # The invariant the token-level fallback exists to enforce.
    assert all(len(payload.text) <= budget for payload in result.payloads)
def test_underscored_column_gets_friendly_alias_in_parens(self) -> None:
    """Underscored headers gain a spaced alias in parens; plain ones don't."""
    want = [
        "sheet:M\n"
        "Columns: MTTR_hours (MTTR hours), id, owner_name (owner name)\n"
        "MTTR_hours=3, id=42, owner_name=Alice"
    ]
    result = _make_chunker().chunk_section(
        _tabular_section("MTTR_hours,id,owner_name\n3,42,Alice\n", link="sheet:M"),
        AccumulatorState(),
        content_token_limit=500,
    )
    assert [payload.text for payload in result.payloads] == want
def test_oversized_row_between_small_rows_preserves_flanking_chunks(
    self,
) -> None:
    """State-machine check: small row, oversized row, small row."""
    # Full prelude (22 tokens) exceeds the 20-token budget, so chunks carry
    # a sheet-only prelude. The fat middle row (26 tokens) flushes "S\na=1"
    # and splits at pair boundaries; the trailing split fragment then
    # absorbs the final small row via the normal packing rules.
    raw_csv = "a,b,c,d\n1,,,\nxxx,yyy,zzz,www\n2,,,\n"
    budget = 20
    want = [
        "S\na=1",
        "a=xxx, b=yyy, c=zzz",
        "d=www\na=2",
    ]
    result = _make_chunker().chunk_section(
        _tabular_section(raw_csv, link="S"),
        AccumulatorState(),
        content_token_limit=budget,
    )
    assert [payload.text for payload in result.payloads] == want
    assert all(len(payload.text) <= budget for payload in result.payloads)
def test_prelude_layering_column_header_fits_but_sheet_header_does_not(
    self,
) -> None:
    """The Columns line fits the budget; the sheet header on top does not."""
    # cols+row: 10+1+3 = 14 <= 15; sheet+cols+row: 28 > 15 → drop sheet.
    want = ["Columns: x\nx=y"]
    result = _make_chunker().chunk_section(
        _tabular_section("x\ny\n", link="LongSheetName"),
        AccumulatorState(),
        content_token_limit=15,
    )
    assert [payload.text for payload in result.payloads] == want
def test_prelude_layering_sheet_header_fits_but_column_header_does_not(
    self,
) -> None:
    """When Columns can't fit, the short sheet header is still layered on."""
    # cols+row: 17+1+12 = 30 > 20; sheet+row: 1+1+12 = 14 <= 20.
    want = ["S\nABC=1, DEF=2"]
    result = _make_chunker().chunk_section(
        _tabular_section("ABC,DEF\n1,2\n", link="S"),
        AccumulatorState(),
        content_token_limit=20,
    )
    assert [payload.text for payload in result.payloads] == want

View File

@@ -0,0 +1,188 @@
"""Unit tests for MinimalPersonaSnapshot.from_model knowledge_sources aggregation."""
from unittest.mock import MagicMock
from unittest.mock import patch
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import FederatedConnectorSource
from onyx.server.features.document_set.models import DocumentSetSummary
from onyx.server.features.persona.models import MinimalPersonaSnapshot
# Shared stand-in returned by the patched DocumentSetSummary.from_model in
# every test below; the tests only inspect knowledge_sources, so the summary
# fields are minimal placeholders.
_STUB_DS_SUMMARY = DocumentSetSummary(
id=1,
name="stub",
description=None,
cc_pair_summaries=[],
is_up_to_date=True,
is_public=True,
users=[],
groups=[],
)
def _make_persona(**overrides: object) -> MagicMock:
"""Build a mock Persona with sensible defaults.
Every relationship defaults to empty so tests only need to set the
fields they care about.
"""
p = MagicMock()
p.id = 1
p.name = "test"
p.description = ""
p.tools = []
p.starter_messages = None
p.document_sets = []
p.hierarchy_nodes = []
p.attached_documents = []
p.user_files = []
p.llm_model_version_override = None
p.llm_model_provider_override = None
p.uploaded_image_id = None
p.icon_name = None
p.is_public = True
p.is_listed = True
p.display_priority = None
p.is_featured = False
p.builtin_persona = False
p.labels = []
p.user = None
for k, v in overrides.items():
setattr(p, k, v)
return p
def _make_cc_pair(source: DocumentSource) -> MagicMock:
    """Mock ConnectorCredentialPair wired to the given document source."""
    pair = MagicMock()
    pair.id = 1
    pair.access_type = "PUBLIC"
    pair.name = source.value
    pair.connector.source = source
    return pair
def _make_doc_set(
cc_pairs: list[MagicMock] | None = None,
fed_connectors: list[MagicMock] | None = None,
) -> MagicMock:
ds = MagicMock()
ds.id = 1
ds.name = "ds"
ds.description = None
ds.is_up_to_date = True
ds.is_public = True
ds.users = []
ds.groups = []
ds.connector_credential_pairs = cc_pairs or []
ds.federated_connectors = fed_connectors or []
return ds
def _make_federated_ds_mapping(
    source: FederatedConnectorSource,
) -> MagicMock:
    """Mock DocumentSet↔federated-connector mapping for the given source."""
    federated_mapping = MagicMock()
    federated_mapping.federated_connector_id = 1
    federated_mapping.federated_connector.source = source
    federated_mapping.entities = {}
    return federated_mapping
def _make_hierarchy_node(source: DocumentSource) -> MagicMock:
    """Mock hierarchy node tagged with the given document source."""
    hierarchy_node = MagicMock()
    hierarchy_node.source = source
    return hierarchy_node
def _make_attached_document(source: DocumentSource) -> MagicMock:
    """Mock attached document whose parent hierarchy node has the source."""
    attached = MagicMock()
    attached.parent_hierarchy_node = MagicMock()
    attached.parent_hierarchy_node.source = source
    return attached
@patch(
    "onyx.server.features.persona.models.DocumentSetSummary.from_model",
    return_value=_STUB_DS_SUMMARY,
)
def test_empty_persona_has_no_knowledge_sources(_mock_ds: MagicMock) -> None:
    """A persona with no attachments of any kind reports no sources."""
    snap = MinimalPersonaSnapshot.from_model(_make_persona())
    assert snap.knowledge_sources == []
@patch(
    "onyx.server.features.persona.models.DocumentSetSummary.from_model",
    return_value=_STUB_DS_SUMMARY,
)
def test_user_files_adds_user_file_source(_mock_ds: MagicMock) -> None:
    """Attached user files contribute DocumentSource.USER_FILE."""
    snap = MinimalPersonaSnapshot.from_model(
        _make_persona(user_files=[MagicMock()])
    )
    assert DocumentSource.USER_FILE in snap.knowledge_sources
@patch(
    "onyx.server.features.persona.models.DocumentSetSummary.from_model",
    return_value=_STUB_DS_SUMMARY,
)
def test_no_user_files_excludes_user_file_source(_mock_ds: MagicMock) -> None:
    """Without user files USER_FILE is absent, but cc-pair sources appear."""
    doc_set = _make_doc_set(cc_pairs=[_make_cc_pair(DocumentSource.CONFLUENCE)])
    snap = MinimalPersonaSnapshot.from_model(
        _make_persona(document_sets=[doc_set])
    )
    assert DocumentSource.USER_FILE not in snap.knowledge_sources
    assert DocumentSource.CONFLUENCE in snap.knowledge_sources
@patch(
    "onyx.server.features.persona.models.DocumentSetSummary.from_model",
    return_value=_STUB_DS_SUMMARY,
)
def test_federated_connector_in_doc_set(_mock_ds: MagicMock) -> None:
    """A federated Slack connector in a doc set maps to DocumentSource.SLACK."""
    mapping = _make_federated_ds_mapping(FederatedConnectorSource.FEDERATED_SLACK)
    snap = MinimalPersonaSnapshot.from_model(
        _make_persona(document_sets=[_make_doc_set(fed_connectors=[mapping])])
    )
    assert DocumentSource.SLACK in snap.knowledge_sources
@patch(
    "onyx.server.features.persona.models.DocumentSetSummary.from_model",
    return_value=_STUB_DS_SUMMARY,
)
def test_hierarchy_nodes_and_attached_documents(_mock_ds: MagicMock) -> None:
    """Hierarchy nodes and attached documents each contribute their source."""
    persona = _make_persona(
        hierarchy_nodes=[_make_hierarchy_node(DocumentSource.GOOGLE_DRIVE)],
        attached_documents=[_make_attached_document(DocumentSource.SHAREPOINT)],
    )
    snap = MinimalPersonaSnapshot.from_model(persona)
    assert DocumentSource.GOOGLE_DRIVE in snap.knowledge_sources
    assert DocumentSource.SHAREPOINT in snap.knowledge_sources
@patch(
    "onyx.server.features.persona.models.DocumentSetSummary.from_model",
    return_value=_STUB_DS_SUMMARY,
)
def test_all_source_types_combined(_mock_ds: MagicMock) -> None:
    """Every aggregation path contributes exactly one source to the set."""
    persona = _make_persona(
        document_sets=[
            _make_doc_set(
                cc_pairs=[_make_cc_pair(DocumentSource.CONFLUENCE)],
                fed_connectors=[
                    _make_federated_ds_mapping(
                        FederatedConnectorSource.FEDERATED_SLACK
                    )
                ],
            )
        ],
        hierarchy_nodes=[_make_hierarchy_node(DocumentSource.GOOGLE_DRIVE)],
        attached_documents=[_make_attached_document(DocumentSource.SHAREPOINT)],
        user_files=[MagicMock()],
    )
    snap = MinimalPersonaSnapshot.from_model(persona)
    assert set(snap.knowledge_sources) == {
        DocumentSource.CONFLUENCE,
        DocumentSource.SLACK,
        DocumentSource.GOOGLE_DRIVE,
        DocumentSource.SHAREPOINT,
        DocumentSource.USER_FILE,
    }

View File

@@ -100,6 +100,39 @@ class TestGenerateOllamaDisplayName:
result = generate_ollama_display_name("llama3.3:70b")
assert "3.3" in result or "3 3" in result # Either format is acceptable
def test_non_size_tag_shown(self) -> None:
    """Non-size tags such as 'e4b' survive into the display name."""
    display = generate_ollama_display_name("gemma4:e4b")
    for fragment in ("Gemma", "4", "E4B"):
        assert fragment in display
def test_size_with_cloud_modifier(self) -> None:
    """A size tag and its cloud modifier both appear in the display name."""
    display = generate_ollama_display_name("deepseek-v3.1:671b-cloud")
    for fragment in ("DeepSeek", "671B", "Cloud"):
        assert fragment in display
def test_size_with_multiple_modifiers(self) -> None:
    """A size tag followed by several modifiers keeps them all."""
    display = generate_ollama_display_name("qwen3-vl:235b-instruct-cloud")
    for fragment in ("Qwen", "235B", "Instruct", "Cloud"):
        assert fragment in display
def test_quantization_tag_shown(self) -> None:
    """Quantization tags (e.g. q4_0) are included in the display name."""
    display = generate_ollama_display_name("llama3:q4_0")
    assert "Llama" in display
    assert "Q4_0" in display
def test_cloud_only_tag(self) -> None:
    """A standalone cloud tag shows up (upper-cased, per the assertion)."""
    display = generate_ollama_display_name("glm-4.6:cloud")
    assert "CLOUD" in display
class TestStripOpenrouterVendorPrefix:
"""Tests for OpenRouter vendor prefix stripping."""

View File

@@ -1,16 +1,11 @@
"""Tests for indexing pipeline Prometheus collectors."""
from collections.abc import Iterator
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from unittest.mock import MagicMock
from unittest.mock import patch
import pytest
from onyx.server.metrics.indexing_pipeline import ConnectorHealthCollector
from onyx.server.metrics.indexing_pipeline import IndexAttemptCollector
from onyx.server.metrics.indexing_pipeline import QueueDepthCollector
@@ -18,7 +13,7 @@ from onyx.server.metrics.indexing_pipeline import QueueDepthCollector
def _mock_broker_client() -> Iterator[None]:
"""Patch celery_get_broker_client for all collector tests."""
with patch(
"onyx.background.celery.celery_redis.celery_get_broker_client",
"onyx.server.metrics.indexing_pipeline.celery_get_broker_client",
return_value=MagicMock(),
):
yield
@@ -137,212 +132,3 @@ class TestQueueDepthCollector:
stale_result = collector.collect()
assert stale_result is good_result
# Tests for IndexAttemptCollector, a Prometheus collector that reports active
# index attempts per tenant. NOTE(review): leading indentation was stripped in
# this view; the mock chains below mirror the collector's exact query shape.
class TestIndexAttemptCollector:
# collect() returns nothing until configure() has been called.
def test_returns_empty_when_not_configured(self) -> None:
collector = IndexAttemptCollector()
assert collector.collect() == []
# describe() stays empty — presumably so registry registration does not
# trigger a DB round-trip; confirm against the collector implementation.
def test_returns_empty_describe(self) -> None:
collector = IndexAttemptCollector()
assert collector.describe() == []
@patch("onyx.db.engine.tenant_utils.get_all_tenant_ids")
@patch("onyx.db.engine.sql_engine.get_session_with_current_tenant")
def test_collects_index_attempts(
self,
mock_get_session: MagicMock,
mock_get_tenants: MagicMock,
) -> None:
# cache_ttl=0 → no stale cache (see test_handles_db_error_gracefully).
collector = IndexAttemptCollector(cache_ttl=0)
collector.configure()
mock_get_tenants.return_value = ["public"]
mock_session = MagicMock()
# Emulate the context-manager protocol of the session factory.
mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)
mock_get_session.return_value.__exit__ = MagicMock(return_value=False)
from onyx.db.enums import IndexingStatus
# Row shape per the labels asserted below:
# (status, source, cc_pair_id, connector_name, count).
mock_row = (
IndexingStatus.IN_PROGRESS,
MagicMock(value="web"),
81,
"Table Tennis Blade Guide",
2,
)
# Mirrors the collector's query().join().join().filter().group_by().all()
# chain exactly — brittle by design; update if the query shape changes.
mock_session.query.return_value.join.return_value.join.return_value.filter.return_value.group_by.return_value.all.return_value = [
mock_row
]
families = collector.collect()
assert len(families) == 1
assert families[0].name == "onyx_index_attempts_active"
assert len(families[0].samples) == 1
sample = families[0].samples[0]
# Labels are stringified; the enum status is lower-cased to "in_progress".
assert sample.labels == {
"status": "in_progress",
"source": "web",
"tenant_id": "public",
"connector_name": "Table Tennis Blade Guide",
"cc_pair_id": "81",
}
assert sample.value == 2
@patch("onyx.db.engine.tenant_utils.get_all_tenant_ids")
def test_handles_db_error_gracefully(
self,
mock_get_tenants: MagicMock,
) -> None:
collector = IndexAttemptCollector(cache_ttl=0)
collector.configure()
# A tenant-lookup failure must not propagate out of collect().
mock_get_tenants.side_effect = Exception("DB down")
families = collector.collect()
# No stale cache, so returns empty
assert families == []
@patch("onyx.db.engine.tenant_utils.get_all_tenant_ids")
def test_skips_none_tenant_ids(
self,
mock_get_tenants: MagicMock,
) -> None:
collector = IndexAttemptCollector(cache_ttl=0)
collector.configure()
# None tenant ids are skipped rather than queried.
mock_get_tenants.return_value = [None]
families = collector.collect()
assert len(families) == 1  # Returns the gauge family, just with no samples
assert len(families[0].samples) == 0
# Tests for ConnectorHealthCollector, which emits six metric families
# describing connector status, staleness, error state, and doc/error counts.
class TestConnectorHealthCollector:
# collect() returns nothing until configure() has been called.
def test_returns_empty_when_not_configured(self) -> None:
collector = ConnectorHealthCollector()
assert collector.collect() == []
def test_returns_empty_describe(self) -> None:
collector = ConnectorHealthCollector()
assert collector.describe() == []
@patch("onyx.db.engine.tenant_utils.get_all_tenant_ids")
@patch("onyx.db.engine.sql_engine.get_session_with_current_tenant")
def test_collects_connector_health(
self,
mock_get_session: MagicMock,
mock_get_tenants: MagicMock,
) -> None:
# cache_ttl=0 → every collect() hits the (mocked) DB.
collector = ConnectorHealthCollector(cache_ttl=0)
collector.configure()
mock_get_tenants.return_value = ["public"]
mock_session = MagicMock()
mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)
mock_get_session.return_value.__exit__ = MagicMock(return_value=False)
# Last success 2h ago → staleness sample should be ~7200 seconds.
now = datetime.now(tz=timezone.utc)
last_success = now - timedelta(hours=2)
mock_status = MagicMock(value="ACTIVE")
mock_source = MagicMock(value="google_drive")
# Row: (id, status, in_error, last_success, name, source)
mock_row = (
42,
mock_status,
True,  # in_repeated_error_state
last_success,
"My GDrive Connector",
mock_source,
)
mock_session.query.return_value.join.return_value.all.return_value = [mock_row]
# Mock the index attempt queries (error counts + docs counts)
mock_session.query.return_value.filter.return_value.group_by.return_value.all.return_value = (
[]
)
families = collector.collect()
# One family per exported metric — six in total.
assert len(families) == 6
names = {f.name for f in families}
assert names == {
"onyx_connector_last_success_age_seconds",
"onyx_connector_in_error_state",
"onyx_connectors_by_status",
"onyx_connectors_in_error_total",
"onyx_connector_docs_indexed",
"onyx_connector_error_count",
}
staleness = next(
f for f in families if f.name == "onyx_connector_last_success_age_seconds"
)
assert len(staleness.samples) == 1
# Tolerance of 5s absorbs test runtime between now() and collect().
assert staleness.samples[0].value == pytest.approx(7200, abs=5)
error_state = next(
f for f in families if f.name == "onyx_connector_in_error_state"
)
assert error_state.samples[0].value == 1.0
by_status = next(f for f in families if f.name == "onyx_connectors_by_status")
assert by_status.samples[0].labels == {
"tenant_id": "public",
"status": "ACTIVE",
}
assert by_status.samples[0].value == 1
error_total = next(
f for f in families if f.name == "onyx_connectors_in_error_total"
)
assert error_total.samples[0].value == 1
@patch("onyx.db.engine.tenant_utils.get_all_tenant_ids")
@patch("onyx.db.engine.sql_engine.get_session_with_current_tenant")
def test_skips_staleness_when_no_last_success(
self,
mock_get_session: MagicMock,
mock_get_tenants: MagicMock,
) -> None:
collector = ConnectorHealthCollector(cache_ttl=0)
collector.configure()
mock_get_tenants.return_value = ["public"]
mock_session = MagicMock()
mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)
mock_get_session.return_value.__exit__ = MagicMock(return_value=False)
mock_status = MagicMock(value="INITIAL_INDEXING")
mock_source = MagicMock(value="slack")
mock_row = (
10,
mock_status,
False,
None,  # no last_successful_index_time
0,
mock_source,
)
mock_session.query.return_value.join.return_value.all.return_value = [mock_row]
families = collector.collect()
# A connector that has never succeeded gets no staleness sample.
staleness = next(
f for f in families if f.name == "onyx_connector_last_success_age_seconds"
)
assert len(staleness.samples) == 0
@patch("onyx.db.engine.tenant_utils.get_all_tenant_ids")
def test_handles_db_error_gracefully(
self,
mock_get_tenants: MagicMock,
) -> None:
collector = ConnectorHealthCollector(cache_ttl=0)
collector.configure()
# Failures surface as an empty result, never an exception.
mock_get_tenants.side_effect = Exception("DB down")
families = collector.collect()
assert families == []

View File

@@ -0,0 +1,927 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": null,
"links": [],
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"description": " This chart shows how long it takes for Onyx to crawl each source connector and collect the current list of documents. The Y axis represents duration in seconds (bucketed), and each band shows how many enumerations completed within that time range.",
"fieldConfig": {
"defaults": {
"custom": {
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"scaleDistribution": {
"type": "linear"
}
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 0
},
"id": 1,
"options": {
"calculate": false,
"cellGap": 1,
"color": {
"exponent": 0.5,
"fill": "dark-orange",
"mode": "scheme",
"reverse": false,
"scale": "exponential",
"scheme": "Oranges",
"steps": 64
},
"exemplars": {
"color": "rgba(255,0,255,0.7)"
},
"filterValues": {
"le": 1e-09
},
"legend": {
"show": true
},
"rowsFrame": {
"layout": "auto"
},
"tooltip": {
"mode": "single",
"showColorScale": false,
"yHistogram": false
},
"yAxis": {
"axisPlacement": "left",
"reverse": false,
"unit": "s"
}
},
"pluginVersion": "10.4.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "sum(increase(onyx_pruning_enumeration_duration_seconds_bucket[30m])) by (le)",
"format": "heatmap",
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"title": "Pruning Enumeration Duration",
"type": "heatmap"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 0
},
"id": 7,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "histogram_quantile(0.95, sum(rate(onyx_pruning_enumeration_duration_seconds_bucket[1h])) by (le, connector_type))",
"instant": false,
"legendFormat": "{{connector_type}}",
"range": true,
"refId": "A"
}
],
"title": "Pruning Enumeration Duration p95 by Connector",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"description": "Shows how many pruning enumerations completed per hour, broken down by connector type. A low count means few connectors are successfully completing the enumeration phase. A count of 0 for a connector type that should be pruning indicates enumerations are timing out before completion.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 8
},
"id": 6,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "sum(increase(onyx_pruning_enumeration_duration_seconds_count[1h])) by (connector_type)",
"instant": false,
"legendFormat": "{{connector_type}}",
"range": true,
"refId": "A"
}
],
"title": "Pruning Enumeration Count",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"description": "Shows the 95th percentile execution duration of pruning tasks. A rising p95 indicates pruning jobs are taking longer over time, potentially approaching the 6-hour timeout limit. Sustained values near 21600s (6 hours) indicate connectors with too many documents to prune within the allowed window.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 8
},
"id": 5,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "histogram_quantile(0.95, sum(rate(onyx_celery_task_duration_seconds_bucket{task_name=~\"connector_pruning.*\"}[1h])) by (le, task_name))",
"instant": false,
"legendFormat": "{{task_name}}",
"range": true,
"refId": "A"
}
],
"title": "Pruning Task Duration p95",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"description": "Shows the number of currently executing pruning tasks on the heavy worker, broken down by task type. A value of 0 means no pruning is actively running. A sustained high count may indicate workers are saturated and new pruning jobs are queuing up.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"__systemRef": "hideSeriesFrom",
"matcher": {
"id": "byNames",
"options": {
"mode": "exclude",
"names": [
"connector_pruning_generator_task"
],
"prefix": "All except:",
"readOnly": true
}
},
"properties": [
{
"id": "custom.hideFrom",
"value": {
"legend": false,
"tooltip": false,
"viz": true
}
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 16
},
"id": 4,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "sum(onyx_celery_tasks_active{queue=~\"connector_pruning.*|connector_doc_permissions.*|connector_external_group.*|csv_generation|sandbox\"}) by (task_name)",
"instant": false,
"legendFormat": "{{task_name}}",
"range": true,
"refId": "A"
}
],
"title": "Heavy Worker - Active Tasks",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"description": "This chart shows how frequently Onyx hits rate limits from source connectors during the enumeration phase. Rate limit errors slow down or stall the document crawl, directly increasing enumeration duration. A spike here for a specific connector type indicates the source API is throttling Onyx's requests, which may explain long enumeration times for that connector.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 16
},
"id": 3,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "sum(rate(onyx_pruning_rate_limit_errors_total[5m])) by (connector_type)",
"instant": false,
"legendFormat": "{{connector_type}}",
"range": true,
"refId": "A"
}
],
"title": "Pruning Rate Limit Errors",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"description": "Shows the rate of pruning task failures and revocations per hour. Failures indicate crashed tasks (DB errors, timeouts). Revocations indicate cancelled tasks, typically from worker restarts or deployments. Both result in orphaned fences that block future pruning attempts for affected connectors.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 24
},
"id": 9,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "sum(increase(onyx_celery_task_revoked_total{task_name=~\"connector_pruning.*\"}[1h])) by (task_name)",
"hide": false,
"instant": false,
"legendFormat": "revoked",
"range": true,
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "sum(increase(onyx_celery_task_completed_total{task_name=~\"connector_pruning.*\", outcome=\"failure\"}[1h])) by (task_name)",
"hide": false,
"instant": false,
"legendFormat": "failure",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "sum(increase(onyx_celery_task_completed_total{task_name=~\"connector_pruning.*\", outcome=\"success\"}[1h])) by (task_name)",
"hide": false,
"instant": false,
"legendFormat": "success",
"range": true,
"refId": "C"
}
],
"title": "Heavy Worker - Pruning Task Success & Failures & Revocations",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"description": "Shows the ratio of successfully completed pruning tasks to total completed tasks. A value of 1.0 (100%) means all pruning jobs are completing cleanly. A drop indicates tasks are crashing or timing out, which leads to orphaned fences and connectors being blocked from future pruning attempts.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 24
},
"id": 8,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": " sum(rate(onyx_celery_task_completed_total{task_name=~\"connector_pruning.*\", outcome=\"success\"}[1h]))\n /\n sum(rate(onyx_celery_task_completed_total{task_name=~\"connector_pruning.*\"}[1h]))",
"instant": false,
"legendFormat": "Success Rate",
"range": true,
"refId": "A"
}
],
"title": "Heavy Worker - Pruning Task Success Rate",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"description": "This chart shows how long it takes Onyx to compare the list of documents fetched from the source connector against what is currently indexed. The diff computes the set difference \u2014 documents that exist in the index but no longer exist in the source are flagged for removal.",
"fieldConfig": {
"defaults": {
"custom": {
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"scaleDistribution": {
"type": "linear"
}
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 32
},
"id": 2,
"options": {
"calculate": false,
"cellGap": 1,
"color": {
"exponent": 0.5,
"fill": "dark-orange",
"mode": "scheme",
"reverse": false,
"scale": "exponential",
"scheme": "Oranges",
"steps": 64
},
"exemplars": {
"color": "rgba(255,0,255,0.7)"
},
"filterValues": {
"le": 1e-09
},
"legend": {
"show": true
},
"rowsFrame": {
"layout": "auto"
},
"tooltip": {
"mode": "single",
"showColorScale": false,
"yHistogram": false
},
"yAxis": {
"axisPlacement": "left",
"reverse": false,
"unit": "s"
}
},
"pluginVersion": "10.4.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "sum(increase(onyx_pruning_diff_duration_seconds_bucket[30m])) by (le)",
"format": "heatmap",
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"title": "Pruning Diff Duration",
"type": "heatmap"
}
],
"schemaVersion": 39,
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {},
"timezone": "browser",
"title": "Indexing - Pruning",
"uid": "onyx-indexing-pruning",
"version": 10,
"weekStart": ""
}

View File

@@ -38,4 +38,17 @@ metadata:
data:
onyx-redis-queues.json: |
{{- .Files.Get "dashboards/redis-queues.json" | nindent 4 }}
---
apiVersion: v1
kind: ConfigMap
metadata:
name: {{ include "onyx.fullname" . }}-indexing-pruning-dashboard
labels:
{{- include "onyx.labels" . | nindent 4 }}
grafana_dashboard: "1"
annotations:
grafana_folder: "Onyx"
data:
onyx-indexing-pruning.json: |
{{- .Files.Get "dashboards/indexing-pruning.json" | nindent 4 }}
{{- end }}

View File

@@ -217,11 +217,23 @@ Enriches docfetching and docprocessing tasks with connector-level labels. Silent
| `onyx_indexing_task_completed_total` | Counter | `task_name`, `source`, `tenant_id`, `cc_pair_id`, `outcome` | Indexing tasks completed per connector |
| `onyx_indexing_task_duration_seconds` | Histogram | `task_name`, `source`, `tenant_id` | Indexing task duration by connector type |
`connector_name` is intentionally excluded from these push-based counters to avoid unbounded cardinality (it's a free-form user string). The pull-based collectors on the monitoring worker include it since they have bounded cardinality (one series per connector).
`connector_name` is intentionally excluded from these push-based counters to avoid unbounded cardinality (it's a free-form user string).
### Connector Health Metrics (`onyx.server.metrics.connector_health_metrics`)
Push-based metrics emitted by docfetching and docprocessing workers at the point where connector state changes occur. Scales to any number of tenants (no schema iteration).
| Metric | Type | Labels | Description |
| ----------------------------------------------- | ------- | --------------------------------------------- | ------------------------------------------------------------- |
| `onyx_index_attempt_transitions_total` | Counter | `tenant_id`, `source`, `cc_pair_id`, `status` | Index attempt status transitions (in_progress, success, etc.) |
| `onyx_connector_in_error_state` | Gauge | `tenant_id`, `source`, `cc_pair_id` | Whether connector is in repeated error state (1=yes, 0=no) |
| `onyx_connector_last_success_timestamp_seconds` | Gauge | `tenant_id`, `source`, `cc_pair_id` | Unix timestamp of last successful indexing |
| `onyx_connector_docs_indexed_total` | Counter | `tenant_id`, `source`, `cc_pair_id` | Total documents indexed per connector (monotonic) |
| `onyx_connector_indexing_errors_total` | Counter | `tenant_id`, `source`, `cc_pair_id` | Total failed index attempts per connector (monotonic) |
### Pull-Based Collectors (`onyx.server.metrics.indexing_pipeline`)
Registered only in the **Monitoring** worker. Collectors query Redis/Postgres at scrape time with a 30-second TTL cache.
Registered only in the **Monitoring** worker. Collectors query Redis at scrape time with a 30-second TTL cache and a 120-second timeout to prevent the `/metrics` endpoint from hanging.
| Metric | Type | Labels | Description |
| ------------------------------------ | ----- | ------- | ----------------------------------- |
@@ -229,8 +241,6 @@ Registered only in the **Monitoring** worker. Collectors query Redis/Postgres at
| `onyx_queue_unacked` | Gauge | `queue` | Unacknowledged messages per queue |
| `onyx_queue_oldest_task_age_seconds` | Gauge | `queue` | Age of the oldest task in the queue |
Plus additional connector health, index attempt, and worker heartbeat metrics — see `indexing_pipeline.py` for the full list.
### Adding Metrics to a Worker
Currently only the docfetching and docprocessing workers have push-based task metrics wired up. To add metrics to another worker (e.g. heavy, light, primary):

43
profiling/README.md Normal file
View File

@@ -0,0 +1,43 @@
# Onyx Local Monitoring Stack
Prometheus + Grafana for local development. Pre-loaded with dashboards for the Onyx backend.
## Usage
```bash
cd profiling/
docker compose up -d
```
| Service | URL | Credentials |
|------------|------------------------------|---------------|
| Grafana | http://localhost:3001 | admin / admin |
| Prometheus | http://localhost:9090 | — |
## Dashboards
- **Onyx DB Pool Health** — PostgreSQL connection pool utilization
- **Onyx Indexing Pipeline v2** — Per-connector indexing throughput, queue depth, task latency
## Scrape targets
| Job | Port | Source |
|--------------------------|-------|-------------------------------|
| `onyx-api-server` | 8080 | FastAPI `/metrics` (matches `.vscode/launch.json`) |
| `onyx-monitoring-worker` | 9096 | Celery monitoring worker |
| `onyx-docfetching-worker`| 9092 | Celery docfetching worker |
| `onyx-docprocessing-worker`| 9093 | Celery docprocessing worker |
## Environment variables
Override defaults with a `.env` file in this directory or by setting them in your shell:
| Variable | Default | Description |
|---------------------|---------|---------------------------------|
| `PROMETHEUS_PORT` | `9090` | Host port for Prometheus UI |
| `GRAFANA_PORT` | `3001` | Host port for Grafana UI |
| `GF_ADMIN_PASSWORD` | `admin` | Grafana admin password |
## Editing dashboards
`allowUiUpdates: true` is set in the provisioning config, so you can edit dashboards in the Grafana UI. However, **changes don't persist** across `docker compose down` — to keep edits, export the dashboard JSON and overwrite the file in `grafana/dashboards/onyx/`.

View File

@@ -0,0 +1,38 @@
services:
# Prometheus - time series database for metrics
prometheus:
image: prom/prometheus:v3.2.1
ports:
- "${PROMETHEUS_PORT:-9090}:9090"
volumes:
- ./prometheus.yml:/etc/prometheus/prometheus.yml
- prometheus_data:/prometheus
command:
- '--config.file=/etc/prometheus/prometheus.yml'
- '--storage.tsdb.path=/prometheus'
- '--web.console.libraries=/etc/prometheus/console_libraries'
- '--web.console.templates=/etc/prometheus/consoles'
- '--storage.tsdb.retention.time=200h'
- '--web.enable-lifecycle'
extra_hosts:
- "host.docker.internal:host-gateway"
# Grafana - dashboards and visualization
grafana:
image: grafana/grafana:11.6.0
ports:
- "${GRAFANA_PORT:-3001}:3000"
environment:
- GF_SECURITY_ADMIN_PASSWORD=${GF_ADMIN_PASSWORD:-admin}
volumes:
- grafana_data:/var/lib/grafana
- ./grafana/provisioning:/etc/grafana/provisioning
- ./grafana/dashboards:/var/lib/grafana/dashboards
extra_hosts:
- "host.docker.internal:host-gateway"
depends_on:
- prometheus
volumes:
prometheus_data:
grafana_data:

View File

@@ -0,0 +1,561 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": { "type": "grafana", "uid": "-- Grafana --" },
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"id": null,
"links": [],
"liveNow": true,
"panels": [
{
"title": "Pool Connections Checked Out (sync)",
"description": "Number of connections currently held by application code. Should spike briefly then return to ~0 with the fix. Without the fix, this climbs to match concurrent streams and stays there.",
"type": "timeseries",
"gridPos": { "h": 10, "w": 12, "x": 0, "y": 0 },
"id": 1,
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisLabel": "connections",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 30,
"gradientMode": "scheme",
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "dashed" }
},
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 20 },
{ "color": "red", "value": 40 }
]
},
"min": 0,
"max": 50
},
"overrides": [
{
"matcher": { "id": "byName", "options": "pool_size" },
"properties": [
{ "id": "custom.drawStyle", "value": "line" },
{ "id": "custom.lineStyle", "value": { "fill": "dash", "dash": [10, 10] } },
{ "id": "custom.fillOpacity", "value": 0 },
{ "id": "color", "value": { "fixedColor": "dark-red", "mode": "fixed" } }
]
}
]
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "onyx_db_pool_checked_out{engine=\"sync\"}",
"legendFormat": "checked_out",
"refId": "A"
},
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "onyx_db_pool_size{engine=\"sync\"}",
"legendFormat": "pool_size",
"refId": "B"
}
]
},
{
"title": "Pool Connections Checked Out (all engines)",
"description": "Checked out connections across sync, async, and readonly engines.",
"type": "timeseries",
"gridPos": { "h": 10, "w": 12, "x": 12, "y": 0 },
"id": 2,
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisLabel": "connections",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 20,
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"showPoints": "never",
"stacking": { "group": "A", "mode": "normal" }
},
"min": 0
},
"overrides": []
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "onyx_db_pool_checked_out",
"legendFormat": "{{engine}}",
"refId": "A"
}
]
},
{
"title": "Connections Held by Endpoint",
"description": "Which API handlers are currently holding DB connections. The chat streaming endpoint should drop to 0 during Phase 2 with the fix.",
"type": "timeseries",
"gridPos": { "h": 10, "w": 12, "x": 0, "y": 10 },
"id": 3,
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisLabel": "connections",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 20,
"lineInterpolation": "smooth",
"lineWidth": 2,
"showPoints": "never",
"stacking": { "group": "A", "mode": "normal" }
},
"min": 0
},
"overrides": []
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "onyx_db_connections_held_by_endpoint{engine=\"sync\"} > 0",
"legendFormat": "{{handler}}",
"refId": "A"
}
]
},
{
"title": "Connection Hold Duration (p50 / p95 / p99)",
"description": "How long connections are held before being returned. With the fix, p95 should drop from stream-duration (30s+) to sub-second.",
"type": "timeseries",
"gridPos": { "h": 10, "w": 12, "x": 12, "y": 10 },
"id": 4,
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisLabel": "seconds",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 10,
"lineInterpolation": "smooth",
"lineWidth": 2,
"showPoints": "never"
},
"unit": "s",
"min": 0
},
"overrides": []
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "histogram_quantile(0.50, sum by (le)(rate(onyx_db_connection_hold_seconds_bucket{engine=\"sync\"}[1m])))",
"legendFormat": "sync p50",
"refId": "A"
},
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "histogram_quantile(0.95, sum by (le)(rate(onyx_db_connection_hold_seconds_bucket{engine=\"sync\"}[1m])))",
"legendFormat": "sync p95",
"refId": "B"
},
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "histogram_quantile(0.99, sum by (le)(rate(onyx_db_connection_hold_seconds_bucket{engine=\"sync\"}[1m])))",
"legendFormat": "sync p99",
"refId": "C"
},
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "histogram_quantile(0.99, sum by (le)(rate(onyx_db_connection_hold_seconds_bucket{engine=\"async\"}[1m])))",
"legendFormat": "async p99",
"refId": "D"
}
]
},
{
"title": "Async vs Sync Hold Duration (p99)",
"description": "Compares connection hold times between sync (our fix) and async (auth middleware). Sync should be sub-second after fix. Async stays high because FastAPI auth dependency holds a session for the entire StreamingResponse lifetime.",
"type": "timeseries",
"gridPos": { "h": 10, "w": 12, "x": 0, "y": 20 },
"id": 11,
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisLabel": "seconds",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 10,
"lineInterpolation": "smooth",
"lineWidth": 2,
"showPoints": "never"
},
"unit": "s",
"min": 0
},
"overrides": [
{
"matcher": { "id": "byName", "options": "sync p99" },
"properties": [
{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }
]
},
{
"matcher": { "id": "byName", "options": "async p99" },
"properties": [
{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }
]
}
]
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "histogram_quantile(0.99, sum by (le)(rate(onyx_db_connection_hold_seconds_bucket{engine=\"sync\"}[1m])))",
"legendFormat": "sync p99",
"refId": "A"
},
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "histogram_quantile(0.99, sum by (le)(rate(onyx_db_connection_hold_seconds_bucket{engine=\"async\"}[1m])))",
"legendFormat": "async p99",
"refId": "B"
}
]
},
{
"title": "Async Connections Held (auth middleware)",
"description": "Async engine connections checked out — these are held by FastAPI's auth dependency for the entire StreamingResponse lifetime. This is the NEXT bottleneck to fix after the sync session changes.",
"type": "timeseries",
"gridPos": { "h": 10, "w": 12, "x": 12, "y": 20 },
"id": 12,
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisLabel": "connections",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 30,
"gradientMode": "scheme",
"lineInterpolation": "smooth",
"lineWidth": 2,
"showPoints": "never",
"thresholdsStyle": { "mode": "dashed" }
},
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 15 },
{ "color": "red", "value": 30 }
]
},
"min": 0
},
"overrides": []
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "onyx_db_pool_checked_out{engine=\"async\"}",
"legendFormat": "async checked_out",
"refId": "A"
},
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "onyx_db_pool_size{engine=\"async\"}",
"legendFormat": "async pool_size",
"refId": "B"
}
]
},
{
"title": "Pool Checkout Rate (per second)",
"description": "Rate of connection checkouts. With the fix, each chat creates multiple short checkouts (Phase 1 + Phase 3) instead of one long one.",
"type": "timeseries",
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 30 },
"id": 5,
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisLabel": "checkouts/s",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 20,
"lineInterpolation": "smooth",
"lineWidth": 2,
"showPoints": "never"
},
"unit": "ops",
"min": 0
},
"overrides": []
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "rate(onyx_db_pool_checkout_total{engine=\"sync\"}[30s])",
"legendFormat": "sync checkouts/s",
"refId": "A"
},
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "rate(onyx_db_pool_checkin_total{engine=\"sync\"}[30s])",
"legendFormat": "sync checkins/s",
"refId": "B"
}
]
},
{
"title": "Pool Overflow & Timeouts",
"description": "Overflow = connections beyond pool_size. Timeouts = requests that couldn't get a connection. Any timeout is a user-facing error.",
"type": "timeseries",
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 30 },
"id": 6,
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisLabel": "",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 20,
"lineInterpolation": "smooth",
"lineWidth": 2,
"showPoints": "never"
},
"min": 0
},
"overrides": [
{
"matcher": { "id": "byName", "options": "timeouts" },
"properties": [
{ "id": "color", "value": { "fixedColor": "dark-red", "mode": "fixed" } },
{ "id": "custom.fillOpacity", "value": 50 }
]
}
]
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "onyx_db_pool_overflow{engine=\"sync\"}",
"legendFormat": "overflow (sync)",
"refId": "A"
},
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "increase(onyx_db_pool_checkout_timeout_total{engine=\"sync\"}[30s])",
"legendFormat": "timeouts",
"refId": "B"
}
]
},
{
"title": "Current Pool State",
"description": "Snapshot of pool health right now.",
"type": "stat",
"gridPos": { "h": 6, "w": 6, "x": 0, "y": 38 },
"id": 7,
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"fieldConfig": {
"defaults": {
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 15 },
{ "color": "red", "value": 35 }
]
}
},
"overrides": []
},
"options": {
"colorMode": "background",
"graphMode": "area",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"textMode": "auto"
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "onyx_db_pool_checked_out{engine=\"sync\"}",
"legendFormat": "Checked Out (sync)",
"refId": "A"
}
]
},
{
"title": "Total Checkout Timeouts",
"description": "Cumulative pool checkout timeouts — each one is a failed request.",
"type": "stat",
"gridPos": { "h": 6, "w": 6, "x": 6, "y": 38 },
"id": 8,
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"fieldConfig": {
"defaults": {
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "red", "value": 1 }
]
}
},
"overrides": []
},
"options": {
"colorMode": "background",
"graphMode": "none",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"textMode": "auto"
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "sum(onyx_db_pool_checkout_timeout_total)",
"legendFormat": "Timeouts",
"refId": "A"
}
]
},
{
"title": "Pool Utilization %",
"description": "Percentage of pool_size currently checked out.",
"type": "gauge",
"gridPos": { "h": 6, "w": 6, "x": 12, "y": 38 },
"id": 9,
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"fieldConfig": {
"defaults": {
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 50 },
{ "color": "red", "value": 80 }
]
},
"min": 0,
"max": 100,
"unit": "percent"
},
"overrides": []
},
"options": {
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"showThresholdLabels": false,
"showThresholdMarkers": true
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "onyx_db_pool_checked_out{engine=\"sync\"} / onyx_db_pool_size{engine=\"sync\"} * 100",
"legendFormat": "Utilization",
"refId": "A"
}
]
},
{
"title": "Total Checkouts",
"description": "Cumulative connection checkouts since server start.",
"type": "stat",
"gridPos": { "h": 6, "w": 6, "x": 18, "y": 38 },
"id": 10,
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"fieldConfig": {
"defaults": {
"thresholds": {
"mode": "absolute",
"steps": [{ "color": "blue", "value": null }]
}
},
"overrides": []
},
"options": {
"colorMode": "background",
"graphMode": "area",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"textMode": "auto"
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "sum(onyx_db_pool_checkout_total{engine=\"sync\"})",
"legendFormat": "Total Checkouts",
"refId": "A"
}
]
}
],
"refresh": "5s",
"schemaVersion": 37,
"style": "dark",
"tags": ["onyx", "db-pool", "load-test"],
"templating": {
"list": [
{
"current": { "text": "Prometheus", "value": "prometheus" },
"includeAll": false,
"name": "DS_PROMETHEUS",
"options": [],
"query": "prometheus",
"refresh": 1,
"type": "datasource"
}
]
},
"time": { "from": "now-15m", "to": "now" },
"timepicker": {
"refresh_intervals": ["5s", "10s", "30s", "1m"]
},
"timezone": "",
"title": "Onyx DB Pool Health",
"uid": "onyx-db-pool-health",
"version": 0,
"weekStart": ""
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,12 @@
apiVersion: 1
providers:
- name: 'onyx-dashboards'
orgId: 1
folder: 'Onyx'
type: file
updateIntervalSeconds: 10
allowUiUpdates: true
options:
path: /var/lib/grafana/dashboards/onyx
foldersFromFilesStructure: false

View File

@@ -0,0 +1,10 @@
apiVersion: 1
datasources:
- name: Prometheus
type: prometheus
access: proxy
url: http://prometheus:9090
isDefault: true
uid: PBFA97CFB590B2093
editable: true

36
profiling/prometheus.yml Normal file
View File

@@ -0,0 +1,36 @@
global:
scrape_interval: 15s
evaluation_interval: 15s
scrape_configs:
# Prometheus self-monitoring
- job_name: 'prometheus'
static_configs:
- targets: ['localhost:9090']
# Onyx API server — exposes /metrics via prometheus-fastapi-instrumentator
# Port matches the API Server launch config in .vscode/launch.json
- job_name: 'onyx-api-server'
scrape_interval: 5s
metrics_path: /metrics
static_configs:
- targets: ['host.docker.internal:8080']
# Onyx celery workers — each exposes /metrics on a dedicated port
- job_name: 'onyx-monitoring-worker'
scrape_interval: 5s
metrics_path: /metrics
static_configs:
- targets: ['host.docker.internal:9096']
- job_name: 'onyx-docfetching-worker'
scrape_interval: 5s
metrics_path: /metrics
static_configs:
- targets: ['host.docker.internal:9092']
- job_name: 'onyx-docprocessing-worker'
scrape_interval: 5s
metrics_path: /metrics
static_configs:
- targets: ['host.docker.internal:9093']

View File

@@ -60,7 +60,7 @@ backend = [
"httpcore==1.0.9",
"httpx[http2]==0.28.1",
"httpx-oauth==0.15.1",
"huggingface-hub==0.35.3",
"huggingface-hub==1.10.2",
"inflection==0.5.1",
"jira==3.10.5",
"jsonref==1.1.0",
@@ -84,7 +84,7 @@ backend = [
"openpyxl==3.0.10",
"opensearch-py==3.0.0",
"passlib==1.7.4",
"playwright==1.55.0",
"playwright==1.58.0",
"psutil==7.1.3",
"psycopg2-binary==2.9.9",
"puremagic==1.28",
@@ -189,9 +189,9 @@ model_server = [
"einops==0.8.1",
"numpy==2.4.1",
"safetensors==0.5.3",
"sentence-transformers==4.0.2",
"sentence-transformers==5.4.1",
"torch==2.9.1",
"transformers==4.53.0",
"transformers==5.5.4",
"sentry-sdk[fastapi,celery,starlette]==2.14.0",
]
@@ -227,11 +227,6 @@ module = "generated.*"
follow_imports = "silent"
ignore_errors = true
[[tool.mypy.overrides]]
module = "transformers.*"
follow_imports = "skip"
ignore_errors = true
[tool.uv.workspace]
members = ["tools/ods"]

View File

@@ -154,11 +154,11 @@ func worktreeGitMount(root string) (string, bool) {
func sshAgentMount() (string, bool) {
sock := os.Getenv("SSH_AUTH_SOCK")
if sock == "" {
log.Debug("SSH_AUTH_SOCK not set — skipping SSH agent forwarding")
log.Warn("SSH_AUTH_SOCK not set — SSH agent forwarding disabled (git over SSH won't work inside the container)")
return "", false
}
if _, err := os.Stat(sock); err != nil {
log.Debugf("SSH_AUTH_SOCK=%s not accessible: %v", sock, err)
log.Warnf("SSH_AUTH_SOCK=%s not accessible — SSH agent forwarding disabled: %v", sock, err)
return "", false
}
mount := fmt.Sprintf("type=bind,source=%s,target=/tmp/ssh-agent.sock", sock)

136
uv.lock generated
View File

@@ -2301,31 +2301,34 @@ wheels = [
[[package]]
name = "hf-xet"
version = "1.2.0"
version = "1.4.3"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/5e/6e/0f11bacf08a67f7fb5ee09740f2ca54163863b07b70d579356e9222ce5d8/hf_xet-1.2.0.tar.gz", hash = "sha256:a8c27070ca547293b6890c4bf389f713f80e8c478631432962bb7f4bc0bd7d7f", size = 506020, upload-time = "2025-10-24T19:04:32.129Z" }
sdist = { url = "https://files.pythonhosted.org/packages/53/92/ec9ad04d0b5728dca387a45af7bc98fbb0d73b2118759f5f6038b61a57e8/hf_xet-1.4.3.tar.gz", hash = "sha256:8ddedb73c8c08928c793df2f3401ec26f95be7f7e516a7bee2fbb546f6676113", size = 670477, upload-time = "2026-03-31T22:40:07.874Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/9e/a5/85ef910a0aa034a2abcfadc360ab5ac6f6bc4e9112349bd40ca97551cff0/hf_xet-1.2.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:ceeefcd1b7aed4956ae8499e2199607765fbd1c60510752003b6cc0b8413b649", size = 2861870, upload-time = "2025-10-24T19:04:11.422Z" },
{ url = "https://files.pythonhosted.org/packages/ea/40/e2e0a7eb9a51fe8828ba2d47fe22a7e74914ea8a0db68a18c3aa7449c767/hf_xet-1.2.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b70218dd548e9840224df5638fdc94bd033552963cfa97f9170829381179c813", size = 2717584, upload-time = "2025-10-24T19:04:09.586Z" },
{ url = "https://files.pythonhosted.org/packages/a5/7d/daf7f8bc4594fdd59a8a596f9e3886133fdc68e675292218a5e4c1b7e834/hf_xet-1.2.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d40b18769bb9a8bc82a9ede575ce1a44c75eb80e7375a01d76259089529b5dc", size = 3315004, upload-time = "2025-10-24T19:04:00.314Z" },
{ url = "https://files.pythonhosted.org/packages/b1/ba/45ea2f605fbf6d81c8b21e4d970b168b18a53515923010c312c06cd83164/hf_xet-1.2.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:cd3a6027d59cfb60177c12d6424e31f4b5ff13d8e3a1247b3a584bf8977e6df5", size = 3222636, upload-time = "2025-10-24T19:03:58.111Z" },
{ url = "https://files.pythonhosted.org/packages/4a/1d/04513e3cab8f29ab8c109d309ddd21a2705afab9d52f2ba1151e0c14f086/hf_xet-1.2.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6de1fc44f58f6dd937956c8d304d8c2dea264c80680bcfa61ca4a15e7b76780f", size = 3408448, upload-time = "2025-10-24T19:04:20.951Z" },
{ url = "https://files.pythonhosted.org/packages/f0/7c/60a2756d7feec7387db3a1176c632357632fbe7849fce576c5559d4520c7/hf_xet-1.2.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f182f264ed2acd566c514e45da9f2119110e48a87a327ca271027904c70c5832", size = 3503401, upload-time = "2025-10-24T19:04:22.549Z" },
{ url = "https://files.pythonhosted.org/packages/4e/64/48fffbd67fb418ab07451e4ce641a70de1c40c10a13e25325e24858ebe5a/hf_xet-1.2.0-cp313-cp313t-win_amd64.whl", hash = "sha256:293a7a3787e5c95d7be1857358a9130694a9c6021de3f27fa233f37267174382", size = 2900866, upload-time = "2025-10-24T19:04:33.461Z" },
{ url = "https://files.pythonhosted.org/packages/e2/51/f7e2caae42f80af886db414d4e9885fac959330509089f97cccb339c6b87/hf_xet-1.2.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:10bfab528b968c70e062607f663e21e34e2bba349e8038db546646875495179e", size = 2861861, upload-time = "2025-10-24T19:04:19.01Z" },
{ url = "https://files.pythonhosted.org/packages/6e/1d/a641a88b69994f9371bd347f1dd35e5d1e2e2460a2e350c8d5165fc62005/hf_xet-1.2.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:2a212e842647b02eb6a911187dc878e79c4aa0aa397e88dd3b26761676e8c1f8", size = 2717699, upload-time = "2025-10-24T19:04:17.306Z" },
{ url = "https://files.pythonhosted.org/packages/df/e0/e5e9bba7d15f0318955f7ec3f4af13f92e773fbb368c0b8008a5acbcb12f/hf_xet-1.2.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:30e06daccb3a7d4c065f34fc26c14c74f4653069bb2b194e7f18f17cbe9939c0", size = 3314885, upload-time = "2025-10-24T19:04:07.642Z" },
{ url = "https://files.pythonhosted.org/packages/21/90/b7fe5ff6f2b7b8cbdf1bd56145f863c90a5807d9758a549bf3d916aa4dec/hf_xet-1.2.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:29c8fc913a529ec0a91867ce3d119ac1aac966e098cf49501800c870328cc090", size = 3221550, upload-time = "2025-10-24T19:04:05.55Z" },
{ url = "https://files.pythonhosted.org/packages/6f/cb/73f276f0a7ce46cc6a6ec7d6c7d61cbfe5f2e107123d9bbd0193c355f106/hf_xet-1.2.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e159cbfcfbb29f920db2c09ed8b660eb894640d284f102ada929b6e3dc410a", size = 3408010, upload-time = "2025-10-24T19:04:28.598Z" },
{ url = "https://files.pythonhosted.org/packages/b8/1e/d642a12caa78171f4be64f7cd9c40e3ca5279d055d0873188a58c0f5fbb9/hf_xet-1.2.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9c91d5ae931510107f148874e9e2de8a16052b6f1b3ca3c1b12f15ccb491390f", size = 3503264, upload-time = "2025-10-24T19:04:30.397Z" },
{ url = "https://files.pythonhosted.org/packages/17/b5/33764714923fa1ff922770f7ed18c2daae034d21ae6e10dbf4347c854154/hf_xet-1.2.0-cp314-cp314t-win_amd64.whl", hash = "sha256:210d577732b519ac6ede149d2f2f34049d44e8622bf14eb3d63bbcd2d4b332dc", size = 2901071, upload-time = "2025-10-24T19:04:37.463Z" },
{ url = "https://files.pythonhosted.org/packages/96/2d/22338486473df5923a9ab7107d375dbef9173c338ebef5098ef593d2b560/hf_xet-1.2.0-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:46740d4ac024a7ca9b22bebf77460ff43332868b661186a8e46c227fdae01848", size = 2866099, upload-time = "2025-10-24T19:04:15.366Z" },
{ url = "https://files.pythonhosted.org/packages/7f/8c/c5becfa53234299bc2210ba314eaaae36c2875e0045809b82e40a9544f0c/hf_xet-1.2.0-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:27df617a076420d8845bea087f59303da8be17ed7ec0cd7ee3b9b9f579dff0e4", size = 2722178, upload-time = "2025-10-24T19:04:13.695Z" },
{ url = "https://files.pythonhosted.org/packages/9a/92/cf3ab0b652b082e66876d08da57fcc6fa2f0e6c70dfbbafbd470bb73eb47/hf_xet-1.2.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3651fd5bfe0281951b988c0facbe726aa5e347b103a675f49a3fa8144c7968fd", size = 3320214, upload-time = "2025-10-24T19:04:03.596Z" },
{ url = "https://files.pythonhosted.org/packages/46/92/3f7ec4a1b6a65bf45b059b6d4a5d38988f63e193056de2f420137e3c3244/hf_xet-1.2.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:d06fa97c8562fb3ee7a378dd9b51e343bc5bc8190254202c9771029152f5e08c", size = 3229054, upload-time = "2025-10-24T19:04:01.949Z" },
{ url = "https://files.pythonhosted.org/packages/0b/dd/7ac658d54b9fb7999a0ccb07ad863b413cbaf5cf172f48ebcd9497ec7263/hf_xet-1.2.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:4c1428c9ae73ec0939410ec73023c4f842927f39db09b063b9482dac5a3bb737", size = 3413812, upload-time = "2025-10-24T19:04:24.585Z" },
{ url = "https://files.pythonhosted.org/packages/92/68/89ac4e5b12a9ff6286a12174c8538a5930e2ed662091dd2572bbe0a18c8a/hf_xet-1.2.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a55558084c16b09b5ed32ab9ed38421e2d87cf3f1f89815764d1177081b99865", size = 3508920, upload-time = "2025-10-24T19:04:26.927Z" },
{ url = "https://files.pythonhosted.org/packages/cb/44/870d44b30e1dcfb6a65932e3e1506c103a8a5aea9103c337e7a53180322c/hf_xet-1.2.0-cp37-abi3-win_amd64.whl", hash = "sha256:e6584a52253f72c9f52f9e549d5895ca7a471608495c4ecaa6cc73dba2b24d69", size = 2905735, upload-time = "2025-10-24T19:04:35.928Z" },
{ url = "https://files.pythonhosted.org/packages/72/43/724d307b34e353da0abd476e02f72f735cdd2bc86082dee1b32ea0bfee1d/hf_xet-1.4.3-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:7551659ba4f1e1074e9623996f28c3873682530aee0a846b7f2f066239228144", size = 3800935, upload-time = "2026-03-31T22:39:49.618Z" },
{ url = "https://files.pythonhosted.org/packages/2b/d2/8bee5996b699262edb87dbb54118d287c0e1b2fc78af7cdc41857ba5e3c4/hf_xet-1.4.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:bee693ada985e7045997f05f081d0e12c4c08bd7626dc397f8a7c487e6c04f7f", size = 3558942, upload-time = "2026-03-31T22:39:47.938Z" },
{ url = "https://files.pythonhosted.org/packages/c3/a1/e993d09cbe251196fb60812b09a58901c468127b7259d2bf0f68bf6088eb/hf_xet-1.4.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:21644b404bb0100fe3857892f752c4d09642586fd988e61501c95bbf44b393a3", size = 4207657, upload-time = "2026-03-31T22:39:39.69Z" },
{ url = "https://files.pythonhosted.org/packages/64/44/9eb6d21e5c34c63e5e399803a6932fa983cabdf47c0ecbcfe7ea97684b8c/hf_xet-1.4.3-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:987f09cfe418237812896a6736b81b1af02a3a6dcb4b4944425c4c4fca7a7cf8", size = 3986765, upload-time = "2026-03-31T22:39:37.936Z" },
{ url = "https://files.pythonhosted.org/packages/ea/7b/8ad6f16fdb82f5f7284a34b5ec48645bd575bdcd2f6f0d1644775909c486/hf_xet-1.4.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:60cf7fc43a99da0a853345cf86d23738c03983ee5249613a6305d3e57a5dca74", size = 4188162, upload-time = "2026-03-31T22:39:58.382Z" },
{ url = "https://files.pythonhosted.org/packages/1b/c4/39d6e136cbeea9ca5a23aad4b33024319222adbdc059ebcda5fc7d9d5ff4/hf_xet-1.4.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:2815a49a7a59f3e2edf0cf113ae88e8cb2ca2a221bf353fb60c609584f4884d4", size = 4424525, upload-time = "2026-03-31T22:40:00.225Z" },
{ url = "https://files.pythonhosted.org/packages/46/f2/adc32dae6bdbc367853118b9878139ac869419a4ae7ba07185dc31251b76/hf_xet-1.4.3-cp313-cp313t-win_amd64.whl", hash = "sha256:42ee323265f1e6a81b0e11094564fb7f7e0ec75b5105ffd91ae63f403a11931b", size = 3671610, upload-time = "2026-03-31T22:40:10.42Z" },
{ url = "https://files.pythonhosted.org/packages/e2/19/25d897dcc3f81953e0c2cde9ec186c7a0fee413eb0c9a7a9130d87d94d3a/hf_xet-1.4.3-cp313-cp313t-win_arm64.whl", hash = "sha256:27c976ba60079fb8217f485b9c5c7fcd21c90b0367753805f87cb9f3cdc4418a", size = 3528529, upload-time = "2026-03-31T22:40:09.106Z" },
{ url = "https://files.pythonhosted.org/packages/ec/36/3e8f85ca9fe09b8de2b2e10c63b3b3353d7dda88a0b3d426dffbe7b8313b/hf_xet-1.4.3-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:5251d5ece3a81815bae9abab41cf7ddb7bcb8f56411bce0827f4a3071c92fdc6", size = 3801019, upload-time = "2026-03-31T22:39:56.651Z" },
{ url = "https://files.pythonhosted.org/packages/b5/9c/defb6cb1de28bccb7bd8d95f6e60f72a3d3fa4cb3d0329c26fb9a488bfe7/hf_xet-1.4.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1feb0f3abeacee143367c326a128a2e2b60868ec12a36c225afb1d6c5a05e6d2", size = 3558746, upload-time = "2026-03-31T22:39:54.766Z" },
{ url = "https://files.pythonhosted.org/packages/c1/bd/8d001191893178ff8e826e46ad5299446e62b93cd164e17b0ffea08832ec/hf_xet-1.4.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8b301fc150290ca90b4fccd079829b84bb4786747584ae08b94b4577d82fb791", size = 4207692, upload-time = "2026-03-31T22:39:46.246Z" },
{ url = "https://files.pythonhosted.org/packages/ce/48/6790b402803250e9936435613d3a78b9aaeee7973439f0918848dde58309/hf_xet-1.4.3-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:d972fbe95ddc0d3c0fc49b31a8a69f47db35c1e3699bf316421705741aab6653", size = 3986281, upload-time = "2026-03-31T22:39:44.648Z" },
{ url = "https://files.pythonhosted.org/packages/51/56/ea62552fe53db652a9099eda600b032d75554d0e86c12a73824bfedef88b/hf_xet-1.4.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c5b48db1ee344a805a1b9bd2cda9b6b65fe77ed3787bd6e87ad5521141d317cd", size = 4187414, upload-time = "2026-03-31T22:40:04.951Z" },
{ url = "https://files.pythonhosted.org/packages/7d/f5/bc1456d4638061bea997e6d2db60a1a613d7b200e0755965ec312dc1ef79/hf_xet-1.4.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:22bdc1f5fb8b15bf2831440b91d1c9bbceeb7e10c81a12e8d75889996a5c9da8", size = 4424368, upload-time = "2026-03-31T22:40:06.347Z" },
{ url = "https://files.pythonhosted.org/packages/e4/76/ab597bae87e1f06d18d3ecb8ed7f0d3c9a37037fc32ce76233d369273c64/hf_xet-1.4.3-cp314-cp314t-win_amd64.whl", hash = "sha256:0392c79b7cf48418cd61478c1a925246cf10639f4cd9d94368d8ca1e8df9ea07", size = 3672280, upload-time = "2026-03-31T22:40:16.401Z" },
{ url = "https://files.pythonhosted.org/packages/62/05/2e462d34e23a09a74d73785dbed71cc5dbad82a72eee2ad60a72a554155d/hf_xet-1.4.3-cp314-cp314t-win_arm64.whl", hash = "sha256:681c92a07796325778a79d76c67011764ecc9042a8c3579332b61b63ae512075", size = 3528945, upload-time = "2026-03-31T22:40:14.995Z" },
{ url = "https://files.pythonhosted.org/packages/ac/9f/9c23e4a447b8f83120798f9279d0297a4d1360bdbf59ef49ebec78fe2545/hf_xet-1.4.3-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:d0da85329eaf196e03e90b84c2d0aca53bd4573d097a75f99609e80775f98025", size = 3805048, upload-time = "2026-03-31T22:39:53.105Z" },
{ url = "https://files.pythonhosted.org/packages/0b/f8/7aacb8e5f4a7899d39c787b5984e912e6c18b11be136ef13947d7a66d265/hf_xet-1.4.3-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:e23717ce4186b265f69afa66e6f0069fe7efbf331546f5c313d00e123dc84583", size = 3562178, upload-time = "2026-03-31T22:39:51.295Z" },
{ url = "https://files.pythonhosted.org/packages/df/9a/a24b26dc8a65f0ecc0fe5be981a19e61e7ca963b85e062c083f3a9100529/hf_xet-1.4.3-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fc360b70c815bf340ed56c7b8c63aacf11762a4b099b2fe2c9bd6d6068668c08", size = 4212320, upload-time = "2026-03-31T22:39:42.922Z" },
{ url = "https://files.pythonhosted.org/packages/53/60/46d493db155d2ee2801b71fb1b0fd67696359047fdd8caee2c914cc50c79/hf_xet-1.4.3-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:39f2d2e9654cd9b4319885733993807aab6de9dfbd34c42f0b78338d6617421f", size = 3991546, upload-time = "2026-03-31T22:39:41.335Z" },
{ url = "https://files.pythonhosted.org/packages/bc/f5/067363e1c96c6b17256910830d1b54099d06287e10f4ec6ec4e7e08371fc/hf_xet-1.4.3-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:49ad8a8cead2b56051aa84d7fce3e1335efe68df3cf6c058f22a65513885baac", size = 4193200, upload-time = "2026-03-31T22:40:01.936Z" },
{ url = "https://files.pythonhosted.org/packages/42/4b/53951592882d9c23080c7644542fda34a3813104e9e11fa1a7d82d419cb8/hf_xet-1.4.3-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:7716d62015477a70ea272d2d68cd7cad140f61c52ee452e133e139abfe2c17ba", size = 4429392, upload-time = "2026-03-31T22:40:03.492Z" },
{ url = "https://files.pythonhosted.org/packages/8a/21/75a6c175b4e79662ad8e62f46a40ce341d8d6b206b06b4320d07d55b188c/hf_xet-1.4.3-cp37-abi3-win_amd64.whl", hash = "sha256:6b591fcad34e272a5b02607485e4f2a1334aebf1bc6d16ce8eb1eb8978ac2021", size = 3677359, upload-time = "2026-03-31T22:40:13.619Z" },
{ url = "https://files.pythonhosted.org/packages/8a/7c/44314ecd0e89f8b2b51c9d9e5e7a60a9c1c82024ac471d415860557d3cd8/hf_xet-1.4.3-cp37-abi3-win_arm64.whl", hash = "sha256:7c2c7e20bcfcc946dc67187c203463f5e932e395845d098cc2a93f5b67ca0b47", size = 3533664, upload-time = "2026-03-31T22:40:12.152Z" },
]
[[package]]
@@ -2450,21 +2453,22 @@ wheels = [
[[package]]
name = "huggingface-hub"
version = "0.35.3"
version = "1.10.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "filelock" },
{ name = "fsspec" },
{ name = "hf-xet", marker = "platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" },
{ name = "hf-xet", marker = "platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" },
{ name = "httpx" },
{ name = "packaging" },
{ name = "pyyaml" },
{ name = "requests" },
{ name = "tqdm" },
{ name = "typer" },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/10/7e/a0a97de7c73671863ca6b3f61fa12518caf35db37825e43d63a70956738c/huggingface_hub-0.35.3.tar.gz", hash = "sha256:350932eaa5cc6a4747efae85126ee220e4ef1b54e29d31c3b45c5612ddf0b32a", size = 461798, upload-time = "2025-09-29T14:29:58.625Z" }
sdist = { url = "https://files.pythonhosted.org/packages/0c/4d/00734890c7fcfe2c7ff04f1c1a167186c42b19e370a2dd8cfd8c34fc92c4/huggingface_hub-1.10.2.tar.gz", hash = "sha256:4b276f820483b709dc86a53bcb8183ea496b8d8447c9f7f88a115a12b498a95f", size = 758428, upload-time = "2026-04-14T10:42:28.498Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/31/a0/651f93d154cb72323358bf2bbae3e642bdb5d2f1bfc874d096f7cb159fa0/huggingface_hub-0.35.3-py3-none-any.whl", hash = "sha256:0e3a01829c19d86d03793e4577816fe3bdfc1602ac62c7fb220d593d351224ba", size = 564262, upload-time = "2025-09-29T14:29:55.813Z" },
{ url = "https://files.pythonhosted.org/packages/5e/c9/4c1e1216b24bcab140c83acdf8bc89a846ea17cd8a06cd18e3fd308a297f/huggingface_hub-1.10.2-py3-none-any.whl", hash = "sha256:c26c908767cc711493978dc0b4f5747ba7841602997cc98bfd628450a28cf9bc", size = 642581, upload-time = "2026-04-14T10:42:26.563Z" },
]
[[package]]
@@ -4444,7 +4448,7 @@ backend = [
{ name = "httpx", extras = ["http2"], specifier = "==0.28.1" },
{ name = "httpx-oauth", specifier = "==0.15.1" },
{ name = "hubspot-api-client", specifier = "==11.1.0" },
{ name = "huggingface-hub", specifier = "==0.35.3" },
{ name = "huggingface-hub", specifier = "==1.10.2" },
{ name = "inflection", specifier = "==0.5.1" },
{ name = "jira", specifier = "==3.10.5" },
{ name = "jsonref", specifier = "==1.1.0" },
@@ -4467,7 +4471,7 @@ backend = [
{ name = "opensearch-py", specifier = "==3.0.0" },
{ name = "opentelemetry-proto", specifier = ">=1.39.0" },
{ name = "passlib", specifier = "==1.7.4" },
{ name = "playwright", specifier = "==1.55.0" },
{ name = "playwright", specifier = "==1.58.0" },
{ name = "psutil", specifier = "==7.1.3" },
{ name = "psycopg2-binary", specifier = "==2.9.9" },
{ name = "puremagic", specifier = "==1.28" },
@@ -4555,10 +4559,10 @@ model-server = [
{ name = "einops", specifier = "==0.8.1" },
{ name = "numpy", specifier = "==2.4.1" },
{ name = "safetensors", specifier = "==0.5.3" },
{ name = "sentence-transformers", specifier = "==4.0.2" },
{ name = "sentence-transformers", specifier = "==5.4.1" },
{ name = "sentry-sdk", extras = ["fastapi", "celery", "starlette"], specifier = "==2.14.0" },
{ name = "torch", specifier = "==2.9.1" },
{ name = "transformers", specifier = "==4.53.0" },
{ name = "transformers", specifier = "==5.5.4" },
]
[[package]]
@@ -5077,21 +5081,21 @@ wheels = [
[[package]]
name = "playwright"
version = "1.55.0"
version = "1.58.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "greenlet" },
{ name = "pyee" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/80/3a/c81ff76df266c62e24f19718df9c168f49af93cabdbc4608ae29656a9986/playwright-1.55.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:d7da108a95001e412effca4f7610de79da1637ccdf670b1ae3fdc08b9694c034", size = 40428109, upload-time = "2025-08-28T15:46:20.357Z" },
{ url = "https://files.pythonhosted.org/packages/cf/f5/bdb61553b20e907196a38d864602a9b4a461660c3a111c67a35179b636fa/playwright-1.55.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:8290cf27a5d542e2682ac274da423941f879d07b001f6575a5a3a257b1d4ba1c", size = 38687254, upload-time = "2025-08-28T15:46:23.925Z" },
{ url = "https://files.pythonhosted.org/packages/4a/64/48b2837ef396487807e5ab53c76465747e34c7143fac4a084ef349c293a8/playwright-1.55.0-py3-none-macosx_11_0_universal2.whl", hash = "sha256:25b0d6b3fd991c315cca33c802cf617d52980108ab8431e3e1d37b5de755c10e", size = 40428108, upload-time = "2025-08-28T15:46:27.119Z" },
{ url = "https://files.pythonhosted.org/packages/08/33/858312628aa16a6de97839adc2ca28031ebc5391f96b6fb8fdf1fcb15d6c/playwright-1.55.0-py3-none-manylinux1_x86_64.whl", hash = "sha256:c6d4d8f6f8c66c483b0835569c7f0caa03230820af8e500c181c93509c92d831", size = 45905643, upload-time = "2025-08-28T15:46:30.312Z" },
{ url = "https://files.pythonhosted.org/packages/83/83/b8d06a5b5721931aa6d5916b83168e28bd891f38ff56fe92af7bdee9860f/playwright-1.55.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:29a0777c4ce1273acf90c87e4ae2fe0130182100d99bcd2ae5bf486093044838", size = 45296647, upload-time = "2025-08-28T15:46:33.221Z" },
{ url = "https://files.pythonhosted.org/packages/06/2e/9db64518aebcb3d6ef6cd6d4d01da741aff912c3f0314dadb61226c6a96a/playwright-1.55.0-py3-none-win32.whl", hash = "sha256:29e6d1558ad9d5b5c19cbec0a72f6a2e35e6353cd9f262e22148685b86759f90", size = 35476046, upload-time = "2025-08-28T15:46:36.184Z" },
{ url = "https://files.pythonhosted.org/packages/46/4f/9ba607fa94bb9cee3d4beb1c7b32c16efbfc9d69d5037fa85d10cafc618b/playwright-1.55.0-py3-none-win_amd64.whl", hash = "sha256:7eb5956473ca1951abb51537e6a0da55257bb2e25fc37c2b75af094a5c93736c", size = 35476048, upload-time = "2025-08-28T15:46:38.867Z" },
{ url = "https://files.pythonhosted.org/packages/21/98/5ca173c8ec906abde26c28e1ecb34887343fd71cc4136261b90036841323/playwright-1.55.0-py3-none-win_arm64.whl", hash = "sha256:012dc89ccdcbd774cdde8aeee14c08e0dd52ddb9135bf10e9db040527386bd76", size = 31225543, upload-time = "2025-08-28T15:46:41.613Z" },
{ url = "https://files.pythonhosted.org/packages/f8/c9/9c6061d5703267f1baae6a4647bfd1862e386fbfdb97d889f6f6ae9e3f64/playwright-1.58.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:96e3204aac292ee639edbfdef6298b4be2ea0a55a16b7068df91adac077cc606", size = 42251098, upload-time = "2026-01-30T15:09:24.028Z" },
{ url = "https://files.pythonhosted.org/packages/e0/40/59d34a756e02f8c670f0fee987d46f7ee53d05447d43cd114ca015cb168c/playwright-1.58.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:70c763694739d28df71ed578b9c8202bb83e8fe8fb9268c04dd13afe36301f71", size = 41039625, upload-time = "2026-01-30T15:09:27.558Z" },
{ url = "https://files.pythonhosted.org/packages/e1/ee/3ce6209c9c74a650aac9028c621f357a34ea5cd4d950700f8e2c4b7fe2c4/playwright-1.58.0-py3-none-macosx_11_0_universal2.whl", hash = "sha256:185e0132578733d02802dfddfbbc35f42be23a45ff49ccae5081f25952238117", size = 42251098, upload-time = "2026-01-30T15:09:30.461Z" },
{ url = "https://files.pythonhosted.org/packages/f1/af/009958cbf23fac551a940d34e3206e6c7eed2b8c940d0c3afd1feb0b0589/playwright-1.58.0-py3-none-manylinux1_x86_64.whl", hash = "sha256:c95568ba1eda83812598c1dc9be60b4406dffd60b149bc1536180ad108723d6b", size = 46235268, upload-time = "2026-01-30T15:09:33.787Z" },
{ url = "https://files.pythonhosted.org/packages/d9/a6/0e66ad04b6d3440dae73efb39540c5685c5fc95b17c8b29340b62abbd952/playwright-1.58.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f9999948f1ab541d98812de25e3a8c410776aa516d948807140aff797b4bffa", size = 45964214, upload-time = "2026-01-30T15:09:36.751Z" },
{ url = "https://files.pythonhosted.org/packages/0e/4b/236e60ab9f6d62ed0fd32150d61f1f494cefbf02304c0061e78ed80c1c32/playwright-1.58.0-py3-none-win32.whl", hash = "sha256:1e03be090e75a0fabbdaeab65ce17c308c425d879fa48bb1d7986f96bfad0b99", size = 36815998, upload-time = "2026-01-30T15:09:39.627Z" },
{ url = "https://files.pythonhosted.org/packages/41/f8/5ec599c5e59d2f2f336a05b4f318e733077cd5044f24adb6f86900c3e6a7/playwright-1.58.0-py3-none-win_amd64.whl", hash = "sha256:a2bf639d0ce33b3ba38de777e08697b0d8f3dc07ab6802e4ac53fb65e3907af8", size = 36816005, upload-time = "2026-01-30T15:09:42.449Z" },
{ url = "https://files.pythonhosted.org/packages/c8/c4/cc0229fea55c87d6c9c67fe44a21e2cd28d1d558a5478ed4d617e9fb0c93/playwright-1.58.0-py3-none-win_arm64.whl", hash = "sha256:32ffe5c303901a13a0ecab91d1c3f74baf73b84f4bedbb6b935f5bc11cc98e1b", size = 33085919, upload-time = "2026-01-30T15:09:45.71Z" },
]
[[package]]
@@ -6829,11 +6833,11 @@ wheels = [
[[package]]
name = "sentence-transformers"
version = "4.0.2"
version = "5.4.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "huggingface-hub" },
{ name = "pillow" },
{ name = "numpy" },
{ name = "scikit-learn" },
{ name = "scipy" },
{ name = "torch" },
@@ -6841,9 +6845,9 @@ dependencies = [
{ name = "transformers" },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/3c/74/606783c6b2e80a609af25d2c487bbe32eb8f0970b0f7519fbb1a099f3ab8/sentence_transformers-4.0.2.tar.gz", hash = "sha256:d33d0c5a69ae0d682115c90e74fc1dc24c4786aeea78e26f5889b037e5921880", size = 267724, upload-time = "2025-04-03T11:29:06.046Z" }
sdist = { url = "https://files.pythonhosted.org/packages/4d/68/7f98c221940ce783b492ad6140384daf2e2918cd7175009d6a362c22b9ee/sentence_transformers-5.4.1.tar.gz", hash = "sha256:436bcb1182a0ff42a8fb2b1c43498a70d0a75b688d182f2cd0d1dd115af61ddc", size = 428910, upload-time = "2026-04-14T13:34:59.006Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/32/58/770e1e762893abbfe3cd048f1ed1ea6e00122a195651ea98fb27f55ad17a/sentence_transformers-4.0.2-py3-none-any.whl", hash = "sha256:25f5086d0746c22177f9fb7d431f3eebe6375f3afe1dc7c341c4ca9061e98771", size = 340618, upload-time = "2025-04-03T11:29:04.037Z" },
{ url = "https://files.pythonhosted.org/packages/c5/d9/3a9b6f2ccdedc9dc00fe37b2fc58f58f8efbff44565cf4bf39d8568bb13a/sentence_transformers-5.4.1-py3-none-any.whl", hash = "sha256:a6d640fc363849b63affb8e140e9d328feabab86f83d58ac3e16b1c28140b790", size = 571311, upload-time = "2026-04-14T13:34:57.731Z" },
]
[[package]]
@@ -7192,27 +7196,28 @@ wheels = [
[[package]]
name = "tokenizers"
version = "0.21.4"
version = "0.22.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "huggingface-hub" },
]
sdist = { url = "https://files.pythonhosted.org/packages/c2/2f/402986d0823f8d7ca139d969af2917fefaa9b947d1fb32f6168c509f2492/tokenizers-0.21.4.tar.gz", hash = "sha256:fa23f85fbc9a02ec5c6978da172cdcbac23498c3ca9f3645c5c68740ac007880", size = 351253, upload-time = "2025-07-28T15:48:54.325Z" }
sdist = { url = "https://files.pythonhosted.org/packages/73/6f/f80cfef4a312e1fb34baf7d85c72d4411afde10978d4657f8cdd811d3ccc/tokenizers-0.22.2.tar.gz", hash = "sha256:473b83b915e547aa366d1eee11806deaf419e17be16310ac0a14077f1e28f917", size = 372115, upload-time = "2026-01-05T10:45:15.988Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/98/c6/fdb6f72bf6454f52eb4a2510be7fb0f614e541a2554d6210e370d85efff4/tokenizers-0.21.4-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:2ccc10a7c3bcefe0f242867dc914fc1226ee44321eb618cfe3019b5df3400133", size = 2863987, upload-time = "2025-07-28T15:48:44.877Z" },
{ url = "https://files.pythonhosted.org/packages/8d/a6/28975479e35ddc751dc1ddc97b9b69bf7fcf074db31548aab37f8116674c/tokenizers-0.21.4-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:5e2f601a8e0cd5be5cc7506b20a79112370b9b3e9cb5f13f68ab11acd6ca7d60", size = 2732457, upload-time = "2025-07-28T15:48:43.265Z" },
{ url = "https://files.pythonhosted.org/packages/aa/8f/24f39d7b5c726b7b0be95dca04f344df278a3fe3a4deb15a975d194cbb32/tokenizers-0.21.4-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:39b376f5a1aee67b4d29032ee85511bbd1b99007ec735f7f35c8a2eb104eade5", size = 3012624, upload-time = "2025-07-28T13:22:43.895Z" },
{ url = "https://files.pythonhosted.org/packages/58/47/26358925717687a58cb74d7a508de96649544fad5778f0cd9827398dc499/tokenizers-0.21.4-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2107ad649e2cda4488d41dfd031469e9da3fcbfd6183e74e4958fa729ffbf9c6", size = 2939681, upload-time = "2025-07-28T13:22:47.499Z" },
{ url = "https://files.pythonhosted.org/packages/99/6f/cc300fea5db2ab5ddc2c8aea5757a27b89c84469899710c3aeddc1d39801/tokenizers-0.21.4-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c73012da95afafdf235ba80047699df4384fdc481527448a078ffd00e45a7d9", size = 3247445, upload-time = "2025-07-28T15:48:39.711Z" },
{ url = "https://files.pythonhosted.org/packages/be/bf/98cb4b9c3c4afd8be89cfa6423704337dc20b73eb4180397a6e0d456c334/tokenizers-0.21.4-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f23186c40395fc390d27f519679a58023f368a0aad234af145e0f39ad1212732", size = 3428014, upload-time = "2025-07-28T13:22:49.569Z" },
{ url = "https://files.pythonhosted.org/packages/75/c7/96c1cc780e6ca7f01a57c13235dd05b7bc1c0f3588512ebe9d1331b5f5ae/tokenizers-0.21.4-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cc88bb34e23a54cc42713d6d98af5f1bf79c07653d24fe984d2d695ba2c922a2", size = 3193197, upload-time = "2025-07-28T13:22:51.471Z" },
{ url = "https://files.pythonhosted.org/packages/f2/90/273b6c7ec78af547694eddeea9e05de771278bd20476525ab930cecaf7d8/tokenizers-0.21.4-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51b7eabb104f46c1c50b486520555715457ae833d5aee9ff6ae853d1130506ff", size = 3115426, upload-time = "2025-07-28T15:48:41.439Z" },
{ url = "https://files.pythonhosted.org/packages/91/43/c640d5a07e95f1cf9d2c92501f20a25f179ac53a4f71e1489a3dcfcc67ee/tokenizers-0.21.4-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:714b05b2e1af1288bd1bc56ce496c4cebb64a20d158ee802887757791191e6e2", size = 9089127, upload-time = "2025-07-28T15:48:46.472Z" },
{ url = "https://files.pythonhosted.org/packages/44/a1/dd23edd6271d4dca788e5200a807b49ec3e6987815cd9d0a07ad9c96c7c2/tokenizers-0.21.4-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:1340ff877ceedfa937544b7d79f5b7becf33a4cfb58f89b3b49927004ef66f78", size = 9055243, upload-time = "2025-07-28T15:48:48.539Z" },
{ url = "https://files.pythonhosted.org/packages/21/2b/b410d6e9021c4b7ddb57248304dc817c4d4970b73b6ee343674914701197/tokenizers-0.21.4-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:3c1f4317576e465ac9ef0d165b247825a2a4078bcd01cba6b54b867bdf9fdd8b", size = 9298237, upload-time = "2025-07-28T15:48:50.443Z" },
{ url = "https://files.pythonhosted.org/packages/b7/0a/42348c995c67e2e6e5c89ffb9cfd68507cbaeb84ff39c49ee6e0a6dd0fd2/tokenizers-0.21.4-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:c212aa4e45ec0bb5274b16b6f31dd3f1c41944025c2358faaa5782c754e84c24", size = 9461980, upload-time = "2025-07-28T15:48:52.325Z" },
{ url = "https://files.pythonhosted.org/packages/3d/d3/dacccd834404cd71b5c334882f3ba40331ad2120e69ded32cf5fda9a7436/tokenizers-0.21.4-cp39-abi3-win32.whl", hash = "sha256:6c42a930bc5f4c47f4ea775c91de47d27910881902b0f20e4990ebe045a415d0", size = 2329871, upload-time = "2025-07-28T15:48:56.841Z" },
{ url = "https://files.pythonhosted.org/packages/41/f2/fd673d979185f5dcbac4be7d09461cbb99751554ffb6718d0013af8604cb/tokenizers-0.21.4-cp39-abi3-win_amd64.whl", hash = "sha256:475d807a5c3eb72c59ad9b5fcdb254f6e17f53dfcbb9903233b0dfa9c943b597", size = 2507568, upload-time = "2025-07-28T15:48:55.456Z" },
{ url = "https://files.pythonhosted.org/packages/92/97/5dbfabf04c7e348e655e907ed27913e03db0923abb5dfdd120d7b25630e1/tokenizers-0.22.2-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:544dd704ae7238755d790de45ba8da072e9af3eea688f698b137915ae959281c", size = 3100275, upload-time = "2026-01-05T10:41:02.158Z" },
{ url = "https://files.pythonhosted.org/packages/2e/47/174dca0502ef88b28f1c9e06b73ce33500eedfac7a7692108aec220464e7/tokenizers-0.22.2-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:1e418a55456beedca4621dbab65a318981467a2b188e982a23e117f115ce5001", size = 2981472, upload-time = "2026-01-05T10:41:00.276Z" },
{ url = "https://files.pythonhosted.org/packages/d6/84/7990e799f1309a8b87af6b948f31edaa12a3ed22d11b352eaf4f4b2e5753/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2249487018adec45d6e3554c71d46eb39fa8ea67156c640f7513eb26f318cec7", size = 3290736, upload-time = "2026-01-05T10:40:32.165Z" },
{ url = "https://files.pythonhosted.org/packages/78/59/09d0d9ba94dcd5f4f1368d4858d24546b4bdc0231c2354aa31d6199f0399/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:25b85325d0815e86e0bac263506dd114578953b7b53d7de09a6485e4a160a7dd", size = 3168835, upload-time = "2026-01-05T10:40:38.847Z" },
{ url = "https://files.pythonhosted.org/packages/47/50/b3ebb4243e7160bda8d34b731e54dd8ab8b133e50775872e7a434e524c28/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bfb88f22a209ff7b40a576d5324bf8286b519d7358663db21d6246fb17eea2d5", size = 3521673, upload-time = "2026-01-05T10:40:56.614Z" },
{ url = "https://files.pythonhosted.org/packages/e0/fa/89f4cb9e08df770b57adb96f8cbb7e22695a4cb6c2bd5f0c4f0ebcf33b66/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1c774b1276f71e1ef716e5486f21e76333464f47bece56bbd554485982a9e03e", size = 3724818, upload-time = "2026-01-05T10:40:44.507Z" },
{ url = "https://files.pythonhosted.org/packages/64/04/ca2363f0bfbe3b3d36e95bf67e56a4c88c8e3362b658e616d1ac185d47f2/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:df6c4265b289083bf710dff49bc51ef252f9d5be33a45ee2bed151114a56207b", size = 3379195, upload-time = "2026-01-05T10:40:51.139Z" },
{ url = "https://files.pythonhosted.org/packages/2e/76/932be4b50ef6ccedf9d3c6639b056a967a86258c6d9200643f01269211ca/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:369cc9fc8cc10cb24143873a0d95438bb8ee257bb80c71989e3ee290e8d72c67", size = 3274982, upload-time = "2026-01-05T10:40:58.331Z" },
{ url = "https://files.pythonhosted.org/packages/1d/28/5f9f5a4cc211b69e89420980e483831bcc29dade307955cc9dc858a40f01/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:29c30b83d8dcd061078b05ae0cb94d3c710555fbb44861139f9f83dcca3dc3e4", size = 9478245, upload-time = "2026-01-05T10:41:04.053Z" },
{ url = "https://files.pythonhosted.org/packages/6c/fb/66e2da4704d6aadebf8cb39f1d6d1957df667ab24cff2326b77cda0dcb85/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:37ae80a28c1d3265bb1f22464c856bd23c02a05bb211e56d0c5301a435be6c1a", size = 9560069, upload-time = "2026-01-05T10:45:10.673Z" },
{ url = "https://files.pythonhosted.org/packages/16/04/fed398b05caa87ce9b1a1bb5166645e38196081b225059a6edaff6440fac/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:791135ee325f2336f498590eb2f11dc5c295232f288e75c99a36c5dbce63088a", size = 9899263, upload-time = "2026-01-05T10:45:12.559Z" },
{ url = "https://files.pythonhosted.org/packages/05/a1/d62dfe7376beaaf1394917e0f8e93ee5f67fea8fcf4107501db35996586b/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:38337540fbbddff8e999d59970f3c6f35a82de10053206a7562f1ea02d046fa5", size = 10033429, upload-time = "2026-01-05T10:45:14.333Z" },
{ url = "https://files.pythonhosted.org/packages/fd/18/a545c4ea42af3df6effd7d13d250ba77a0a86fb20393143bbb9a92e434d4/tokenizers-0.22.2-cp39-abi3-win32.whl", hash = "sha256:a6bf3f88c554a2b653af81f3204491c818ae2ac6fbc09e76ef4773351292bc92", size = 2502363, upload-time = "2026-01-05T10:45:20.593Z" },
{ url = "https://files.pythonhosted.org/packages/65/71/0670843133a43d43070abeb1949abfdef12a86d490bea9cd9e18e37c5ff7/tokenizers-0.22.2-cp39-abi3-win_amd64.whl", hash = "sha256:c9ea31edff2968b44a88f97d784c2f16dc0729b8b143ed004699ebca91f05c48", size = 2747786, upload-time = "2026-01-05T10:45:18.411Z" },
{ url = "https://files.pythonhosted.org/packages/72/f4/0de46cfa12cdcbcd464cc59fde36912af405696f687e53a091fb432f694c/tokenizers-0.22.2-cp39-abi3-win_arm64.whl", hash = "sha256:9ce725d22864a1e965217204946f830c37876eee3b2ba6fc6255e8e903d5fcbc", size = 2612133, upload-time = "2026-01-05T10:45:17.232Z" },
]
[[package]]
@@ -7338,23 +7343,22 @@ wheels = [
[[package]]
name = "transformers"
version = "4.53.0"
version = "5.5.4"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "filelock" },
{ name = "huggingface-hub" },
{ name = "numpy" },
{ name = "packaging" },
{ name = "pyyaml" },
{ name = "regex" },
{ name = "requests" },
{ name = "safetensors" },
{ name = "tokenizers" },
{ name = "tqdm" },
{ name = "typer" },
]
sdist = { url = "https://files.pythonhosted.org/packages/e8/40/f2d2c3bcf5c6135027cab0fd7db52f6149a1c23acc4e45f914c43d362386/transformers-4.53.0.tar.gz", hash = "sha256:f89520011b4a73066fdc7aabfa158317c3934a22e3cd652d7ffbc512c4063841", size = 9177265, upload-time = "2025-06-26T16:10:54.729Z" }
sdist = { url = "https://files.pythonhosted.org/packages/a5/1e/1e244ab2ab50a863e6b52cc55761910567fa532b69a6740f6e99c5fdbd98/transformers-5.5.4.tar.gz", hash = "sha256:2e67cadba81fc7608cc07c4dd54f524820bc3d95b1cabd0ef3db7733c4f8b82e", size = 8227649, upload-time = "2026-04-13T16:55:55.181Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/5e/0c/68d03a38f6ab2ba2b2829eb11b334610dd236e7926787f7656001b68e1f2/transformers-4.53.0-py3-none-any.whl", hash = "sha256:7d8039ff032c01a2d7f8a8fe0066620367003275f023815a966e62203f9f5dd7", size = 10821970, upload-time = "2025-06-26T16:10:51.505Z" },
{ url = "https://files.pythonhosted.org/packages/29/fb/162a66789c65e5afa3b051309240c26bf37fbc8fea285b4546ae747995a2/transformers-5.5.4-py3-none-any.whl", hash = "sha256:0bd6281b82966fe5a7a16f553ea517a9db1dee6284d7cb224dfd88fc0dd1c167", size = 10236696, upload-time = "2026-04-13T16:55:51.497Z" },
]
[[package]]

View File

@@ -1,14 +1,11 @@
import "@opal/components/tooltip.css";
import { Interactive, type InteractiveStatelessProps } from "@opal/core";
import type {
ContainerSizeVariants,
ExtremaSizeVariants,
RichStr,
} from "@opal/types";
import { Text } from "@opal/components";
import type { TooltipSide } from "@opal/components";
import { Text, type TooltipSide, Tooltip } from "@opal/components";
import type { IconFunctionComponent } from "@opal/types";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";
import { iconWrapper } from "@opal/components/buttons/icon-wrapper";
// ---------------------------------------------------------------------------
@@ -118,24 +115,11 @@ function Button({
</Interactive.Stateless>
);
if (tooltip) {
return (
<TooltipPrimitive.Root>
<TooltipPrimitive.Trigger asChild>{button}</TooltipPrimitive.Trigger>
<TooltipPrimitive.Portal>
<TooltipPrimitive.Content
className="opal-tooltip"
side={tooltipSide}
sideOffset={4}
>
{tooltip}
</TooltipPrimitive.Content>
</TooltipPrimitive.Portal>
</TooltipPrimitive.Root>
);
}
return button;
return (
<Tooltip tooltip={tooltip} side={tooltipSide}>
{button}
</Tooltip>
);
}
export { Button, type ButtonProps };

View File

@@ -3,11 +3,9 @@ import {
type InteractiveStatefulInteraction,
type InteractiveStatefulProps,
} from "@opal/core";
import type { TooltipSide } from "@opal/components";
import { Text, Tooltip, type TooltipSide } from "@opal/components";
import type { IconFunctionComponent, RichStr } from "@opal/types";
import { Text } from "@opal/components";
import { SvgX } from "@opal/icons";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";
import { iconWrapper } from "@opal/components/buttons/icon-wrapper";
import { ChevronIcon } from "@opal/components/buttons/chevron";
import { Button } from "@opal/components/buttons/button/components";
@@ -100,21 +98,10 @@ function FilterButton({
</div>
);
if (!tooltip) return button;
return (
<TooltipPrimitive.Root>
<TooltipPrimitive.Trigger asChild>{button}</TooltipPrimitive.Trigger>
<TooltipPrimitive.Portal>
<TooltipPrimitive.Content
className="opal-tooltip"
side={tooltipSide}
sideOffset={4}
>
{tooltip}
</TooltipPrimitive.Content>
</TooltipPrimitive.Portal>
</TooltipPrimitive.Root>
<Tooltip tooltip={tooltip} side={tooltipSide}>
{button}
</Tooltip>
);
}

View File

@@ -3,12 +3,10 @@ import {
type InteractiveStatefulProps,
InteractiveContainerRoundingVariant,
} from "@opal/core";
import type { ExtremaSizeVariants } from "@opal/types";
import type { TooltipSide } from "@opal/components";
import type { DistributiveOmit } from "@opal/types";
import type { ExtremaSizeVariants, DistributiveOmit } from "@opal/types";
import { Tooltip, type TooltipSide } from "@opal/components";
import type { ContentActionProps } from "@opal/layouts/content-action/components";
import { ContentAction } from "@opal/layouts";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";
// ---------------------------------------------------------------------------
// Types
@@ -98,21 +96,10 @@ function LineItemButton({
</Interactive.Stateful>
);
if (!tooltip) return item;
return (
<TooltipPrimitive.Root>
<TooltipPrimitive.Trigger asChild>{item}</TooltipPrimitive.Trigger>
<TooltipPrimitive.Portal>
<TooltipPrimitive.Content
className="opal-tooltip"
side={tooltipSide}
sideOffset={4}
>
{tooltip}
</TooltipPrimitive.Content>
</TooltipPrimitive.Portal>
</TooltipPrimitive.Root>
<Tooltip tooltip={tooltip} side={tooltipSide}>
{item}
</Tooltip>
);
}

View File

@@ -6,13 +6,11 @@ import {
import type {
ContainerSizeVariants,
ExtremaSizeVariants,
IconFunctionComponent,
RichStr,
} from "@opal/types";
import { Text } from "@opal/components";
import { Text, Tooltip, type TooltipSide } from "@opal/components";
import type { InteractiveContainerRoundingVariant } from "@opal/core";
import type { TooltipSide } from "@opal/components";
import type { IconFunctionComponent } from "@opal/types";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";
import { cn } from "@opal/utils";
import { iconWrapper } from "@opal/components/buttons/icon-wrapper";
import { ChevronIcon } from "@opal/components/buttons/chevron";
@@ -172,21 +170,10 @@ function OpenButton({
const resolvedTooltip =
tooltip ?? (foldable && disabled && children ? children : undefined);
if (!resolvedTooltip) return button;
return (
<TooltipPrimitive.Root>
<TooltipPrimitive.Trigger asChild>{button}</TooltipPrimitive.Trigger>
<TooltipPrimitive.Portal>
<TooltipPrimitive.Content
className="opal-tooltip"
side={tooltipSide}
sideOffset={4}
>
<Text>{resolvedTooltip}</Text>
</TooltipPrimitive.Content>
</TooltipPrimitive.Portal>
</TooltipPrimitive.Root>
<Tooltip tooltip={resolvedTooltip} side={tooltipSide}>
{button}
</Tooltip>
);
}

View File

@@ -5,12 +5,10 @@ import { Interactive, type InteractiveStatefulProps } from "@opal/core";
import type {
ContainerSizeVariants,
ExtremaSizeVariants,
IconFunctionComponent,
RichStr,
} from "@opal/types";
import { Text } from "@opal/components";
import type { TooltipSide } from "@opal/components";
import type { IconFunctionComponent } from "@opal/types";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";
import { Text, Tooltip, type TooltipSide } from "@opal/components";
import { cn } from "@opal/utils";
import { iconWrapper } from "@opal/components/buttons/icon-wrapper";
@@ -129,21 +127,10 @@ function SelectButton({
const resolvedTooltip =
tooltip ?? (foldable && disabled && children ? children : undefined);
if (!resolvedTooltip) return button;
return (
<TooltipPrimitive.Root>
<TooltipPrimitive.Trigger asChild>{button}</TooltipPrimitive.Trigger>
<TooltipPrimitive.Portal>
<TooltipPrimitive.Content
className="opal-tooltip"
side={tooltipSide}
sideOffset={4}
>
<Text>{resolvedTooltip}</Text>
</TooltipPrimitive.Content>
</TooltipPrimitive.Portal>
</TooltipPrimitive.Root>
<Tooltip tooltip={resolvedTooltip} side={tooltipSide}>
{button}
</Tooltip>
);
}

View File

@@ -5,10 +5,8 @@ import type { ButtonType, IconFunctionComponent } from "@opal/types";
import type { Route } from "next";
import { Interactive, type InteractiveStatefulVariant } from "@opal/core";
import { ContentAction } from "@opal/layouts";
import { Text } from "@opal/components";
import { Text, Tooltip } from "@opal/components";
import Link from "next/link";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";
import "@opal/components/tooltip.css";
// ---------------------------------------------------------------------------
// Types
@@ -145,18 +143,9 @@ function SidebarTab({
if (typeof children !== "string") return content;
if (folded) {
return (
<TooltipPrimitive.Root>
<TooltipPrimitive.Trigger asChild>{content}</TooltipPrimitive.Trigger>
<TooltipPrimitive.Portal>
<TooltipPrimitive.Content
className="opal-tooltip"
side="right"
sideOffset={4}
>
{children}
</TooltipPrimitive.Content>
</TooltipPrimitive.Portal>
</TooltipPrimitive.Root>
<Tooltip tooltip={children} side="right">
{content}
</Tooltip>
);
}
return content;

View File

@@ -0,0 +1,106 @@
import type { Meta, StoryObj } from "@storybook/react";
import { MessageCard } from "@opal/components/cards/message-card/components";
import { Button } from "@opal/components/buttons/button/components";
const meta: Meta<typeof MessageCard> = {
title: "opal/components/MessageCard",
component: MessageCard,
tags: ["autodocs"],
};
export default meta;
type Story = StoryObj<typeof MessageCard>;
export const Default: Story = {
render: () => (
<div className="w-[32rem]">
<MessageCard title="Note" description="This is a default message card." />
</div>
),
};
export const Info: Story = {
render: () => (
<div className="w-[32rem]">
<MessageCard
variant="info"
title="Heads up"
description="Changes apply to newly indexed documents only."
/>
</div>
),
};
export const Success: Story = {
render: () => (
<div className="w-[32rem]">
<MessageCard
variant="success"
title="All set"
description="Your embedding model has been updated successfully."
/>
</div>
),
};
export const Warning: Story = {
render: () => (
<div className="w-[32rem]">
<MessageCard
variant="warning"
title="Re-indexing required"
description="Toggle this setting to re-index all documents."
/>
</div>
),
};
export const Error: Story = {
render: () => (
<div className="w-[32rem]">
<MessageCard
variant="error"
title="Connection failed"
description="Unable to reach the embedding model server."
/>
</div>
),
};
export const WithBottomChildren: Story = {
render: () => (
<div className="w-[32rem]">
<MessageCard
variant="warning"
title="Action required"
description="Your documents need to be re-indexed after this change."
bottomChildren={
<div className="flex justify-end pt-2">
<Button prominence="secondary" size="sm">
Re-index Now
</Button>
</div>
}
/>
</div>
),
};
export const AllVariants: Story = {
render: () => (
<div className="flex flex-col gap-4 w-[32rem]">
{(["default", "info", "success", "warning", "error"] as const).map(
(variant) => (
<MessageCard
key={variant}
variant={variant}
title={`${
variant.charAt(0).toUpperCase() + variant.slice(1)
} variant`}
description={`This is a ${variant} message card.`}
/>
)
)}
</div>
),
};

View File

@@ -0,0 +1,47 @@
# MessageCard
**Import:** `import { MessageCard } from "@opal/components";`
A styled card for displaying messages, alerts, or status notifications. Uses `Content` internally
for consistent title/description/icon layout. Supports 5 variants with corresponding background
and border colors.
## Props
| Prop | Type | Default | Description |
|------|------|---------|-------------|
| `variant` | `"default" \| "info" \| "success" \| "warning" \| "error"` | `"default"` | Visual variant (controls background, border, and icon) |
| `icon` | `IconFunctionComponent` | per variant | Override the default variant icon |
| `title` | `string \| RichStr` | — | Main title text |
| `description` | `string \| RichStr` | — | Description below the title |
| `bottomChildren` | `ReactNode` | — | Content below a divider, under the main content |
| `rightChildren` | `ReactNode` | — | Content on the right side. Mutually exclusive with `onClose`. |
| `onClose` | `() => void` | — | Close button callback. When omitted, no close button is rendered. |
## Usage
```tsx
import { MessageCard } from "@opal/components";
// Simple info message
<MessageCard
variant="info"
title="Heads up"
description="Changes apply to newly indexed documents only."
/>
// Warning with bottom content
<MessageCard
variant="warning"
title="Re-indexing required"
description="Toggle this setting to re-index all documents."
bottomChildren={<Button>Re-index Now</Button>}
/>
// Error state
<MessageCard
variant="error"
title="Connection failed"
description="Unable to reach the embedding model server."
/>
```

View File

@@ -0,0 +1,160 @@
import "@opal/components/cards/message-card/styles.css";
import { cn } from "@opal/utils";
import type { RichStr, IconFunctionComponent } from "@opal/types";
import { ContentAction } from "@opal/layouts";
import { Button, Divider } from "@opal/components";
import {
SvgAlertCircle,
SvgAlertTriangle,
SvgCheckCircle,
SvgX,
SvgXOctagon,
} from "@opal/icons";
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
type MessageCardVariant = "default" | "info" | "success" | "warning" | "error";
interface MessageCardBaseProps {
/** Visual variant controlling background, border, and icon. @default "default" */
variant?: MessageCardVariant;
/** Override the default variant icon. */
icon?: IconFunctionComponent;
/** Main title text. */
title: string | RichStr;
/** Optional description below the title. */
description?: string | RichStr;
/**
* Content rendered below a divider, under the main content area.
* When provided, a `Divider` is inserted between the `ContentAction` and this node.
*/
bottomChildren?: React.ReactNode;
/** Ref forwarded to the root `<div>`. */
ref?: React.Ref<HTMLDivElement>;
}
type MessageCardProps = MessageCardBaseProps &
(
| {
/** Content rendered on the right side of the card. Mutually exclusive with `onClose`. */
rightChildren?: React.ReactNode;
onClose?: never;
}
| {
rightChildren?: never;
/** Close button callback. Mutually exclusive with `rightChildren`. */
onClose?: () => void;
}
);
// ---------------------------------------------------------------------------
// Variant config
// ---------------------------------------------------------------------------
const VARIANT_CONFIG: Record<
MessageCardVariant,
{ icon: IconFunctionComponent; iconClass: string }
> = {
default: { icon: SvgAlertCircle, iconClass: "stroke-text-03" },
info: { icon: SvgAlertCircle, iconClass: "stroke-status-info-05" },
success: { icon: SvgCheckCircle, iconClass: "stroke-status-success-05" },
warning: { icon: SvgAlertTriangle, iconClass: "stroke-status-warning-05" },
error: { icon: SvgXOctagon, iconClass: "stroke-status-error-05" },
};
// ---------------------------------------------------------------------------
// MessageCard
// ---------------------------------------------------------------------------
/**
* A styled card for displaying messages, alerts, or status notifications.
*
* Uses `ContentAction` internally for consistent title/description/icon layout
* with optional right-side actions. Supports 5 variants with corresponding
* background, border, and icon colors.
*
* `onClose` and `rightChildren` are mutually exclusive — specify one or neither.
*
* @example
* ```tsx
* import { MessageCard } from "@opal/components";
*
* // Simple message
* <MessageCard
* variant="info"
* title="Heads up"
* description="Changes apply to newly indexed documents only."
* />
*
* // With close button
* <MessageCard
* variant="warning"
* title="Re-indexing required"
* onClose={() => setDismissed(true)}
* />
*
* // With right children
* <MessageCard
* variant="error"
* title="Connection failed"
* rightChildren={<Button>Retry</Button>}
* />
* ```
*/
function MessageCard({
variant = "default",
icon: iconOverride,
title,
description,
bottomChildren,
rightChildren,
onClose,
ref,
}: MessageCardProps) {
const { icon: DefaultIcon, iconClass } = VARIANT_CONFIG[variant];
const Icon = iconOverride ?? DefaultIcon;
const right = onClose ? (
<Button
icon={SvgX}
prominence="internal"
size="md"
onClick={onClose}
aria-label="Close"
/>
) : (
rightChildren
);
return (
<div className="opal-message-card" data-variant={variant} ref={ref}>
<ContentAction
icon={(props) => (
<Icon {...props} className={cn(props.className, iconClass)} />
)}
title={title}
description={description}
sizePreset="main-ui"
variant="section"
paddingVariant="lg"
rightChildren={right}
/>
{bottomChildren && (
<>
<Divider paddingParallel="sm" paddingPerpendicular="xs" />
{bottomChildren}
</>
)}
</div>
);
}
export { MessageCard, type MessageCardProps, type MessageCardVariant };

View File

@@ -0,0 +1,25 @@
.opal-message-card {
@apply flex flex-col self-stretch rounded-16 border p-2;
}
/* Variant colors */
.opal-message-card[data-variant="default"] {
@apply bg-background-tint-01 border-border-01;
}
.opal-message-card[data-variant="info"] {
@apply bg-status-info-00 border-status-info-02;
}
.opal-message-card[data-variant="success"] {
@apply bg-status-success-00 border-status-success-02;
}
.opal-message-card[data-variant="warning"] {
@apply bg-status-warning-00 border-status-warning-02;
}
.opal-message-card[data-variant="error"] {
@apply bg-status-error-00 border-status-error-02;
}

View File

@@ -1,7 +1,10 @@
import "@opal/components/tooltip.css";
/* Shared types */
export type TooltipSide = "top" | "bottom" | "left" | "right";
/* Tooltip */
export {
Tooltip,
type TooltipProps,
type TooltipSide,
type TooltipAlign,
} from "@opal/components/tooltip/components";
/* Button */
export {
@@ -80,6 +83,13 @@ export {
type EmptyMessageCardProps,
} from "@opal/components/cards/empty-message-card/components";
/* MessageCard */
export {
MessageCard,
type MessageCardProps,
type MessageCardVariant,
} from "@opal/components/cards/message-card/components";
/* Pagination */
export {
Pagination,

View File

@@ -64,7 +64,7 @@ export default function InlineMarkdown({ content }: InlineMarkdownProps) {
// RichStr helpers
// ---------------------------------------------------------------------------
function isRichStr(value: unknown): value is RichStr {
export function isRichStr(value: unknown): value is RichStr {
return (
typeof value === "object" &&
value !== null &&

View File

@@ -0,0 +1,51 @@
# Tooltip
**Import:** `import { Tooltip } from "@opal/components";`
A minimal tooltip wrapper that shows content on hover. When `tooltip` is `undefined`, children
are returned as-is with no wrapping. Uses Radix Tooltip primitives internally.
Supports both uncontrolled (default hover behavior) and controlled (`open` + `onOpenChange`)
modes.
## Props
| Prop | Type | Default | Description |
|------|------|---------|-------------|
| `tooltip` | `ReactNode \| RichStr` | — | Tooltip content. `string`/`RichStr` rendered via `Text`; `ReactNode` rendered as-is. `undefined` = no tooltip. |
| `side` | `"top" \| "bottom" \| "left" \| "right"` | `"right"` | Which side the tooltip appears on |
| `align` | `"start" \| "center" \| "end"` | `"center"` | Alignment along the tooltip's side axis |
| `open` | `boolean` | — | Controlled open state. When omitted, uses default hover behavior. |
| `onOpenChange` | `(open: boolean) => void` | — | Callback when open state changes. Use with `open` for controlled mode. |
| `delayDuration` | `number` | — | Delay in ms before the tooltip appears on hover |
| `sideOffset` | `number` | `4` | Distance in pixels between the trigger and the tooltip |
## Usage
```tsx
import { Tooltip } from "@opal/components";
// Uncontrolled (default hover behavior)
<Tooltip tooltip="Delete this item">
<Button icon={SvgTrash} />
</Tooltip>
// Controlled
const [isOpen, setIsOpen] = useState(false);
<Tooltip tooltip="Details" open={isOpen} onOpenChange={setIsOpen}>
<Button icon={SvgInfo} />
</Tooltip>
// Conditional — no tooltip when undefined
<Tooltip tooltip={isDisabled ? "Not available" : undefined}>
<Button>Action</Button>
</Tooltip>
```
## Notes
- Children must be a single element compatible with Radix `asChild` (DOM element or a component
that forwards refs).
- `string` and `RichStr` content is rendered via `Text font="secondary-body" color="inherit"`.
- `ReactNode` content is rendered as-is for custom tooltip layouts.
- The `opal-tooltip` CSS class provides z-indexing, animations, and a `max-width: 20rem` cap.

View File

@@ -0,0 +1,63 @@
import type { Meta, StoryObj } from "@storybook/react";
import type { Decorator } from "@storybook/react";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";
import { Tooltip } from "@opal/components/tooltip/components";
import { Button } from "@opal/components/buttons/button/components";
import { Card } from "@opal/components";
const withTooltipProvider: Decorator = (Story) => (
<TooltipPrimitive.Provider>
<Story />
</TooltipPrimitive.Provider>
);
const meta: Meta<typeof Tooltip> = {
title: "opal/components/Tooltip",
component: Tooltip,
tags: ["autodocs"],
decorators: [withTooltipProvider],
};
export default meta;
type Story = StoryObj<typeof Tooltip>;
export const Default: Story = {
render: () => (
<Tooltip tooltip="This is a tooltip">
<Button prominence="secondary">Hover me</Button>
</Tooltip>
),
};
export const Sides: Story = {
render: () => (
<div className="flex gap-8 items-center py-16 px-32">
{(["top", "right", "bottom", "left"] as const).map((side) => (
<Tooltip key={side} tooltip={`Tooltip on ${side}`} side={side}>
<Button prominence="secondary" size="sm">
{side}
</Button>
</Tooltip>
))}
</div>
),
};
export const OnCard: Story = {
render: () => (
<Tooltip tooltip="Card tooltip appears on hover">
<Card border="solid" padding="md">
<p className="text-sm">Hover this card</p>
</Card>
</Tooltip>
),
};
export const NoTooltip: Story = {
name: "No tooltip (passthrough)",
render: () => (
<Tooltip tooltip={undefined}>
<Button prominence="secondary">No tooltip</Button>
</Tooltip>
),
};

View File

@@ -0,0 +1,131 @@
"use client";
import "@opal/components/tooltip/styles.css";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";
import type { RichStr } from "@opal/types";
import { Text } from "@opal/components";
import { isRichStr } from "@opal/components/text/InlineMarkdown";
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
type TooltipSide = "top" | "bottom" | "left" | "right";
type TooltipAlign = "start" | "center" | "end";
interface TooltipProps {
/**
* Tooltip content shown on hover. When `undefined`, the tooltip is not
* rendered and children are returned as-is.
*
* - `string` or `RichStr` — rendered via `Text` with consistent styling.
* - `ReactNode` — rendered as-is for custom tooltip content.
*/
tooltip?: React.ReactNode | RichStr;
/** Which side the tooltip appears on. @default "right" */
side?: TooltipSide;
/** Alignment along the tooltip's side axis. @default "center" */
align?: TooltipAlign;
/**
* Controlled open state. When provided, the tooltip's visibility is
* externally managed. When omitted, the tooltip uses Radix's default
* hover-based open handling.
*/
open?: boolean;
/**
* Callback fired when the tooltip's open state changes. Use with `open`
* for controlled behavior.
*/
onOpenChange?: (open: boolean) => void;
/**
* Delay in milliseconds before the tooltip appears on hover.
* Passed to `TooltipPrimitive.Root`.
*/
delayDuration?: number;
/** Distance in pixels between the trigger and the tooltip. @default 4 */
sideOffset?: number;
/**
* Children to wrap. Must be a single element compatible with Radix
* `asChild` (i.e. a DOM element or a component that forwards refs).
*/
children: React.ReactElement;
}
// ---------------------------------------------------------------------------
// Tooltip
// ---------------------------------------------------------------------------
/**
* A minimal tooltip wrapper that shows content on hover.
*
* Renders nothing extra when `tooltip` is `undefined` — just passes children
* through. When `tooltip` is provided, wraps children with a Radix tooltip.
*
* Supports both uncontrolled (default hover behavior) and controlled
* (`open` + `onOpenChange`) modes.
*
* @example
* ```tsx
* import { Tooltip } from "@opal/components";
*
* // Uncontrolled (default)
* <Tooltip tooltip="Delete this item">
* <Button icon={SvgTrash} />
* </Tooltip>
*
* // Controlled
* <Tooltip tooltip="Details" open={isOpen} onOpenChange={setIsOpen}>
* <Button icon={SvgInfo} />
* </Tooltip>
* ```
*/
function Tooltip({
tooltip,
side = "right",
align = "center",
open,
onOpenChange,
delayDuration,
sideOffset = 4,
children,
}: TooltipProps) {
if (tooltip == null) return children;
const content =
typeof tooltip === "string" || isRichStr(tooltip) ? (
<Text font="secondary-body" color="inherit">
{tooltip}
</Text>
) : (
tooltip
);
return (
<TooltipPrimitive.Root
open={open}
onOpenChange={onOpenChange}
delayDuration={delayDuration}
>
<TooltipPrimitive.Trigger asChild>{children}</TooltipPrimitive.Trigger>
<TooltipPrimitive.Portal>
<TooltipPrimitive.Content
className="opal-tooltip"
side={side}
align={align}
sideOffset={sideOffset}
>
{content}
</TooltipPrimitive.Content>
</TooltipPrimitive.Portal>
</TooltipPrimitive.Root>
);
}
export { Tooltip, type TooltipProps, type TooltipSide, type TooltipAlign };

View File

@@ -1,5 +1,3 @@
/* Shared tooltip content styling */
.opal-tooltip {
z-index: var(--z-tooltip, 1300);
max-width: 20rem;

View File

@@ -1,11 +1,8 @@
import "@opal/core/disabled/styles.css";
import "@opal/components/tooltip.css";
import React from "react";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";
import { Slot } from "@radix-ui/react-slot";
import type { TooltipSide } from "@opal/components";
import { Tooltip, type TooltipSide } from "@opal/components";
import type { RichStr } from "@opal/types";
import { Text } from "@opal/components";
// ---------------------------------------------------------------------------
// Types
@@ -88,21 +85,10 @@ function Disabled({
if (!showTooltip) return wrapper;
// TODO(@raunakab): Replace this raw Radix tooltip with the opalified
// Tooltip component once it lands.
return (
<TooltipPrimitive.Root>
<TooltipPrimitive.Trigger asChild>{wrapper}</TooltipPrimitive.Trigger>
<TooltipPrimitive.Portal>
<TooltipPrimitive.Content
className="opal-tooltip"
side={tooltipSide}
sideOffset={4}
>
<Text font="secondary-body">{tooltip}</Text>
</TooltipPrimitive.Content>
</TooltipPrimitive.Portal>
</TooltipPrimitive.Root>
<Tooltip tooltip={tooltip} side={tooltipSide}>
{wrapper}
</Tooltip>
);
}

View File

@@ -15,6 +15,7 @@ type InteractiveStatefulVariant =
| "select-heavy"
| "select-card"
| "select-tinted"
| "select-input"
| "select-filter"
| "sidebar-heavy"
| "sidebar-light";
@@ -35,6 +36,7 @@ interface InteractiveStatefulProps
* - `"select-heavy"` — tinted selected background (for list rows, model pickers)
* - `"select-card"` — like select-heavy but filled state has a visible background (for cards/larger surfaces)
* - `"select-tinted"` — like select-heavy but with a tinted rest background
* - `"select-input"` — rests at neutral-00 (matches input bar), hover/open shows neutral-03 + border-01
* - `"select-filter"` — like select-tinted for empty/filled; selected state uses inverted tint backgrounds and inverted text (for filter buttons)
* - `"sidebar-heavy"` — sidebar navigation items: muted when unselected (text-03/text-02), bold when selected (text-04/text-03)
* - `"sidebar-light"` — sidebar navigation items: uniformly muted across all states (text-02/text-02)

View File

@@ -350,6 +350,41 @@
--interactive-foreground-icon: var(--text-01);
}
/* ---------------------------------------------------------------------------
Select-Input — Empty
Matches input bar background at rest, tints on hover/open.
--------------------------------------------------------------------------- */
.interactive[data-interactive-variant="select-input"][data-interactive-state="empty"] {
@apply bg-background-neutral-00;
--interactive-foreground: var(--text-04);
--interactive-foreground-icon: var(--text-03);
}
.interactive[data-interactive-variant="select-input"][data-interactive-state="empty"]:hover:not(
[data-disabled]
),
.interactive[data-interactive-variant="select-input"][data-interactive-state="empty"][data-interaction="hover"]:not(
[data-disabled]
) {
@apply bg-background-neutral-03;
--interactive-foreground: var(--text-04);
--interactive-foreground-icon: var(--text-03);
}
.interactive[data-interactive-variant="select-input"][data-interactive-state="empty"]:active:not(
[data-disabled]
),
.interactive[data-interactive-variant="select-input"][data-interactive-state="empty"][data-interaction="active"]:not(
[data-disabled]
) {
@apply bg-background-neutral-03;
--interactive-foreground: var(--text-05);
--interactive-foreground-icon: var(--text-05);
}
.interactive[data-interactive-variant="select-input"][data-interactive-state="empty"][data-disabled] {
@apply bg-transparent;
--interactive-foreground: var(--text-01);
--interactive-foreground-icon: var(--text-01);
}
/* ---------------------------------------------------------------------------
Select-Tinted — Filled
--------------------------------------------------------------------------- */

View File

@@ -54,6 +54,8 @@ interface InputLayoutProps {
withLabel?: boolean | string;
disabled?: boolean;
/** Ref forwarded to the inner content `Section`. */
ref?: React.Ref<HTMLDivElement>;
children?: React.ReactNode;
title: string | RichStr;
/** Tag rendered inline beside the title (passed through to Content). */
@@ -73,6 +75,7 @@ export interface VerticalProps extends InputLayoutProps {
function Vertical({
withLabel: withLabelProp = false,
disabled,
ref,
children,
subDescription,
title,
@@ -84,7 +87,7 @@ function Vertical({
typeof withLabelProp === "string" ? withLabelProp : undefined;
const content = (
<Section gap={0.25} alignItems="start">
<Section ref={ref} gap={0.25} alignItems="start">
<Content
title={title}
description={description}
@@ -123,6 +126,7 @@ export interface HorizontalProps extends InputLayoutProps {
function Horizontal({
withLabel: withLabelProp = false,
disabled,
ref,
children,
center,
title,
@@ -134,7 +138,7 @@ function Horizontal({
typeof withLabelProp === "string" ? withLabelProp : undefined;
const content = (
<Section gap={0.25} alignItems="start">
<Section ref={ref} gap={0.25} alignItems="start">
<Section
flexDirection="row"
justifyContent="between"
@@ -210,9 +214,14 @@ export type InputErrorType = "error" | "warning";
interface InputErrorTextProps {
children?: React.ReactNode;
type?: InputErrorType;
ref?: React.Ref<HTMLDivElement>;
}
function InputErrorText({ children, type = "error" }: InputErrorTextProps) {
function InputErrorText({
children,
type = "error",
ref,
}: InputErrorTextProps) {
const Icon = type === "error" ? SvgXOctagon : SvgAlertCircle;
const colorClass =
type === "error" ? "text-status-error-05" : "text-status-warning-05";
@@ -220,7 +229,7 @@ function InputErrorText({ children, type = "error" }: InputErrorTextProps) {
type === "error" ? "stroke-status-error-05" : "stroke-status-warning-05";
return (
<div className="px-1">
<div ref={ref} className="px-1">
{/* TODO(@raunakab): update this with `Content` when it supports custom colours */}
<Section flexDirection="row" justifyContent="start" gap={0.25}>
<Icon size={12} className={strokeClass} />
@@ -250,10 +259,12 @@ function InputDivider() {
// InputPadder
// ---------------------------------------------------------------------------
type InputPadderProps = WithoutStyles<React.HTMLAttributes<HTMLDivElement>>;
type InputPadderProps = WithoutStyles<React.HTMLAttributes<HTMLDivElement>> & {
ref?: React.Ref<HTMLDivElement>;
};
function InputPadder(props: InputPadderProps) {
return <div {...props} className="p-2 w-full" />;
function InputPadder({ ref, ...props }: InputPadderProps) {
return <div ref={ref} {...props} className="p-2 w-full" />;
}
// ---------------------------------------------------------------------------

388
web/package-lock.json generated
View File

@@ -62,7 +62,7 @@
"mdast-util-find-and-replace": "^3.0.1",
"mime": "^4.1.0",
"motion": "^12.29.0",
"next": "16.1.7",
"next": "16.2.3",
"next-themes": "^0.4.4",
"postcss": "^8.5.6",
"posthog-js": "^1.176.0",
@@ -1676,7 +1676,9 @@
}
},
"node_modules/@img/colour": {
"version": "1.0.0",
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@img/colour/-/colour-1.1.0.tgz",
"integrity": "sha512-Td76q7j57o/tLVdgS746cYARfSyxk8iEfRxewL9h4OMzYhbW4TAcppl0mT4eyqXddh6L/jwoM75mo7ixa/pCeQ==",
"license": "MIT",
"optional": true,
"engines": {
@@ -1788,9 +1790,9 @@
}
},
"node_modules/@img/sharp-libvips-linux-ppc64": {
"version": "1.2.3",
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-ppc64/-/sharp-libvips-linux-ppc64-1.2.3.tgz",
"integrity": "sha512-Y2T7IsQvJLMCBM+pmPbM3bKT/yYJvVtLJGfCs4Sp95SjvnFIjynbjzsa7dY1fRJX45FTSfDksbTp6AGWudiyCg==",
"version": "1.2.4",
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-ppc64/-/sharp-libvips-linux-ppc64-1.2.4.tgz",
"integrity": "sha512-FMuvGijLDYG6lW+b/UvyilUWu5Ayu+3r2d1S8notiGCIyYU/76eig1UfMmkZ7vwgOrzKzlQbFSuQfgm7GYUPpA==",
"cpu": [
"ppc64"
],
@@ -1803,6 +1805,22 @@
"url": "https://opencollective.com/libvips"
}
},
"node_modules/@img/sharp-libvips-linux-riscv64": {
"version": "1.2.4",
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-riscv64/-/sharp-libvips-linux-riscv64-1.2.4.tgz",
"integrity": "sha512-oVDbcR4zUC0ce82teubSm+x6ETixtKZBh/qbREIOcI3cULzDyb18Sr/Wcyx7NRQeQzOiHTNbZFF1UwPS2scyGA==",
"cpu": [
"riscv64"
],
"license": "LGPL-3.0-or-later",
"optional": true,
"os": [
"linux"
],
"funding": {
"url": "https://opencollective.com/libvips"
}
},
"node_modules/@img/sharp-libvips-linux-s390x": {
"version": "1.0.4",
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-s390x/-/sharp-libvips-linux-s390x-1.0.4.tgz",
@@ -1912,9 +1930,9 @@
}
},
"node_modules/@img/sharp-linux-ppc64": {
"version": "0.34.4",
"resolved": "https://registry.npmjs.org/@img/sharp-linux-ppc64/-/sharp-linux-ppc64-0.34.4.tgz",
"integrity": "sha512-F4PDtF4Cy8L8hXA2p3TO6s4aDt93v+LKmpcYFLAVdkkD3hSxZzee0rh6/+94FpAynsuMpLX5h+LRsSG3rIciUQ==",
"version": "0.34.5",
"resolved": "https://registry.npmjs.org/@img/sharp-linux-ppc64/-/sharp-linux-ppc64-0.34.5.tgz",
"integrity": "sha512-7zznwNaqW6YtsfrGGDA6BRkISKAAE1Jo0QdpNYXNMHu2+0dTrPflTLNkpc8l7MUP5M16ZJcUvysVWWrMefZquA==",
"cpu": [
"ppc64"
],
@@ -1930,7 +1948,29 @@
"url": "https://opencollective.com/libvips"
},
"optionalDependencies": {
"@img/sharp-libvips-linux-ppc64": "1.2.3"
"@img/sharp-libvips-linux-ppc64": "1.2.4"
}
},
"node_modules/@img/sharp-linux-riscv64": {
"version": "0.34.5",
"resolved": "https://registry.npmjs.org/@img/sharp-linux-riscv64/-/sharp-linux-riscv64-0.34.5.tgz",
"integrity": "sha512-51gJuLPTKa7piYPaVs8GmByo7/U7/7TZOq+cnXJIHZKavIRHAP77e3N2HEl3dgiqdD/w0yUfiJnII77PuDDFdw==",
"cpu": [
"riscv64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": "^18.17.0 || ^20.3.0 || >=21.0.0"
},
"funding": {
"url": "https://opencollective.com/libvips"
},
"optionalDependencies": {
"@img/sharp-libvips-linux-riscv64": "1.2.4"
}
},
"node_modules/@img/sharp-linux-s390x": {
@@ -2041,9 +2081,9 @@
}
},
"node_modules/@img/sharp-win32-arm64": {
"version": "0.34.4",
"resolved": "https://registry.npmjs.org/@img/sharp-win32-arm64/-/sharp-win32-arm64-0.34.4.tgz",
"integrity": "sha512-2Q250do/5WXTwxW3zjsEuMSv5sUU4Tq9VThWKlU2EYLm4MB7ZeMwF+SFJutldYODXF6jzc6YEOC+VfX0SZQPqA==",
"version": "0.34.5",
"resolved": "https://registry.npmjs.org/@img/sharp-win32-arm64/-/sharp-win32-arm64-0.34.5.tgz",
"integrity": "sha512-WQ3AgWCWYSb2yt+IG8mnC6Jdk9Whs7O0gxphblsLvdhSpSTtmu69ZG1Gkb6NuvxsNACwiPV6cNSZNzt0KPsw7g==",
"cpu": [
"arm64"
],
@@ -2897,9 +2937,9 @@
}
},
"node_modules/@next/env": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/env/-/env-16.1.7.tgz",
"integrity": "sha512-rJJbIdJB/RQr2F1nylZr/PJzamvNNhfr3brdKP6s/GW850jbtR70QlSfFselvIBbcPUOlQwBakexjFzqLzF6pg==",
"version": "16.2.3",
"resolved": "https://registry.npmjs.org/@next/env/-/env-16.2.3.tgz",
"integrity": "sha512-ZWXyj4uNu4GCWQw9cjRxWlbD+33mcDszIo9iQxFnBX3Wmgq9ulaSJcl6VhuWx5pCWqqD+9W6Wfz7N0lM5lYPMA==",
"license": "MIT"
},
"node_modules/@next/eslint-plugin-next": {
@@ -2943,9 +2983,9 @@
}
},
"node_modules/@next/swc-darwin-arm64": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-16.1.7.tgz",
"integrity": "sha512-b2wWIE8sABdyafc4IM8r5Y/dS6kD80JRtOGrUiKTsACFQfWWgUQ2NwoUX1yjFMXVsAwcQeNpnucF2ZrujsBBPg==",
"version": "16.2.3",
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-16.2.3.tgz",
"integrity": "sha512-u37KDKTKQ+OQLvY+z7SNXixwo4Q2/IAJFDzU1fYe66IbCE51aDSAzkNDkWmLN0yjTUh4BKBd+hb69jYn6qqqSg==",
"cpu": [
"arm64"
],
@@ -2959,9 +2999,9 @@
}
},
"node_modules/@next/swc-darwin-x64": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-16.1.7.tgz",
"integrity": "sha512-zcnVaaZulS1WL0Ss38R5Q6D2gz7MtBu8GZLPfK+73D/hp4GFMrC2sudLky1QibfV7h6RJBJs/gOFvYP0X7UVlQ==",
"version": "16.2.3",
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-16.2.3.tgz",
"integrity": "sha512-gHjL/qy6Q6CG3176FWbAKyKh9IfntKZTB3RY/YOJdDFpHGsUDXVH38U4mMNpHVGXmeYW4wj22dMp1lTfmu/bTQ==",
"cpu": [
"x64"
],
@@ -2975,9 +3015,9 @@
}
},
"node_modules/@next/swc-linux-arm64-gnu": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-16.1.7.tgz",
"integrity": "sha512-2ant89Lux/Q3VyC8vNVg7uBaFVP9SwoK2jJOOR0L8TQnX8CAYnh4uctAScy2Hwj2dgjVHqHLORQZJ2wH6VxhSQ==",
"version": "16.2.3",
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-16.2.3.tgz",
"integrity": "sha512-U6vtblPtU/P14Y/b/n9ZY0GOxbbIhTFuaFR7F4/uMBidCi2nSdaOFhA0Go81L61Zd6527+yvuX44T4ksnf8T+Q==",
"cpu": [
"arm64"
],
@@ -2991,9 +3031,9 @@
}
},
"node_modules/@next/swc-linux-arm64-musl": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-16.1.7.tgz",
"integrity": "sha512-uufcze7LYv0FQg9GnNeZ3/whYfo+1Q3HnQpm16o6Uyi0OVzLlk2ZWoY7j07KADZFY8qwDbsmFnMQP3p3+Ftprw==",
"version": "16.2.3",
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-16.2.3.tgz",
"integrity": "sha512-/YV0LgjHUmfhQpn9bVoGc4x4nan64pkhWR5wyEV8yCOfwwrH630KpvRg86olQHTwHIn1z59uh6JwKvHq1h4QEw==",
"cpu": [
"arm64"
],
@@ -3007,9 +3047,9 @@
}
},
"node_modules/@next/swc-linux-x64-gnu": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-16.1.7.tgz",
"integrity": "sha512-KWVf2gxYvHtvuT+c4MBOGxuse5TD7DsMFYSxVxRBnOzok/xryNeQSjXgxSv9QpIVlaGzEn/pIuI6Koosx8CGWA==",
"version": "16.2.3",
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-16.2.3.tgz",
"integrity": "sha512-/HiWEcp+WMZ7VajuiMEFGZ6cg0+aYZPqCJD3YJEfpVWQsKYSjXQG06vJP6F1rdA03COD9Fef4aODs3YxKx+RDQ==",
"cpu": [
"x64"
],
@@ -3023,9 +3063,9 @@
}
},
"node_modules/@next/swc-linux-x64-musl": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-16.1.7.tgz",
"integrity": "sha512-HguhaGwsGr1YAGs68uRKc4aGWxLET+NevJskOcCAwXbwj0fYX0RgZW2gsOCzr9S11CSQPIkxmoSbuVaBp4Z3dA==",
"version": "16.2.3",
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-16.2.3.tgz",
"integrity": "sha512-Kt44hGJfZSefebhk/7nIdivoDr3Ugp5+oNz9VvF3GUtfxutucUIHfIO0ZYO8QlOPDQloUVQn4NVC/9JvHRk9hw==",
"cpu": [
"x64"
],
@@ -3039,9 +3079,9 @@
}
},
"node_modules/@next/swc-win32-arm64-msvc": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-16.1.7.tgz",
"integrity": "sha512-S0n3KrDJokKTeFyM/vGGGR8+pCmXYrjNTk2ZozOL1C/JFdfUIL9O1ATaJOl5r2POe56iRChbsszrjMAdWSv7kQ==",
"version": "16.2.3",
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-16.2.3.tgz",
"integrity": "sha512-O2NZ9ie3Tq6xj5Z5CSwBT3+aWAMW2PIZ4egUi9MaWLkwaehgtB7YZjPm+UpcNpKOme0IQuqDcor7BsW6QBiQBw==",
"cpu": [
"arm64"
],
@@ -3055,9 +3095,9 @@
}
},
"node_modules/@next/swc-win32-x64-msvc": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-16.1.7.tgz",
"integrity": "sha512-mwgtg8CNZGYm06LeEd+bNnOUfwOyNem/rOiP14Lsz+AnUY92Zq/LXwtebtUiaeVkhbroRCQ0c8GlR4UT1U+0yg==",
"version": "16.2.3",
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-16.2.3.tgz",
"integrity": "sha512-Ibm29/GgB/ab5n7XKqlStkm54qqZE8v2FnijUPBgrd67FWrac45o/RsNlaOWjme/B5UqeWt/8KM4aWBwA1D2Kw==",
"cpu": [
"x64"
],
@@ -14088,12 +14128,12 @@
"license": "MIT"
},
"node_modules/next": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/next/-/next-16.1.7.tgz",
"integrity": "sha512-WM0L7WrSvKwoLegLYr6V+mz+RIofqQgVAfHhMp9a88ms0cFX8iX9ew+snpWlSBwpkURJOUdvCEt3uLl3NNzvWg==",
"version": "16.2.3",
"resolved": "https://registry.npmjs.org/next/-/next-16.2.3.tgz",
"integrity": "sha512-9V3zV4oZFza3PVev5/poB9g0dEafVcgNyQ8eTRop8GvxZjV2G15FC5ARuG1eFD42QgeYkzJBJzHghNP8Ad9xtA==",
"license": "MIT",
"dependencies": {
"@next/env": "16.1.7",
"@next/env": "16.2.3",
"@swc/helpers": "0.5.15",
"baseline-browser-mapping": "^2.9.19",
"caniuse-lite": "^1.0.30001579",
@@ -14107,15 +14147,15 @@
"node": ">=20.9.0"
},
"optionalDependencies": {
"@next/swc-darwin-arm64": "16.1.7",
"@next/swc-darwin-x64": "16.1.7",
"@next/swc-linux-arm64-gnu": "16.1.7",
"@next/swc-linux-arm64-musl": "16.1.7",
"@next/swc-linux-x64-gnu": "16.1.7",
"@next/swc-linux-x64-musl": "16.1.7",
"@next/swc-win32-arm64-msvc": "16.1.7",
"@next/swc-win32-x64-msvc": "16.1.7",
"sharp": "^0.34.4"
"@next/swc-darwin-arm64": "16.2.3",
"@next/swc-darwin-x64": "16.2.3",
"@next/swc-linux-arm64-gnu": "16.2.3",
"@next/swc-linux-arm64-musl": "16.2.3",
"@next/swc-linux-x64-gnu": "16.2.3",
"@next/swc-linux-x64-musl": "16.2.3",
"@next/swc-win32-arm64-msvc": "16.2.3",
"@next/swc-win32-x64-msvc": "16.2.3",
"sharp": "^0.34.5"
},
"peerDependencies": {
"@opentelemetry/api": "^1.1.0",
@@ -14148,10 +14188,32 @@
"react-dom": "^16.8 || ^17 || ^18 || ^19 || ^19.0.0-rc"
}
},
"node_modules/next/node_modules/@img/sharp-darwin-arm64": {
"version": "0.34.5",
"resolved": "https://registry.npmjs.org/@img/sharp-darwin-arm64/-/sharp-darwin-arm64-0.34.5.tgz",
"integrity": "sha512-imtQ3WMJXbMY4fxb/Ndp6HBTNVtWCUI0WdobyheGf5+ad6xX8VIDO8u2xE4qc/fr08CKG/7dDseFtn6M6g/r3w==",
"cpu": [
"arm64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": "^18.17.0 || ^20.3.0 || >=21.0.0"
},
"funding": {
"url": "https://opencollective.com/libvips"
},
"optionalDependencies": {
"@img/sharp-libvips-darwin-arm64": "1.2.4"
}
},
"node_modules/next/node_modules/@img/sharp-darwin-x64": {
"version": "0.34.4",
"resolved": "https://registry.npmjs.org/@img/sharp-darwin-x64/-/sharp-darwin-x64-0.34.4.tgz",
"integrity": "sha512-rZheupWIoa3+SOdF/IcUe1ah4ZDpKBGWcsPX6MT0lYniH9micvIU7HQkYTfrx5Xi8u+YqwLtxC/3vl8TQN6rMg==",
"version": "0.34.5",
"resolved": "https://registry.npmjs.org/@img/sharp-darwin-x64/-/sharp-darwin-x64-0.34.5.tgz",
"integrity": "sha512-YNEFAF/4KQ/PeW0N+r+aVVsoIY0/qxxikF2SWdp+NRkmMB7y9LBZAVqQ4yhGCm/H3H270OSykqmQMKLBhBJDEw==",
"cpu": [
"x64"
],
@@ -14167,13 +14229,29 @@
"url": "https://opencollective.com/libvips"
},
"optionalDependencies": {
"@img/sharp-libvips-darwin-x64": "1.2.3"
"@img/sharp-libvips-darwin-x64": "1.2.4"
}
},
"node_modules/next/node_modules/@img/sharp-libvips-darwin-arm64": {
"version": "1.2.4",
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-arm64/-/sharp-libvips-darwin-arm64-1.2.4.tgz",
"integrity": "sha512-zqjjo7RatFfFoP0MkQ51jfuFZBnVE2pRiaydKJ1G/rHZvnsrHAOcQALIi9sA5co5xenQdTugCvtb1cuf78Vf4g==",
"cpu": [
"arm64"
],
"license": "LGPL-3.0-or-later",
"optional": true,
"os": [
"darwin"
],
"funding": {
"url": "https://opencollective.com/libvips"
}
},
"node_modules/next/node_modules/@img/sharp-libvips-darwin-x64": {
"version": "1.2.3",
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-x64/-/sharp-libvips-darwin-x64-1.2.3.tgz",
"integrity": "sha512-Ju+g2xn1E2AKO6YBhxjj+ACcsPQRHT0bhpglxcEf+3uyPY+/gL8veniKoo96335ZaPo03bdDXMv0t+BBFAbmRA==",
"version": "1.2.4",
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-x64/-/sharp-libvips-darwin-x64-1.2.4.tgz",
"integrity": "sha512-1IOd5xfVhlGwX+zXv2N93k0yMONvUlANylbJw1eTah8K/Jtpi15KC+WSiaX/nBmbm2HxRM1gZ0nSdjSsrZbGKg==",
"cpu": [
"x64"
],
@@ -14187,9 +14265,9 @@
}
},
"node_modules/next/node_modules/@img/sharp-libvips-linux-arm": {
"version": "1.2.3",
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm/-/sharp-libvips-linux-arm-1.2.3.tgz",
"integrity": "sha512-x1uE93lyP6wEwGvgAIV0gP6zmaL/a0tGzJs/BIDDG0zeBhMnuUPm7ptxGhUbcGs4okDJrk4nxgrmxpib9g6HpA==",
"version": "1.2.4",
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm/-/sharp-libvips-linux-arm-1.2.4.tgz",
"integrity": "sha512-bFI7xcKFELdiNCVov8e44Ia4u2byA+l3XtsAj+Q8tfCwO6BQ8iDojYdvoPMqsKDkuoOo+X6HZA0s0q11ANMQ8A==",
"cpu": [
"arm"
],
@@ -14203,9 +14281,9 @@
}
},
"node_modules/next/node_modules/@img/sharp-libvips-linux-arm64": {
"version": "1.2.3",
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm64/-/sharp-libvips-linux-arm64-1.2.3.tgz",
"integrity": "sha512-I4RxkXU90cpufazhGPyVujYwfIm9Nk1QDEmiIsaPwdnm013F7RIceaCc87kAH+oUB1ezqEvC6ga4m7MSlqsJvQ==",
"version": "1.2.4",
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm64/-/sharp-libvips-linux-arm64-1.2.4.tgz",
"integrity": "sha512-excjX8DfsIcJ10x1Kzr4RcWe1edC9PquDRRPx3YVCvQv+U5p7Yin2s32ftzikXojb1PIFc/9Mt28/y+iRklkrw==",
"cpu": [
"arm64"
],
@@ -14219,9 +14297,9 @@
}
},
"node_modules/next/node_modules/@img/sharp-libvips-linux-s390x": {
"version": "1.2.3",
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-s390x/-/sharp-libvips-linux-s390x-1.2.3.tgz",
"integrity": "sha512-RgWrs/gVU7f+K7P+KeHFaBAJlNkD1nIZuVXdQv6S+fNA6syCcoboNjsV2Pou7zNlVdNQoQUpQTk8SWDHUA3y/w==",
"version": "1.2.4",
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-s390x/-/sharp-libvips-linux-s390x-1.2.4.tgz",
"integrity": "sha512-qmp9VrzgPgMoGZyPvrQHqk02uyjA0/QrTO26Tqk6l4ZV0MPWIW6LTkqOIov+J1yEu7MbFQaDpwdwJKhbJvuRxQ==",
"cpu": [
"s390x"
],
@@ -14235,9 +14313,9 @@
}
},
"node_modules/next/node_modules/@img/sharp-libvips-linux-x64": {
"version": "1.2.3",
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-x64/-/sharp-libvips-linux-x64-1.2.3.tgz",
"integrity": "sha512-3JU7LmR85K6bBiRzSUc/Ff9JBVIFVvq6bomKE0e63UXGeRw2HPVEjoJke1Yx+iU4rL7/7kUjES4dZ/81Qjhyxg==",
"version": "1.2.4",
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-x64/-/sharp-libvips-linux-x64-1.2.4.tgz",
"integrity": "sha512-tJxiiLsmHc9Ax1bz3oaOYBURTXGIRDODBqhveVHonrHJ9/+k89qbLl0bcJns+e4t4rvaNBxaEZsFtSfAdquPrw==",
"cpu": [
"x64"
],
@@ -14251,9 +14329,9 @@
}
},
"node_modules/next/node_modules/@img/sharp-libvips-linuxmusl-arm64": {
"version": "1.2.3",
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-arm64/-/sharp-libvips-linuxmusl-arm64-1.2.3.tgz",
"integrity": "sha512-F9q83RZ8yaCwENw1GieztSfj5msz7GGykG/BA+MOUefvER69K/ubgFHNeSyUu64amHIYKGDs4sRCMzXVj8sEyw==",
"version": "1.2.4",
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-arm64/-/sharp-libvips-linuxmusl-arm64-1.2.4.tgz",
"integrity": "sha512-FVQHuwx1IIuNow9QAbYUzJ+En8KcVm9Lk5+uGUQJHaZmMECZmOlix9HnH7n1TRkXMS0pGxIJokIVB9SuqZGGXw==",
"cpu": [
"arm64"
],
@@ -14267,9 +14345,9 @@
}
},
"node_modules/next/node_modules/@img/sharp-libvips-linuxmusl-x64": {
"version": "1.2.3",
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-x64/-/sharp-libvips-linuxmusl-x64-1.2.3.tgz",
"integrity": "sha512-U5PUY5jbc45ANM6tSJpsgqmBF/VsL6LnxJmIf11kB7J5DctHgqm0SkuXzVWtIY90GnJxKnC/JT251TDnk1fu/g==",
"version": "1.2.4",
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-x64/-/sharp-libvips-linuxmusl-x64-1.2.4.tgz",
"integrity": "sha512-+LpyBk7L44ZIXwz/VYfglaX/okxezESc6UxDSoyo2Ks6Jxc4Y7sGjpgU9s4PMgqgjj1gZCylTieNamqA1MF7Dg==",
"cpu": [
"x64"
],
@@ -14283,9 +14361,9 @@
}
},
"node_modules/next/node_modules/@img/sharp-linux-arm": {
"version": "0.34.4",
"resolved": "https://registry.npmjs.org/@img/sharp-linux-arm/-/sharp-linux-arm-0.34.4.tgz",
"integrity": "sha512-Xyam4mlqM0KkTHYVSuc6wXRmM7LGN0P12li03jAnZ3EJWZqj83+hi8Y9UxZUbxsgsK1qOEwg7O0Bc0LjqQVtxA==",
"version": "0.34.5",
"resolved": "https://registry.npmjs.org/@img/sharp-linux-arm/-/sharp-linux-arm-0.34.5.tgz",
"integrity": "sha512-9dLqsvwtg1uuXBGZKsxem9595+ujv0sJ6Vi8wcTANSFpwV/GONat5eCkzQo/1O6zRIkh0m/8+5BjrRr7jDUSZw==",
"cpu": [
"arm"
],
@@ -14301,13 +14379,13 @@
"url": "https://opencollective.com/libvips"
},
"optionalDependencies": {
"@img/sharp-libvips-linux-arm": "1.2.3"
"@img/sharp-libvips-linux-arm": "1.2.4"
}
},
"node_modules/next/node_modules/@img/sharp-linux-arm64": {
"version": "0.34.4",
"resolved": "https://registry.npmjs.org/@img/sharp-linux-arm64/-/sharp-linux-arm64-0.34.4.tgz",
"integrity": "sha512-YXU1F/mN/Wu786tl72CyJjP/Ngl8mGHN1hST4BGl+hiW5jhCnV2uRVTNOcaYPs73NeT/H8Upm3y9582JVuZHrQ==",
"version": "0.34.5",
"resolved": "https://registry.npmjs.org/@img/sharp-linux-arm64/-/sharp-linux-arm64-0.34.5.tgz",
"integrity": "sha512-bKQzaJRY/bkPOXyKx5EVup7qkaojECG6NLYswgktOZjaXecSAeCWiZwwiFf3/Y+O1HrauiE3FVsGxFg8c24rZg==",
"cpu": [
"arm64"
],
@@ -14323,13 +14401,13 @@
"url": "https://opencollective.com/libvips"
},
"optionalDependencies": {
"@img/sharp-libvips-linux-arm64": "1.2.3"
"@img/sharp-libvips-linux-arm64": "1.2.4"
}
},
"node_modules/next/node_modules/@img/sharp-linux-s390x": {
"version": "0.34.4",
"resolved": "https://registry.npmjs.org/@img/sharp-linux-s390x/-/sharp-linux-s390x-0.34.4.tgz",
"integrity": "sha512-qVrZKE9Bsnzy+myf7lFKvng6bQzhNUAYcVORq2P7bDlvmF6u2sCmK2KyEQEBdYk+u3T01pVsPrkj943T1aJAsw==",
"version": "0.34.5",
"resolved": "https://registry.npmjs.org/@img/sharp-linux-s390x/-/sharp-linux-s390x-0.34.5.tgz",
"integrity": "sha512-nQtCk0PdKfho3eC5MrbQoigJ2gd1CgddUMkabUj+rBevs8tZ2cULOx46E7oyX+04WGfABgIwmMC0VqieTiR4jg==",
"cpu": [
"s390x"
],
@@ -14345,13 +14423,13 @@
"url": "https://opencollective.com/libvips"
},
"optionalDependencies": {
"@img/sharp-libvips-linux-s390x": "1.2.3"
"@img/sharp-libvips-linux-s390x": "1.2.4"
}
},
"node_modules/next/node_modules/@img/sharp-linux-x64": {
"version": "0.34.4",
"resolved": "https://registry.npmjs.org/@img/sharp-linux-x64/-/sharp-linux-x64-0.34.4.tgz",
"integrity": "sha512-ZfGtcp2xS51iG79c6Vhw9CWqQC8l2Ot8dygxoDoIQPTat/Ov3qAa8qpxSrtAEAJW+UjTXc4yxCjNfxm4h6Xm2A==",
"version": "0.34.5",
"resolved": "https://registry.npmjs.org/@img/sharp-linux-x64/-/sharp-linux-x64-0.34.5.tgz",
"integrity": "sha512-MEzd8HPKxVxVenwAa+JRPwEC7QFjoPWuS5NZnBt6B3pu7EG2Ge0id1oLHZpPJdn3OQK+BQDiw9zStiHBTJQQQQ==",
"cpu": [
"x64"
],
@@ -14367,13 +14445,13 @@
"url": "https://opencollective.com/libvips"
},
"optionalDependencies": {
"@img/sharp-libvips-linux-x64": "1.2.3"
"@img/sharp-libvips-linux-x64": "1.2.4"
}
},
"node_modules/next/node_modules/@img/sharp-linuxmusl-arm64": {
"version": "0.34.4",
"resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-arm64/-/sharp-linuxmusl-arm64-0.34.4.tgz",
"integrity": "sha512-8hDVvW9eu4yHWnjaOOR8kHVrew1iIX+MUgwxSuH2XyYeNRtLUe4VNioSqbNkB7ZYQJj9rUTT4PyRscyk2PXFKA==",
"version": "0.34.5",
"resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-arm64/-/sharp-linuxmusl-arm64-0.34.5.tgz",
"integrity": "sha512-fprJR6GtRsMt6Kyfq44IsChVZeGN97gTD331weR1ex1c1rypDEABN6Tm2xa1wE6lYb5DdEnk03NZPqA7Id21yg==",
"cpu": [
"arm64"
],
@@ -14389,13 +14467,13 @@
"url": "https://opencollective.com/libvips"
},
"optionalDependencies": {
"@img/sharp-libvips-linuxmusl-arm64": "1.2.3"
"@img/sharp-libvips-linuxmusl-arm64": "1.2.4"
}
},
"node_modules/next/node_modules/@img/sharp-linuxmusl-x64": {
"version": "0.34.4",
"resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-x64/-/sharp-linuxmusl-x64-0.34.4.tgz",
"integrity": "sha512-lU0aA5L8QTlfKjpDCEFOZsTYGn3AEiO6db8W5aQDxj0nQkVrZWmN3ZP9sYKWJdtq3PWPhUNlqehWyXpYDcI9Sg==",
"version": "0.34.5",
"resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-x64/-/sharp-linuxmusl-x64-0.34.5.tgz",
"integrity": "sha512-Jg8wNT1MUzIvhBFxViqrEhWDGzqymo3sV7z7ZsaWbZNDLXRJZoRGrjulp60YYtV4wfY8VIKcWidjojlLcWrd8Q==",
"cpu": [
"x64"
],
@@ -14411,20 +14489,20 @@
"url": "https://opencollective.com/libvips"
},
"optionalDependencies": {
"@img/sharp-libvips-linuxmusl-x64": "1.2.3"
"@img/sharp-libvips-linuxmusl-x64": "1.2.4"
}
},
"node_modules/next/node_modules/@img/sharp-wasm32": {
"version": "0.34.4",
"resolved": "https://registry.npmjs.org/@img/sharp-wasm32/-/sharp-wasm32-0.34.4.tgz",
"integrity": "sha512-33QL6ZO/qpRyG7woB/HUALz28WnTMI2W1jgX3Nu2bypqLIKx/QKMILLJzJjI+SIbvXdG9fUnmrxR7vbi1sTBeA==",
"version": "0.34.5",
"resolved": "https://registry.npmjs.org/@img/sharp-wasm32/-/sharp-wasm32-0.34.5.tgz",
"integrity": "sha512-OdWTEiVkY2PHwqkbBI8frFxQQFekHaSSkUIJkwzclWZe64O1X4UlUjqqqLaPbUpMOQk6FBu/HtlGXNblIs0huw==",
"cpu": [
"wasm32"
],
"license": "Apache-2.0 AND LGPL-3.0-or-later AND MIT",
"optional": true,
"dependencies": {
"@emnapi/runtime": "^1.5.0"
"@emnapi/runtime": "^1.7.0"
},
"engines": {
"node": "^18.17.0 || ^20.3.0 || >=21.0.0"
@@ -14434,9 +14512,9 @@
}
},
"node_modules/next/node_modules/@img/sharp-win32-ia32": {
"version": "0.34.4",
"resolved": "https://registry.npmjs.org/@img/sharp-win32-ia32/-/sharp-win32-ia32-0.34.4.tgz",
"integrity": "sha512-3ZeLue5V82dT92CNL6rsal6I2weKw1cYu+rGKm8fOCCtJTR2gYeUfY3FqUnIJsMUPIH68oS5jmZ0NiJ508YpEw==",
"version": "0.34.5",
"resolved": "https://registry.npmjs.org/@img/sharp-win32-ia32/-/sharp-win32-ia32-0.34.5.tgz",
"integrity": "sha512-FV9m/7NmeCmSHDD5j4+4pNI8Cp3aW+JvLoXcTUo0IqyjSfAZJ8dIUmijx1qaJsIiU+Hosw6xM5KijAWRJCSgNg==",
"cpu": [
"ia32"
],
@@ -14453,9 +14531,9 @@
}
},
"node_modules/next/node_modules/@img/sharp-win32-x64": {
"version": "0.34.4",
"resolved": "https://registry.npmjs.org/@img/sharp-win32-x64/-/sharp-win32-x64-0.34.4.tgz",
"integrity": "sha512-xIyj4wpYs8J18sVN3mSQjwrw7fKUqRw+Z5rnHNCy5fYTxigBz81u5mOMPmFumwjcn8+ld1ppptMBCLic1nz6ig==",
"version": "0.34.5",
"resolved": "https://registry.npmjs.org/@img/sharp-win32-x64/-/sharp-win32-x64-0.34.5.tgz",
"integrity": "sha512-+29YMsqY2/9eFEiW93eqWnuLcWcufowXewwSNIT6UwZdUUCrM3oFjMWH/Z6/TMmb4hlFenmfAVbpWeup2jryCw==",
"cpu": [
"x64"
],
@@ -14498,14 +14576,16 @@
}
},
"node_modules/next/node_modules/sharp": {
"version": "0.34.4",
"version": "0.34.5",
"resolved": "https://registry.npmjs.org/sharp/-/sharp-0.34.5.tgz",
"integrity": "sha512-Ou9I5Ft9WNcCbXrU9cMgPBcCK8LiwLqcbywW3t4oDV37n1pzpuNLsYiAV8eODnjbtQlSDwZ2cUEeQz4E54Hltg==",
"hasInstallScript": true,
"license": "Apache-2.0",
"optional": true,
"dependencies": {
"@img/colour": "^1.0.0",
"detect-libc": "^2.1.0",
"semver": "^7.7.2"
"detect-libc": "^2.1.2",
"semver": "^7.7.3"
},
"engines": {
"node": "^18.17.0 || ^20.3.0 || >=21.0.0"
@@ -14514,62 +14594,30 @@
"url": "https://opencollective.com/libvips"
},
"optionalDependencies": {
"@img/sharp-darwin-arm64": "0.34.4",
"@img/sharp-darwin-x64": "0.34.4",
"@img/sharp-libvips-darwin-arm64": "1.2.3",
"@img/sharp-libvips-darwin-x64": "1.2.3",
"@img/sharp-libvips-linux-arm": "1.2.3",
"@img/sharp-libvips-linux-arm64": "1.2.3",
"@img/sharp-libvips-linux-ppc64": "1.2.3",
"@img/sharp-libvips-linux-s390x": "1.2.3",
"@img/sharp-libvips-linux-x64": "1.2.3",
"@img/sharp-libvips-linuxmusl-arm64": "1.2.3",
"@img/sharp-libvips-linuxmusl-x64": "1.2.3",
"@img/sharp-linux-arm": "0.34.4",
"@img/sharp-linux-arm64": "0.34.4",
"@img/sharp-linux-ppc64": "0.34.4",
"@img/sharp-linux-s390x": "0.34.4",
"@img/sharp-linux-x64": "0.34.4",
"@img/sharp-linuxmusl-arm64": "0.34.4",
"@img/sharp-linuxmusl-x64": "0.34.4",
"@img/sharp-wasm32": "0.34.4",
"@img/sharp-win32-arm64": "0.34.4",
"@img/sharp-win32-ia32": "0.34.4",
"@img/sharp-win32-x64": "0.34.4"
}
},
"node_modules/next/node_modules/sharp/node_modules/@img/sharp-darwin-arm64": {
"version": "0.34.4",
"cpu": [
"arm64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": "^18.17.0 || ^20.3.0 || >=21.0.0"
},
"funding": {
"url": "https://opencollective.com/libvips"
},
"optionalDependencies": {
"@img/sharp-libvips-darwin-arm64": "1.2.3"
}
},
"node_modules/next/node_modules/sharp/node_modules/@img/sharp-libvips-darwin-arm64": {
"version": "1.2.3",
"cpu": [
"arm64"
],
"license": "LGPL-3.0-or-later",
"optional": true,
"os": [
"darwin"
],
"funding": {
"url": "https://opencollective.com/libvips"
"@img/sharp-darwin-arm64": "0.34.5",
"@img/sharp-darwin-x64": "0.34.5",
"@img/sharp-libvips-darwin-arm64": "1.2.4",
"@img/sharp-libvips-darwin-x64": "1.2.4",
"@img/sharp-libvips-linux-arm": "1.2.4",
"@img/sharp-libvips-linux-arm64": "1.2.4",
"@img/sharp-libvips-linux-ppc64": "1.2.4",
"@img/sharp-libvips-linux-riscv64": "1.2.4",
"@img/sharp-libvips-linux-s390x": "1.2.4",
"@img/sharp-libvips-linux-x64": "1.2.4",
"@img/sharp-libvips-linuxmusl-arm64": "1.2.4",
"@img/sharp-libvips-linuxmusl-x64": "1.2.4",
"@img/sharp-linux-arm": "0.34.5",
"@img/sharp-linux-arm64": "0.34.5",
"@img/sharp-linux-ppc64": "0.34.5",
"@img/sharp-linux-riscv64": "0.34.5",
"@img/sharp-linux-s390x": "0.34.5",
"@img/sharp-linux-x64": "0.34.5",
"@img/sharp-linuxmusl-arm64": "0.34.5",
"@img/sharp-linuxmusl-x64": "0.34.5",
"@img/sharp-wasm32": "0.34.5",
"@img/sharp-win32-arm64": "0.34.5",
"@img/sharp-win32-ia32": "0.34.5",
"@img/sharp-win32-x64": "0.34.5"
}
},
"node_modules/node-fetch": {

View File

@@ -80,7 +80,7 @@
"mdast-util-find-and-replace": "^3.0.1",
"mime": "^4.1.0",
"motion": "^12.29.0",
"next": "16.1.7",
"next": "16.2.3",
"next-themes": "^0.4.4",
"postcss": "^8.5.6",
"posthog-js": "^1.176.0",

View File

@@ -12,12 +12,7 @@ import {
useRef,
useState,
} from "react";
import {
Tooltip,
TooltipContent,
TooltipProvider,
TooltipTrigger,
} from "@/components/ui/tooltip";
import { Tooltip } from "@opal/components";
import { useFederatedConnectors } from "@/lib/hooks";
import {
FederatedConnectorDetail,
@@ -96,33 +91,31 @@ function SourceTileTooltipWrapper({
}
return (
<TooltipProvider>
<Tooltip>
<TooltipTrigger asChild>
<div>
<SourceTile
sourceMetadata={sourceMetadata}
preSelect={preSelect}
navigationUrl={navigationUrl}
hasExistingSlackCredentials={!!hasExistingSlackCredentials}
/>
</div>
</TooltipTrigger>
<TooltipContent side="top" className="max-w-sm">
{existingFederatedConnector ? (
<Text as="p" textLight05 secondaryBody>
<strong>Federated connector already configured.</strong> Click to
edit the existing connector.
</Text>
) : hasExistingSlackCredentials ? (
<Text as="p" textLight05 secondaryBody>
<strong>Existing Slack credentials found.</strong> Click to manage
your Slack connector.
</Text>
) : null}
</TooltipContent>
</Tooltip>
</TooltipProvider>
<Tooltip
side="top"
tooltip={
existingFederatedConnector ? (
<Text as="p" textLight05 secondaryBody>
<strong>Federated connector already configured.</strong> Click to
edit the existing connector.
</Text>
) : hasExistingSlackCredentials ? (
<Text as="p" textLight05 secondaryBody>
<strong>Existing Slack credentials found.</strong> Click to manage
your Slack connector.
</Text>
) : undefined
}
>
<div>
<SourceTile
sourceMetadata={sourceMetadata}
preSelect={preSelect}
navigationUrl={navigationUrl}
hasExistingSlackCredentials={!!hasExistingSlackCredentials}
/>
</div>
</Tooltip>
);
}

View File

@@ -6,9 +6,8 @@ import { Section } from "@/layouts/general-layouts";
import { Content, InputErrorText, InputVertical } from "@opal/layouts";
import Card from "@/refresh-components/cards/Card";
import Button from "@/refresh-components/buttons/Button";
import { Button as OpalButton } from "@opal/components";
import { Button as OpalButton, MessageCard } from "@opal/components";
import Text from "@/refresh-components/texts/Text";
import Message from "@/refresh-components/messages/Message";
import InfoBlock from "@/refresh-components/messages/InfoBlock";
import InputNumber from "@/refresh-components/inputs/InputNumber";
import {
@@ -637,35 +636,27 @@ export default function BillingDetailsView({
<Section gap={1} height="auto" width="full">
{/* Stripe connection error banner */}
{hasStripeError && (
<Message
static
warning
text="Unable to connect to Stripe payment portal."
<MessageCard
variant="warning"
title="Unable to connect to Stripe payment portal."
description="Check your internet connection or manually provide a license."
close={false}
className="w-full"
/>
)}
{/* Air-gapped mode info banner */}
{isAirGapped && !hasStripeError && !isManualLicenseOnly && (
<Message
static
info
text="Air-gapped deployment"
<MessageCard
variant="info"
title="Air-gapped deployment"
description="Online billing management is disabled. Contact support to update your subscription."
close={false}
className="w-full"
/>
)}
{/* Expiration banner */}
{expirationState && (
<Message
static
warning={expirationState.variant === "warning"}
error={expirationState.variant === "error"}
text={
<MessageCard
variant={expirationState.variant}
title={
expirationState.variant === "error"
? expirationState.daysUntilDeletion
? `Your subscription has expired. Data will be deleted in ${expirationState.daysUntilDeletion} days.`
@@ -679,8 +670,6 @@ export default function BillingDetailsView({
: "Renew your subscription to restore access to paid features."
: `Renew your subscription by ${expirationState.expirationDate} to avoid disruption.`
}
close={false}
className="w-full"
/>
)}

View File

@@ -69,30 +69,33 @@ jest.mock("./LicenseActivationCard", () => ({
default: () => <div data-testid="license-activation-card" />,
}));
jest.mock("@/refresh-components/messages/Message", () => ({
__esModule: true,
default: ({
text,
description,
onClose,
}: {
text: string;
description?: string;
onClose?: () => void;
}) => (
<div data-testid="activating-banner">
<span data-testid="activating-banner-text">{text}</span>
{description && (
<span data-testid="activating-banner-description">{description}</span>
)}
{onClose && (
<button data-testid="activating-banner-close" onClick={onClose}>
Close
</button>
)}
</div>
),
}));
jest.mock("@opal/components", () => {
const actual = jest.requireActual("@opal/components");
return {
...actual,
MessageCard: ({
title,
description,
onClose,
}: {
title: string;
description?: string;
onClose?: () => void;
}) => (
<div data-testid="activating-banner">
<span data-testid="activating-banner-text">{title}</span>
{description && (
<span data-testid="activating-banner-description">{description}</span>
)}
{onClose && (
<button data-testid="activating-banner-close" onClick={onClose}>
Close
</button>
)}
</div>
),
};
});
jest.mock("@/lib/billing", () => ({
useBillingInformation: jest.fn(),

View File

@@ -19,7 +19,7 @@ import {
import { NEXT_PUBLIC_CLOUD_ENABLED } from "@/lib/constants";
import { SWR_KEYS } from "@/lib/swr-keys";
import { useUser } from "@/providers/UserProvider";
import Message from "@/refresh-components/messages/Message";
import { MessageCard } from "@opal/components";
import PlansView from "./PlansView";
import CheckoutView from "./CheckoutView";
@@ -484,19 +484,14 @@ export default function BillingPage() {
<SettingsLayouts.Body>
<div className="flex flex-col items-center gap-6">
{isActivating && (
<Message
static
warning
large
text="Your license is still activating"
<MessageCard
variant="warning"
title="Your license is still activating"
description="Your license is being processed. You'll be taken to billing details automatically once confirmed."
icon
close
onClose={() => {
sessionStorage.removeItem(BILLING_ACTIVATING_KEY);
setIsActivating(false);
}}
className="w-full"
/>
)}
{renderContent()}

View File

@@ -23,12 +23,7 @@ import { RadioGroupItemField } from "@/components/ui/RadioGroupItemField";
import { AlertCircle } from "lucide-react";
import { useRouter } from "next/navigation";
import type { Route } from "next";
import {
Tooltip,
TooltipContent,
TooltipTrigger,
} from "@/components/ui/tooltip";
import { TooltipProvider } from "@radix-ui/react-tooltip";
import { Tooltip } from "@opal/components";
import { SourceIcon } from "@/components/SourceIcon";
import Link from "next/link";
import AgentAvatar from "@/refresh-components/avatars/AgentAvatar";
@@ -570,14 +565,10 @@ export function SlackChannelConfigFormFields({
<div className="flex mt-8 gap-x-2 w-full justify-end">
{shouldShowPrivacyAlert && (
<TooltipProvider>
<Tooltip>
<TooltipTrigger asChild>
<div className="flex hover:bg-background-150 cursor-pointer p-2 rounded-lg items-center">
<AlertCircle className="h-5 w-5 text-alert" />
</div>
</TooltipTrigger>
<TooltipContent side="top" className="bg-background p-4 w-80">
<Tooltip
side="top"
tooltip={
<div className="space-y-2">
<Label className="text-text mb-2 font-semibold">
Privacy Alert
</Label>
@@ -615,9 +606,13 @@ export function SlackChannelConfigFormFields({
))}
</div>
</div>
</TooltipContent>
</Tooltip>
</TooltipProvider>
</div>
}
>
<div className="flex hover:bg-background-150 cursor-pointer p-2 rounded-lg items-center">
<AlertCircle className="h-5 w-5 text-alert" />
</div>
</Tooltip>
)}
<Button type="submit">{isUpdate ? "Update" : "Create"}</Button>
<Button prominence="secondary" onClick={() => router.back()}>

Some files were not shown because too many files have changed in this diff Show More