Compare commits

...

33 Commits

Author SHA1 Message Date
Justin Tahara
73f9a47364 fix(xlsx): Openpyxl Formatting Issues (#10230) 2026-04-15 21:22:58 +00:00
Raunak Bhagat
a808445d96 feat: opalify MessageCard (#10223) 2026-04-15 21:11:18 +00:00
Nikolas Garza
c31215197a fix(chat): hide incomplete citation links during streaming (#10224) 2026-04-15 21:10:06 +00:00
Nikolas Garza
9ebd9ebd73 fix(chat): snap typewriter to full content on tab re-focus (#10226) 2026-04-15 21:07:00 +00:00
Nikolas Garza
f0bb0a6bb0 fix(chat): only header click selects preferred in multi-model panels (#10198) 2026-04-15 21:06:19 +00:00
Ben Wu
01bec19d19 feat(canvas): checkpoint logic (3/4) (#9807) 2026-04-15 20:48:16 +00:00
Danelegend
7b40c2cde7 feat(indexing): CSV Chunker - Field-Value Implementation (#10099) 2026-04-15 19:57:50 +00:00
Jamison Lahman
e2c38d2899 chore(devtools): connect databases and github remote to devcontainer (#10222) 2026-04-15 19:50:11 +00:00
Nikolas Garza
24768f9e4f feat(metrics): replace pull-based connector metrics with push-based for multi-tenant (#10189) 2026-04-15 18:15:34 +00:00
Bo-Onyx
aec1c169b6 feat(pruning): pruning grafana dashboard for single tenant (#10208) 2026-04-15 17:50:28 +00:00
Jamison Lahman
5a16ad3473 chore(tests): avoid openapi client import in tests (#10220) 2026-04-15 17:38:24 +00:00
dependabot[bot]
7e28e59f23 chore(deps): bump transformers from 4.53.0 to 5.5.4 (#9987)
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-04-15 10:39:50 -07:00
Nikolas Garza
879ae6c02d feat(monitoring): add local Prometheus + Grafana docker-compose stack (#9627) 2026-04-15 17:25:28 +00:00
Nikolas Garza
f84f367eb4 fix(voice): send TTS text in POST body instead of query params (#10213) 2026-04-15 17:20:29 +00:00
Jamison Lahman
d81efe3877 fix(ollama): always include model tag in display name (#10218)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2026-04-15 09:17:37 -07:00
Nikolas Garza
d4619f93c4 feat(indexing): notify admins when connector enters repeated error state (#10207) 2026-04-15 06:10:25 +00:00
Nikolas Garza
70fcfb1d73 feat(indexing): add admin API for failed documents (#10204) 2026-04-15 06:10:06 +00:00
Nikolas Garza
32ba393b32 fix(chat): keep model selector popover open until max models reached (#10203) 2026-04-15 06:09:24 +00:00
Nikolas Garza
f9d2bf78ed fix(chat): disable hover/pointer states on multi-model panels during streaming (#10202) 2026-04-15 06:09:11 +00:00
Nikolas Garza
5567a078fe fix(chat): fix fade gradient missing on last multi-model panel (#10199) 2026-04-15 06:08:48 +00:00
Raunak Bhagat
fc0e8560bc feat: opalify Tooltip component, migrate all consumers (#10210) 2026-04-15 03:42:15 +00:00
Nikolas Garza
60b2701eed feat(indexing): add diagnostic logging to check_for_indexing beat task (#10200) 2026-04-14 20:29:47 -07:00
Jamison Lahman
3682d9844b fix(fe): handle file attachment overflow (#10211) 2026-04-15 02:00:58 +00:00
Raunak Bhagat
a420f9a37c feat: add ref forwarding to input layout components (#10206) 2026-04-15 00:20:50 +00:00
Jamison Lahman
20c5107ba6 chore(devtools): install java runtime into devcontainer (#10197) 2026-04-14 23:10:12 +00:00
Nikolas Garza
357bc91aee feat(indexing): capture swallowed per-doc exceptions in Sentry (#10149) 2026-04-14 23:01:42 +00:00
Nikolas Garza
09653872a2 fix(chat): render inline citation chips in multi-model panels (#10196) 2026-04-14 22:59:10 +00:00
dependabot[bot]
ff01a53f83 chore(deps): bump next from 16.1.7 to 16.2.3 in /web (#10195)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jamison Lahman <jamison@lahman.dev>
2026-04-14 22:49:31 +00:00
Danelegend
03ddd5ca9b feat(indexing): Add TabularSection (#10095) 2026-04-14 22:16:35 +00:00
Bo-Onyx
8c49e4573c fix(pruning): Skip Permission Sync During Google Drive Pruning (#10185) 2026-04-14 22:14:09 +00:00
Jamison Lahman
f1696ffa16 chore(deps): upgrade playwright: 1.55.0->1.58.0 (#10194) 2026-04-14 15:12:14 -07:00
Jamison Lahman
a427cb5b0c chore(deps): upgrade python patch version in docker (#10192) 2026-04-14 15:10:00 -07:00
Evan Lohn
f7e4be18dd fix: uploaded files as knowledge source (#10167) 2026-04-14 21:51:00 +00:00
175 changed files with 8691 additions and 2672 deletions

View File

@@ -2,6 +2,7 @@ FROM ubuntu:26.04@sha256:cc925e589b7543b910fea57a240468940003fbfc0515245a495dd0a
RUN apt-get update && apt-get install -y --no-install-recommends \
curl \
default-jre \
fd-find \
fzf \
git \

View File

@@ -1,7 +1,7 @@
{
"name": "Onyx Dev Sandbox",
"image": "onyxdotapp/onyx-devcontainer@sha256:12184169c5bcc9cca0388286d5ffe504b569bc9c37bfa631b76ee8eee2064055",
"runArgs": ["--cap-add=NET_ADMIN", "--cap-add=NET_RAW"],
"image": "onyxdotapp/onyx-devcontainer@sha256:0f02d9299928849c7b15f3b348dcfdcdcb64411ff7a4580cbc026a6ee7aa1554",
"runArgs": ["--cap-add=NET_ADMIN", "--cap-add=NET_RAW", "--network=onyx_default"],
"mounts": [
"source=${localEnv:HOME}/.claude,target=/home/dev/.claude,type=bind",
"source=${localEnv:HOME}/.claude.json,target=/home/dev/.claude.json,type=bind",
@@ -12,10 +12,13 @@
"source=onyx-devcontainer-local,target=/home/dev/.local,type=volume"
],
"containerEnv": {
"SSH_AUTH_SOCK": "/tmp/ssh-agent.sock"
"SSH_AUTH_SOCK": "/tmp/ssh-agent.sock",
"POSTGRES_HOST": "relational_db",
"REDIS_HOST": "cache"
},
"remoteUser": "${localEnv:DEVCONTAINER_REMOTE_USER:dev}",
"updateRemoteUserUID": false,
"initializeCommand": "docker network create onyx_default 2>/dev/null || true",
"workspaceMount": "source=${localWorkspaceFolder},target=/workspace,type=bind,consistency=delegated",
"workspaceFolder": "/workspace",
"postStartCommand": "sudo bash /workspace/.devcontainer/init-dev-user.sh && sudo bash /workspace/.devcontainer/init-firewall.sh",

View File

@@ -4,22 +4,12 @@ set -euo pipefail
echo "Setting up firewall..."
# Preserve docker dns resolution
DOCKER_DNS_RULES=$(iptables-save | grep -E "^-A.*-d 127.0.0.11/32" || true)
# Flush all rules
iptables -t nat -F
iptables -t nat -X
iptables -t mangle -F
iptables -t mangle -X
# Only flush the filter table. The nat and mangle tables are managed by Docker
# (DNS DNAT to 127.0.0.11, container networking, etc.) and must not be touched —
# flushing them breaks Docker's embedded DNS resolver.
iptables -F
iptables -X
# Restore docker dns rules
if [ -n "$DOCKER_DNS_RULES" ]; then
echo "$DOCKER_DNS_RULES" | iptables-restore -n
fi
# Create ipset for allowed destinations
ipset create allowed-domains hash:net || true
ipset flush allowed-domains
@@ -34,6 +24,7 @@ done
# Resolve allowed domains
ALLOWED_DOMAINS=(
"github.com"
"registry.npmjs.org"
"api.anthropic.com"
"api-staging.anthropic.com"
@@ -65,6 +56,14 @@ if [ -n "$DOCKER_GATEWAY" ]; then
fi
fi
# Allow traffic to all attached Docker network subnets so the container can
# reach sibling services (e.g. relational_db, cache) on shared compose networks.
for subnet in $(ip -4 -o addr show scope global | awk '{print $4}'); do
if ! ipset add allowed-domains "$subnet" -exist 2>&1; then
echo "warning: failed to add Docker subnet $subnet to allowlist" >&2
fi
done
# Set default policies to DROP
iptables -P FORWARD DROP
iptables -P INPUT DROP

12
.vscode/launch.json vendored
View File

@@ -475,6 +475,18 @@
"order": 0
}
},
{
"name": "Start Monitoring Stack (Prometheus + Grafana)",
"type": "node",
"request": "launch",
"runtimeExecutable": "docker",
"runtimeArgs": ["compose", "up", "-d"],
"cwd": "${workspaceFolder}/profiling",
"console": "integratedTerminal",
"presentation": {
"group": "3"
}
},
{
"name": "Clear and Restart External Volumes and Containers",
"type": "node",

View File

@@ -1,4 +1,4 @@
FROM python:3.11.7-slim-bookworm
FROM python:3.11-slim-bookworm@sha256:9c6f90801e6b68e772b7c0ca74260cbf7af9f320acec894e26fccdaccfbe3b47
LABEL com.danswer.maintainer="founders@onyx.app"
LABEL com.danswer.description="This image is the web/frontend container of Onyx which \

View File

@@ -1,5 +1,5 @@
# Base stage with dependencies
FROM python:3.11.7-slim-bookworm AS base
FROM python:3.11-slim-bookworm@sha256:9c6f90801e6b68e772b7c0ca74260cbf7af9f320acec894e26fccdaccfbe3b47 AS base
ENV DANSWER_RUNNING_IN_DOCKER="true" \
HF_HOME=/app/.cache/huggingface

View File

@@ -96,11 +96,14 @@ def get_model_app() -> FastAPI:
title="Onyx Model Server", version=__version__, lifespan=lifespan
)
if SENTRY_DSN:
from onyx.configs.sentry import _add_instance_tags
sentry_sdk.init(
dsn=SENTRY_DSN,
integrations=[StarletteIntegration(), FastApiIntegration()],
traces_sample_rate=0.1,
release=__version__,
before_send=_add_instance_tags,
)
logger.info("Sentry initialized")
else:

View File

@@ -63,11 +63,14 @@ logger = setup_logger()
task_logger = get_task_logger(__name__)
if SENTRY_DSN:
from onyx.configs.sentry import _add_instance_tags
sentry_sdk.init(
dsn=SENTRY_DSN,
integrations=[CeleryIntegration()],
traces_sample_rate=0.1,
release=__version__,
before_send=_add_instance_tags,
)
logger.info("Sentry initialized")
else:

View File

@@ -135,10 +135,13 @@ def _docfetching_task(
# Since connector_indexing_proxy_task spawns a new process using this function as
# the entrypoint, we init Sentry here.
if SENTRY_DSN:
from onyx.configs.sentry import _add_instance_tags
sentry_sdk.init(
dsn=SENTRY_DSN,
traces_sample_rate=0.1,
release=__version__,
before_send=_add_instance_tags,
)
logger.info("Sentry initialized")
else:

View File

@@ -3,6 +3,7 @@ import os
import time
import traceback
from collections import defaultdict
from dataclasses import dataclass
from datetime import datetime
from datetime import timedelta
from datetime import timezone
@@ -50,6 +51,7 @@ from onyx.configs.constants import AuthType
from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import CELERY_INDEXING_LOCK_TIMEOUT
from onyx.configs.constants import MilestoneRecordType
from onyx.configs.constants import NotificationType
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
@@ -85,6 +87,8 @@ from onyx.db.indexing_coordination import INDEXING_PROGRESS_TIMEOUT_HOURS
from onyx.db.indexing_coordination import IndexingCoordination
from onyx.db.models import IndexAttempt
from onyx.db.models import SearchSettings
from onyx.db.notification import create_notification
from onyx.db.notification import get_notifications
from onyx.db.search_settings import get_current_search_settings
from onyx.db.search_settings import get_secondary_search_settings
from onyx.db.swap_index import check_and_perform_index_swap
@@ -105,6 +109,9 @@ from onyx.redis.redis_pool import get_redis_replica_client
from onyx.redis.redis_pool import redis_lock_dump
from onyx.redis.redis_pool import SCAN_ITER_COUNT_DEFAULT
from onyx.redis.redis_utils import is_fence
from onyx.server.metrics.connector_health_metrics import on_connector_error_state_change
from onyx.server.metrics.connector_health_metrics import on_connector_indexing_success
from onyx.server.metrics.connector_health_metrics import on_index_attempt_status_change
from onyx.server.runtime.onyx_runtime import OnyxRuntime
from onyx.utils.logger import setup_logger
from onyx.utils.middleware import make_randomized_onyx_request_id
@@ -400,7 +407,6 @@ def check_indexing_completion(
tenant_id: str,
task: Task,
) -> None:
logger.info(
f"Checking for indexing completion: attempt={index_attempt_id} tenant={tenant_id}"
)
@@ -521,13 +527,23 @@ def check_indexing_completion(
# Update CC pair status if successful
cc_pair = get_connector_credential_pair_from_id(
db_session, attempt.connector_credential_pair_id
db_session,
attempt.connector_credential_pair_id,
eager_load_connector=True,
)
if cc_pair is None:
raise RuntimeError(
f"CC pair {attempt.connector_credential_pair_id} not found in database"
)
source = cc_pair.connector.source.value
on_index_attempt_status_change(
tenant_id=tenant_id,
source=source,
cc_pair_id=cc_pair.id,
status=attempt.status.value,
)
if attempt.status.is_successful():
# NOTE: we define the last successful index time as the time the last successful
# attempt finished. This is distinct from the poll_range_end of the last successful
@@ -548,10 +564,39 @@ def check_indexing_completion(
event=MilestoneRecordType.CONNECTOR_SUCCEEDED,
)
on_connector_indexing_success(
tenant_id=tenant_id,
source=source,
cc_pair_id=cc_pair.id,
docs_indexed=attempt.new_docs_indexed or 0,
success_timestamp=attempt.time_updated.timestamp(),
)
# Clear repeated error state on success
if cc_pair.in_repeated_error_state:
cc_pair.in_repeated_error_state = False
# Delete any existing error notification for this CC pair so a
# fresh one is created if the connector fails again later.
for notif in get_notifications(
user=None,
db_session=db_session,
notif_type=NotificationType.CONNECTOR_REPEATED_ERRORS,
include_dismissed=True,
):
if (
notif.additional_data
and notif.additional_data.get("cc_pair_id") == cc_pair.id
):
db_session.delete(notif)
db_session.commit()
on_connector_error_state_change(
tenant_id=tenant_id,
source=source,
cc_pair_id=cc_pair.id,
in_error=False,
)
if attempt.status == IndexingStatus.SUCCESS:
logger.info(
@@ -608,6 +653,27 @@ def active_indexing_attempt(
return bool(active_indexing_attempt)
@dataclass
class _KickoffResult:
"""Tracks diagnostic counts from a _kickoff_indexing_tasks run."""
created: int = 0
skipped_active: int = 0
skipped_not_found: int = 0
skipped_not_indexable: int = 0
failed_to_create: int = 0
@property
def evaluated(self) -> int:
return (
self.created
+ self.skipped_active
+ self.skipped_not_found
+ self.skipped_not_indexable
+ self.failed_to_create
)
def _kickoff_indexing_tasks(
celery_app: Celery,
db_session: Session,
@@ -617,12 +683,12 @@ def _kickoff_indexing_tasks(
redis_client: Redis,
lock_beat: RedisLock,
tenant_id: str,
) -> int:
) -> _KickoffResult:
"""Kick off indexing tasks for the given cc_pair_ids and search_settings.
Returns the number of tasks successfully created.
Returns a _KickoffResult with diagnostic counts.
"""
tasks_created = 0
result = _KickoffResult()
for cc_pair_id in cc_pair_ids:
lock_beat.reacquire()
@@ -633,6 +699,7 @@ def _kickoff_indexing_tasks(
search_settings_id=search_settings.id,
db_session=db_session,
):
result.skipped_active += 1
continue
cc_pair = get_connector_credential_pair_from_id(
@@ -643,6 +710,7 @@ def _kickoff_indexing_tasks(
task_logger.warning(
f"_kickoff_indexing_tasks - CC pair not found: cc_pair={cc_pair_id}"
)
result.skipped_not_found += 1
continue
# Heavyweight check after fetching cc pair
@@ -657,6 +725,7 @@ def _kickoff_indexing_tasks(
f"search_settings={search_settings.id}, "
f"secondary_index_building={secondary_index_building}"
)
result.skipped_not_indexable += 1
continue
task_logger.debug(
@@ -696,13 +765,14 @@ def _kickoff_indexing_tasks(
task_logger.info(
f"Connector indexing queued: index_attempt={attempt_id} cc_pair={cc_pair.id} search_settings={search_settings.id}"
)
tasks_created += 1
result.created += 1
else:
task_logger.error(
f"Failed to create indexing task: cc_pair={cc_pair.id} search_settings={search_settings.id}"
)
result.failed_to_create += 1
return tasks_created
return result
@shared_task(
@@ -728,6 +798,8 @@ def check_for_indexing(self: Task, *, tenant_id: str) -> int | None:
task_logger.warning("check_for_indexing - Starting")
tasks_created = 0
primary_result = _KickoffResult()
secondary_result: _KickoffResult | None = None
locked = False
redis_client = get_redis_client()
redis_client_replica = get_redis_replica_client()
@@ -848,6 +920,39 @@ def check_for_indexing(self: Task, *, tenant_id: str) -> int | None:
cc_pair_id=cc_pair_id,
in_repeated_error_state=True,
)
on_connector_error_state_change(
tenant_id=tenant_id,
source=cc_pair.connector.source.value,
cc_pair_id=cc_pair_id,
in_error=True,
)
connector_name = (
cc_pair.name
or cc_pair.connector.name
or f"CC pair {cc_pair.id}"
)
source = cc_pair.connector.source.value
connector_url = f"/admin/connector/{cc_pair.id}"
create_notification(
user_id=None,
notif_type=NotificationType.CONNECTOR_REPEATED_ERRORS,
db_session=db_session,
title=f"Connector '{connector_name}' has entered repeated error state",
description=(
f"The {source} connector has failed repeatedly and "
f"has been flagged. View indexing history in the "
f"Advanced section: {connector_url}"
),
additional_data={"cc_pair_id": cc_pair.id},
)
task_logger.error(
f"Connector entered repeated error state: "
f"cc_pair={cc_pair.id} "
f"connector={cc_pair.connector.name} "
f"source={source}"
)
# When entering repeated error state, also pause the connector
# to prevent continued indexing retry attempts burning through embedding credits.
# NOTE: only for Cloud, since most self-hosted users use self-hosted embedding
@@ -863,7 +968,7 @@ def check_for_indexing(self: Task, *, tenant_id: str) -> int | None:
# Heavy check, should_index(), is called in _kickoff_indexing_tasks
with get_session_with_current_tenant() as db_session:
# Primary first
tasks_created += _kickoff_indexing_tasks(
primary_result = _kickoff_indexing_tasks(
celery_app=self.app,
db_session=db_session,
search_settings=current_search_settings,
@@ -873,6 +978,7 @@ def check_for_indexing(self: Task, *, tenant_id: str) -> int | None:
lock_beat=lock_beat,
tenant_id=tenant_id,
)
tasks_created += primary_result.created
# Secondary indexing (only if secondary search settings exist and switchover_type is not INSTANT)
if (
@@ -880,7 +986,7 @@ def check_for_indexing(self: Task, *, tenant_id: str) -> int | None:
and secondary_search_settings.switchover_type != SwitchoverType.INSTANT
and secondary_cc_pair_ids
):
tasks_created += _kickoff_indexing_tasks(
secondary_result = _kickoff_indexing_tasks(
celery_app=self.app,
db_session=db_session,
search_settings=secondary_search_settings,
@@ -890,6 +996,7 @@ def check_for_indexing(self: Task, *, tenant_id: str) -> int | None:
lock_beat=lock_beat,
tenant_id=tenant_id,
)
tasks_created += secondary_result.created
elif (
secondary_search_settings
and secondary_search_settings.switchover_type == SwitchoverType.INSTANT
@@ -1002,7 +1109,26 @@ def check_for_indexing(self: Task, *, tenant_id: str) -> int | None:
redis_lock_dump(lock_beat, redis_client)
time_elapsed = time.monotonic() - time_start
task_logger.info(f"check_for_indexing finished: elapsed={time_elapsed:.2f}")
task_logger.info(
f"check_for_indexing finished: "
f"elapsed={time_elapsed:.2f}s "
f"primary=[evaluated={primary_result.evaluated} "
f"created={primary_result.created} "
f"skipped_active={primary_result.skipped_active} "
f"skipped_not_found={primary_result.skipped_not_found} "
f"skipped_not_indexable={primary_result.skipped_not_indexable} "
f"failed={primary_result.failed_to_create}]"
+ (
f" secondary=[evaluated={secondary_result.evaluated} "
f"created={secondary_result.created} "
f"skipped_active={secondary_result.skipped_active} "
f"skipped_not_found={secondary_result.skipped_not_found} "
f"skipped_not_indexable={secondary_result.skipped_not_indexable} "
f"failed={secondary_result.failed_to_create}]"
if secondary_result
else ""
)
)
return tasks_created

View File

@@ -5,6 +5,7 @@ from datetime import datetime
from datetime import timedelta
from datetime import timezone
import sentry_sdk
from celery import Celery
from sqlalchemy.orm import Session
@@ -68,6 +69,7 @@ from onyx.redis.redis_pool import get_redis_client
from onyx.server.features.build.indexing.persistent_document_writer import (
get_persistent_document_writer,
)
from onyx.server.metrics.connector_health_metrics import on_index_attempt_status_change
from onyx.utils.logger import setup_logger
from onyx.utils.middleware import make_randomized_onyx_request_id
from onyx.utils.postgres_sanitization import sanitize_document_for_postgres
@@ -267,6 +269,13 @@ def run_docfetching_entrypoint(
)
credential_id = attempt.connector_credential_pair.credential_id
on_index_attempt_status_change(
tenant_id=tenant_id,
source=attempt.connector_credential_pair.connector.source.value,
cc_pair_id=connector_credential_pair_id,
status="in_progress",
)
logger.info(
f"Docfetching starting{tenant_str}: "
f"connector='{connector_name}' "
@@ -556,6 +565,27 @@ def connector_document_extraction(
# save record of any failures at the connector level
if failure is not None:
if failure.exception is not None:
with sentry_sdk.new_scope() as scope:
scope.set_tag("stage", "connector_fetch")
scope.set_tag("connector_source", db_connector.source.value)
scope.set_tag("cc_pair_id", str(cc_pair_id))
scope.set_tag("index_attempt_id", str(index_attempt_id))
scope.set_tag("tenant_id", tenant_id)
if failure.failed_document:
scope.set_tag(
"doc_id", failure.failed_document.document_id
)
if failure.failed_entity:
scope.set_tag(
"entity_id", failure.failed_entity.entity_id
)
scope.fingerprint = [
"connector-fetch-failure",
db_connector.source.value,
type(failure.exception).__name__,
]
sentry_sdk.capture_exception(failure.exception)
total_failures += 1
with get_session_with_current_tenant() as db_session:
create_index_attempt_error(

View File

@@ -283,6 +283,7 @@ class NotificationType(str, Enum):
RELEASE_NOTES = "release_notes"
ASSISTANT_FILES_READY = "assistant_files_ready"
FEATURE_ANNOUNCEMENT = "feature_announcement"
CONNECTOR_REPEATED_ERRORS = "connector_repeated_errors"
class BlobType(str, Enum):

View File

@@ -0,0 +1,48 @@
from typing import Any
from sentry_sdk.types import Event
from onyx.utils.logger import setup_logger
logger = setup_logger()
_instance_id_resolved = False
def _add_instance_tags(
event: Event,
hint: dict[str, Any], # noqa: ARG001
) -> Event | None:
"""Sentry before_send hook that lazily attaches instance identification tags.
On the first event, resolves the instance UUID from the KV store (requires DB)
and sets it as a global Sentry tag. Subsequent events pick it up automatically.
"""
global _instance_id_resolved
if _instance_id_resolved:
return event
try:
import sentry_sdk
from shared_configs.configs import MULTI_TENANT
if MULTI_TENANT:
instance_id = "multi-tenant-cloud"
else:
from onyx.utils.telemetry import get_or_generate_uuid
instance_id = get_or_generate_uuid()
sentry_sdk.set_tag("instance_id", instance_id)
# Also set on this event since set_tag won't retroactively apply
event.setdefault("tags", {})["instance_id"] = instance_id
# Only mark resolved after success — if DB wasn't ready, retry next event
_instance_id_resolved = True
except Exception:
logger.debug("Failed to resolve instance_id for Sentry tagging")
return event

View File

@@ -27,16 +27,19 @@ _STATUS_TO_ERROR_CODE: dict[int, OnyxErrorCode] = {
401: OnyxErrorCode.CREDENTIAL_EXPIRED,
403: OnyxErrorCode.INSUFFICIENT_PERMISSIONS,
404: OnyxErrorCode.BAD_GATEWAY,
429: OnyxErrorCode.RATE_LIMITED,
}
def _error_code_for_status(status_code: int) -> OnyxErrorCode:
"""Map an HTTP status code to the appropriate OnyxErrorCode.
Expects a >= 400 status code. Known codes (401, 403, 404, 429) are
Expects a >= 400 status code. Known codes (401, 403, 404) are
mapped to specific error codes; all other codes (unrecognised 4xx
and 5xx) map to BAD_GATEWAY as unexpected upstream errors.
Note: 429 is intentionally omitted — the rl_requests wrapper
handles rate limits transparently at the HTTP layer, so 429
responses never reach this function.
"""
if status_code in _STATUS_TO_ERROR_CODE:
return _STATUS_TO_ERROR_CODE[status_code]

View File

@@ -1,10 +1,9 @@
from datetime import datetime
from datetime import timezone
from enum import StrEnum
from typing import Any
from typing import cast
from typing import Literal
from typing import NoReturn
from typing import TypeAlias
from pydantic import BaseModel
from retry import retry
@@ -25,8 +24,11 @@ from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.models import ConnectorCheckpoint
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import DocumentFailure
from onyx.connectors.models import EntityFailure
from onyx.connectors.models import ImageSection
from onyx.connectors.models import TextSection
from onyx.error_handling.exceptions import OnyxError
@@ -47,10 +49,6 @@ def _handle_canvas_api_error(e: OnyxError) -> NoReturn:
raise InsufficientPermissionsError(
"Canvas API token does not have sufficient permissions (HTTP 403)."
)
elif e.status_code == 429:
raise ConnectorValidationError(
"Canvas rate-limit exceeded (HTTP 429). Please try again later."
)
elif e.status_code >= 500:
raise UnexpectedValidationError(
f"Unexpected Canvas HTTP error (status={e.status_code}): {e}"
@@ -61,6 +59,60 @@ def _handle_canvas_api_error(e: OnyxError) -> NoReturn:
)
class CanvasStage(StrEnum):
PAGES = "pages"
ASSIGNMENTS = "assignments"
ANNOUNCEMENTS = "announcements"
_STAGE_CONFIG: dict[CanvasStage, dict[str, Any]] = {
CanvasStage.PAGES: {
"endpoint": "courses/{course_id}/pages",
"params": {
"per_page": "100",
"include[]": "body",
"published": "true",
"sort": "updated_at",
"order": "desc",
},
},
CanvasStage.ASSIGNMENTS: {
"endpoint": "courses/{course_id}/assignments",
"params": {"per_page": "100", "published": "true"},
},
CanvasStage.ANNOUNCEMENTS: {
"endpoint": "announcements",
"params": {
"per_page": "100",
"context_codes[]": "course_{course_id}",
"active_only": "true",
},
},
}
def _parse_canvas_dt(timestamp_str: str) -> datetime:
"""Parse a Canvas ISO-8601 timestamp (e.g. '2025-06-15T12:00:00Z')
into a timezone-aware UTC datetime.
Canvas returns timestamps with a trailing 'Z' instead of '+00:00',
so we normalise before parsing.
"""
return datetime.fromisoformat(timestamp_str.replace("Z", "+00:00")).astimezone(
timezone.utc
)
def _unix_to_canvas_time(epoch: float) -> str:
"""Convert a Unix timestamp to Canvas ISO-8601 format (e.g. '2025-06-15T12:00:00Z')."""
return datetime.fromtimestamp(epoch, tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
def _in_time_window(timestamp_str: str, start: float, end: float) -> bool:
"""Check whether a Canvas ISO-8601 timestamp falls within (start, end]."""
return start < _parse_canvas_dt(timestamp_str).timestamp() <= end
class CanvasCourse(BaseModel):
id: int
name: str | None = None
@@ -145,9 +197,6 @@ class CanvasAnnouncement(BaseModel):
)
CanvasStage: TypeAlias = Literal["pages", "assignments", "announcements"]
class CanvasConnectorCheckpoint(ConnectorCheckpoint):
"""Checkpoint state for resumable Canvas indexing.
@@ -165,15 +214,30 @@ class CanvasConnectorCheckpoint(ConnectorCheckpoint):
course_ids: list[int] = []
current_course_index: int = 0
stage: CanvasStage = "pages"
stage: CanvasStage = CanvasStage.PAGES
next_url: str | None = None
def advance_course(self) -> None:
"""Move to the next course and reset within-course state."""
self.current_course_index += 1
self.stage = "pages"
self.stage = CanvasStage.PAGES
self.next_url = None
def advance_stage(self) -> None:
"""Advance past the current stage.
Moves to the next stage within the same course, or to the next
course if the current stage is the last one. Resets next_url so
the next call starts fresh on the new stage.
"""
self.next_url = None
stages: list[CanvasStage] = list(CanvasStage)
next_idx = stages.index(self.stage) + 1
if next_idx < len(stages):
self.stage = stages[next_idx]
else:
self.advance_course()
class CanvasConnector(
CheckpointedConnectorWithPermSync[CanvasConnectorCheckpoint],
@@ -295,13 +359,7 @@ class CanvasConnector(
if body_text:
text_parts.append(body_text)
doc_updated_at = (
datetime.fromisoformat(page.updated_at.replace("Z", "+00:00")).astimezone(
timezone.utc
)
if page.updated_at
else None
)
doc_updated_at = _parse_canvas_dt(page.updated_at) if page.updated_at else None
document = self._build_document(
doc_id=f"canvas-page-{page.course_id}-{page.page_id}",
@@ -325,17 +383,11 @@ class CanvasConnector(
if desc_text:
text_parts.append(desc_text)
if assignment.due_at:
due_dt = datetime.fromisoformat(
assignment.due_at.replace("Z", "+00:00")
).astimezone(timezone.utc)
due_dt = _parse_canvas_dt(assignment.due_at)
text_parts.append(f"Due: {due_dt.strftime('%B %d, %Y %H:%M UTC')}")
doc_updated_at = (
datetime.fromisoformat(
assignment.updated_at.replace("Z", "+00:00")
).astimezone(timezone.utc)
if assignment.updated_at
else None
_parse_canvas_dt(assignment.updated_at) if assignment.updated_at else None
)
document = self._build_document(
@@ -361,11 +413,7 @@ class CanvasConnector(
text_parts.append(msg_text)
doc_updated_at = (
datetime.fromisoformat(
announcement.posted_at.replace("Z", "+00:00")
).astimezone(timezone.utc)
if announcement.posted_at
else None
_parse_canvas_dt(announcement.posted_at) if announcement.posted_at else None
)
document = self._build_document(
@@ -400,6 +448,314 @@ class CanvasConnector(
self._canvas_client = client
return None
def _fetch_stage_page(
self,
next_url: str | None,
endpoint: str,
params: dict[str, Any],
) -> tuple[list[Any], str | None]:
"""Fetch one page of API results for the current stage.
Returns (items, next_url). All error handling is done by the
caller (_load_from_checkpoint).
"""
if next_url:
# Resuming mid-pagination: the next_url from Canvas's
# Link header already contains endpoint + query params.
response, result_next_url = self.canvas_client.get(full_url=next_url)
else:
# First request for this stage: build from endpoint + params.
response, result_next_url = self.canvas_client.get(
endpoint=endpoint, params=params
)
return response or [], result_next_url
def _process_items(
self,
response: list[Any],
stage: CanvasStage,
course_id: int,
start: float,
end: float,
include_permissions: bool,
) -> tuple[list[Document | ConnectorFailure], bool]:
"""Process a page of API results into documents.
Returns (docs, early_exit). early_exit is True when pages
(sorted desc by updated_at) hit an item older than start,
signaling that pagination should stop.
"""
results: list[Document | ConnectorFailure] = []
early_exit = False
for item in response:
try:
if stage == CanvasStage.PAGES:
page = CanvasPage.from_api(item, course_id=course_id)
if not page.updated_at:
continue
# Pages are sorted by updated_at desc — once we see
# an item at or before `start`, all remaining items
# on this and subsequent pages are older too.
if not _in_time_window(page.updated_at, start, end):
if _parse_canvas_dt(page.updated_at).timestamp() <= start:
early_exit = True
break
# ts > end: page is newer than our window, skip it
continue
doc = self._convert_page_to_document(page)
results.append(
self._maybe_attach_permissions(
doc, course_id, include_permissions
)
)
elif stage == CanvasStage.ASSIGNMENTS:
assignment = CanvasAssignment.from_api(item, course_id=course_id)
if not assignment.updated_at or not _in_time_window(
assignment.updated_at, start, end
):
continue
doc = self._convert_assignment_to_document(assignment)
results.append(
self._maybe_attach_permissions(
doc, course_id, include_permissions
)
)
elif stage == CanvasStage.ANNOUNCEMENTS:
announcement = CanvasAnnouncement.from_api(
item, course_id=course_id
)
if not announcement.posted_at:
logger.debug(
f"Skipping announcement {announcement.id} in "
f"course {course_id}: no posted_at"
)
continue
if not _in_time_window(announcement.posted_at, start, end):
continue
doc = self._convert_announcement_to_document(announcement)
results.append(
self._maybe_attach_permissions(
doc, course_id, include_permissions
)
)
except Exception as e:
item_id = item.get("id") or item.get("page_id", "unknown")
if stage == CanvasStage.PAGES:
doc_link = (
f"{self.canvas_base_url}/courses/{course_id}"
f"/pages/{item.get('url', '')}"
)
else:
doc_link = item.get("html_url", "")
results.append(
ConnectorFailure(
failed_document=DocumentFailure(
document_id=f"canvas-{stage.removesuffix('s')}-{course_id}-{item_id}",
document_link=doc_link,
),
failure_message=f"Failed to process {stage.removesuffix('s')}: {e}",
exception=e,
)
)
return results, early_exit
def _maybe_attach_permissions(
    self,
    document: Document,
    course_id: int,
    include_permissions: bool,
) -> Document:
    """Attach course-level external access to the document when requested.

    Returns the same document instance, mutated in place when
    include_permissions is True.
    """
    if not include_permissions:
        return document
    document.external_access = self._get_course_permissions(course_id)
    return document
def _load_from_checkpoint(
    self,
    start: SecondsSinceUnixEpoch,
    end: SecondsSinceUnixEpoch,
    checkpoint: CanvasConnectorCheckpoint,
    include_permissions: bool = False,
) -> CheckpointOutput[CanvasConnectorCheckpoint]:
    """Shared implementation for load_from_checkpoint and load_from_checkpoint_with_perm_sync.

    NOTE: this is a generator — it yields Documents/ConnectorFailures and
    ends with `return new_checkpoint`, which becomes the generator's final
    value. Each invocation fetches at most one page of one stage of one
    course, so progress is persisted at fine granularity.
    """
    new_checkpoint = checkpoint.model_copy(deep=True)
    # First call: materialize the list of course IDs.
    # On failure, let the exception propagate so the framework fails the
    # attempt cleanly. Swallowing errors here would leave the checkpoint
    # state unchanged and cause an infinite retry loop.
    if not new_checkpoint.course_ids:
        try:
            courses = self._list_courses()
        except OnyxError as e:
            if e.status_code in (401, 403):
                _handle_canvas_api_error(e)  # NoReturn — always raises
            raise
        new_checkpoint.course_ids = [c.id for c in courses]
        logger.info(f"Found {len(courses)} Canvas courses to process")
        new_checkpoint.has_more = len(new_checkpoint.course_ids) > 0
        return new_checkpoint
    # All courses done.
    if new_checkpoint.current_course_index >= len(new_checkpoint.course_ids):
        new_checkpoint.has_more = False
        return new_checkpoint
    # Resume at the course the previous invocation stopped on.
    course_id = new_checkpoint.course_ids[new_checkpoint.current_course_index]
    try:
        stage = CanvasStage(new_checkpoint.stage)
    except ValueError as e:
        # A corrupt/legacy checkpoint stage value is unrecoverable here.
        raise ValueError(
            f"Invalid checkpoint stage: {new_checkpoint.stage!r}. "
            f"Valid stages: {[s.value for s in CanvasStage]}"
        ) from e
    # Build endpoint + params from the static template.
    config = _STAGE_CONFIG[stage]
    endpoint = config["endpoint"].format(course_id=course_id)
    params = {k: v.format(course_id=course_id) for k, v in config["params"].items()}
    # Only the announcements API supports server-side date filtering
    # (start_date/end_date). Pages support server-side sorting
    # (sort=updated_at desc) enabling early exit, but not date
    # filtering. Assignments support neither. Both are filtered
    # client-side via _in_time_window after fetching.
    if stage == CanvasStage.ANNOUNCEMENTS:
        params["start_date"] = _unix_to_canvas_time(start)
        params["end_date"] = _unix_to_canvas_time(end)
    try:
        response, result_next_url = self._fetch_stage_page(
            next_url=new_checkpoint.next_url,
            endpoint=endpoint,
            params=params,
        )
    except OnyxError as oe:
        # Security errors from _parse_next_link (host/scheme
        # mismatch on pagination URLs) have no status code override
        # and must not be silenced.
        is_api_error = oe._status_code_override is not None
        if not is_api_error:
            raise
        if oe.status_code in (401, 403):
            _handle_canvas_api_error(oe)  # NoReturn — always raises
        # 404 means the course itself is gone or inaccessible. The
        # other stages on this course will hit the same 404, so skip
        # the whole course rather than burning API calls on each stage.
        if oe.status_code == 404:
            logger.warning(
                f"Canvas course {course_id} not found while fetching "
                f"{stage} (HTTP 404). Skipping course."
            )
            yield ConnectorFailure(
                failed_entity=EntityFailure(
                    entity_id=f"canvas-course-{course_id}",
                ),
                failure_message=(f"Canvas course {course_id} not found: {oe}"),
                exception=oe,
            )
            new_checkpoint.advance_course()
        else:
            # Any other API error: record a failure for this stage only.
            logger.warning(
                f"Failed to fetch {stage} for course {course_id}: {oe}. "
                f"Skipping remainder of this stage."
            )
            yield ConnectorFailure(
                failed_entity=EntityFailure(
                    entity_id=f"canvas-{stage}-{course_id}",
                ),
                failure_message=(
                    f"Failed to fetch {stage} for course {course_id}: {oe}"
                ),
                exception=oe,
            )
            new_checkpoint.advance_stage()
        new_checkpoint.has_more = new_checkpoint.current_course_index < len(
            new_checkpoint.course_ids
        )
        return new_checkpoint
    except Exception as e:
        # Unknown error — skip the stage and try to continue.
        logger.warning(
            f"Failed to fetch {stage} for course {course_id}: {e}. "
            f"Skipping remainder of this stage."
        )
        yield ConnectorFailure(
            failed_entity=EntityFailure(
                entity_id=f"canvas-{stage}-{course_id}",
            ),
            failure_message=(
                f"Failed to fetch {stage} for course {course_id}: {e}"
            ),
            exception=e,
        )
        new_checkpoint.advance_stage()
        new_checkpoint.has_more = new_checkpoint.current_course_index < len(
            new_checkpoint.course_ids
        )
        return new_checkpoint
    # Process fetched items
    results, early_exit = self._process_items(
        response, stage, course_id, start, end, include_permissions
    )
    for result in results:
        yield result
    # If we hit an item older than our window (pages sorted desc),
    # skip remaining pagination and advance to the next stage.
    if early_exit:
        result_next_url = None
    # If there are more pages, save the cursor and return
    if result_next_url:
        new_checkpoint.next_url = result_next_url
    else:
        # Stage complete — advance to next stage (or next course if last).
        new_checkpoint.advance_stage()
    new_checkpoint.has_more = new_checkpoint.current_course_index < len(
        new_checkpoint.course_ids
    )
    return new_checkpoint
@override
def load_from_checkpoint(
    self,
    start: SecondsSinceUnixEpoch,
    end: SecondsSinceUnixEpoch,
    checkpoint: CanvasConnectorCheckpoint,
) -> CheckpointOutput[CanvasConnectorCheckpoint]:
    """Load documents from checkpoint without permission information."""
    return self._load_from_checkpoint(
        start=start,
        end=end,
        checkpoint=checkpoint,
        include_permissions=False,
    )
@override
def load_from_checkpoint_with_perm_sync(
    self,
    start: SecondsSinceUnixEpoch,
    end: SecondsSinceUnixEpoch,
    checkpoint: CanvasConnectorCheckpoint,
) -> CheckpointOutput[CanvasConnectorCheckpoint]:
    """Load documents from checkpoint with permission information included."""
    return self._load_from_checkpoint(
        start=start,
        end=end,
        checkpoint=checkpoint,
        include_permissions=True,
    )
@override
def build_dummy_checkpoint(self) -> CanvasConnectorCheckpoint:
    """Return the initial checkpoint used to kick off an indexing run."""
    initial_checkpoint = CanvasConnectorCheckpoint(has_more=True)
    return initial_checkpoint
@override
def validate_checkpoint_json(
    self, checkpoint_json: str
) -> CanvasConnectorCheckpoint:
    """Deserialize and validate a JSON checkpoint for this connector."""
    parsed = CanvasConnectorCheckpoint.model_validate_json(checkpoint_json)
    return parsed
@override
def validate_connector_settings(self) -> None:
"""Validate Canvas connector settings by testing API access."""
@@ -415,38 +771,6 @@ class CanvasConnector(
f"Unexpected error during Canvas settings validation: {exc}"
)
@override
def load_from_checkpoint(
self,
start: SecondsSinceUnixEpoch,
end: SecondsSinceUnixEpoch,
checkpoint: CanvasConnectorCheckpoint,
) -> CheckpointOutput[CanvasConnectorCheckpoint]:
# TODO(benwu408): implemented in PR3 (checkpoint)
raise NotImplementedError
@override
def load_from_checkpoint_with_perm_sync(
self,
start: SecondsSinceUnixEpoch,
end: SecondsSinceUnixEpoch,
checkpoint: CanvasConnectorCheckpoint,
) -> CheckpointOutput[CanvasConnectorCheckpoint]:
# TODO(benwu408): implemented in PR3 (checkpoint)
raise NotImplementedError
@override
def build_dummy_checkpoint(self) -> CanvasConnectorCheckpoint:
# TODO(benwu408): implemented in PR3 (checkpoint)
raise NotImplementedError
@override
def validate_checkpoint_json(
self, checkpoint_json: str
) -> CanvasConnectorCheckpoint:
# TODO(benwu408): implemented in PR3 (checkpoint)
raise NotImplementedError
@override
def retrieve_all_slim_docs_perm_sync(
self,

View File

@@ -171,7 +171,10 @@ class ClickupConnector(LoadConnector, PollConnector):
document.metadata[extra_field] = task[extra_field]
if self.retrieve_task_comments:
document.sections.extend(self._get_task_comments(task["id"]))
document.sections = [
*document.sections,
*self._get_task_comments(task["id"]),
]
doc_batch.append(document)

View File

@@ -0,0 +1,65 @@
import csv
import io
from typing import IO
from onyx.connectors.models import TabularSection
from onyx.file_processing.extract_file_text import file_io_to_text
from onyx.file_processing.extract_file_text import xlsx_sheet_extraction
from onyx.file_processing.file_types import OnyxFileExtensions
from onyx.utils.logger import setup_logger
logger = setup_logger()
def is_tabular_file(file_name: str) -> bool:
    """Return True when the filename's extension (case-insensitive) is one
    of the supported tabular extensions."""
    return file_name.lower().endswith(tuple(OnyxFileExtensions.TABULAR_EXTENSIONS))
def _tsv_to_csv(tsv_text: str) -> str:
"""Re-serialize tab-separated text as CSV so downstream parsers that
assume the default Excel dialect read the columns correctly."""
out = io.StringIO()
csv.writer(out, lineterminator="\n").writerows(
csv.reader(io.StringIO(tsv_text), dialect="excel-tab")
)
return out.getvalue().rstrip("\n")
def tabular_file_to_sections(
    file: IO[bytes],
    file_name: str,
    link: str = "",
) -> list[TabularSection]:
    """Convert a tabular file into one or more TabularSections.

    - .xlsx → one TabularSection per non-empty sheet.
    - .csv / .tsv → a single TabularSection containing the full decoded
      file.
    Returns an empty list when the file yields no extractable content.
    """
    name_lower = file_name.lower()
    if name_lower.endswith(".xlsx"):
        sections: list[TabularSection] = []
        for csv_text, sheet_title in xlsx_sheet_extraction(file, file_name=file_name):
            sections.append(
                TabularSection(link=f"{file_name} :: {sheet_title}", text=csv_text)
            )
        return sections
    if not name_lower.endswith((".csv", ".tsv")):
        raise ValueError(f"{file_name!r} is not a tabular file")
    try:
        text = file_io_to_text(file).strip()
    except Exception:
        logger.exception(f"Failure decoding {file_name}")
        raise
    if not text:
        return []
    if name_lower.endswith(".tsv"):
        # Normalize tab-delimited content to the default CSV dialect.
        text = _tsv_to_csv(text)
    return [TabularSection(link=link or file_name, text=text)]

View File

@@ -75,6 +75,7 @@ from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import NormalizationResult
from onyx.connectors.interfaces import Resolver
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import ConnectorMissingCredentialError
@@ -207,6 +208,7 @@ class DriveIdStatus(Enum):
class GoogleDriveConnector(
SlimConnector,
SlimConnectorWithPermSync,
CheckpointedConnectorWithPermSync[GoogleDriveCheckpoint],
Resolver,
@@ -1754,6 +1756,7 @@ class GoogleDriveConnector(
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
callback: IndexingHeartbeatInterface | None = None,
include_permissions: bool = True,
) -> GenerateSlimDocumentOutput:
files_batch: list[RetrievedDriveFile] = []
slim_batch: list[SlimDocument | HierarchyNode] = []
@@ -1763,9 +1766,13 @@ class GoogleDriveConnector(
nonlocal files_batch, slim_batch
# Get new ancestor hierarchy nodes first
permission_sync_context = PermissionSyncContext(
primary_admin_email=self.primary_admin_email,
google_domain=self.google_domain,
permission_sync_context = (
PermissionSyncContext(
primary_admin_email=self.primary_admin_email,
google_domain=self.google_domain,
)
if include_permissions
else None
)
new_ancestors = self._get_new_ancestors_for_files(
files=files_batch,
@@ -1779,10 +1786,7 @@ class GoogleDriveConnector(
if doc := build_slim_document(
self.creds,
file.drive_file,
PermissionSyncContext(
primary_admin_email=self.primary_admin_email,
google_domain=self.google_domain,
),
permission_sync_context,
retriever_email=file.user_email,
):
slim_batch.append(doc)
@@ -1822,11 +1826,12 @@ class GoogleDriveConnector(
if files_batch:
yield _yield_slim_batch()
def retrieve_all_slim_docs_perm_sync(
def _retrieve_all_slim_docs_impl(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
callback: IndexingHeartbeatInterface | None = None,
include_permissions: bool = True,
) -> GenerateSlimDocumentOutput:
try:
checkpoint = self.build_dummy_checkpoint()
@@ -1836,13 +1841,34 @@ class GoogleDriveConnector(
start=start,
end=end,
callback=callback,
include_permissions=include_permissions,
)
logger.info("Drive perm sync: Slim doc retrieval complete")
logger.info("Drive slim doc retrieval complete")
except Exception as e:
if MISSING_SCOPES_ERROR_STR in str(e):
raise PermissionError(ONYX_SCOPE_INSTRUCTIONS) from e
raise e
raise
@override
def retrieve_all_slim_docs(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
callback: IndexingHeartbeatInterface | None = None,
) -> GenerateSlimDocumentOutput:
return self._retrieve_all_slim_docs_impl(
start=start, end=end, callback=callback, include_permissions=False
)
def retrieve_all_slim_docs_perm_sync(
self,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
callback: IndexingHeartbeatInterface | None = None,
) -> GenerateSlimDocumentOutput:
return self._retrieve_all_slim_docs_impl(
start=start, end=end, callback=callback, include_permissions=True
)
def validate_connector_settings(self) -> None:
if self._creds is None:

View File

@@ -123,6 +123,9 @@ class SlimConnector(BaseConnector):
@abc.abstractmethod
def retrieve_all_slim_docs(
    self,
    start: SecondsSinceUnixEpoch | None = None,
    end: SecondsSinceUnixEpoch | None = None,
    callback: IndexingHeartbeatInterface | None = None,
) -> GenerateSlimDocumentOutput:
    """Yield batches of slim documents, optionally bounded by [start, end].

    Abstract — concrete connectors supply the actual retrieval logic.
    # NOTE(review): `callback` is presumably a heartbeat hook for long
    # runs — confirm against implementing connectors.
    """
    raise NotImplementedError

View File

@@ -1,4 +1,5 @@
import sys
from collections.abc import Sequence
from datetime import datetime
from enum import Enum
from typing import Any
@@ -39,6 +40,7 @@ class SectionType(str, Enum):
TEXT = "text"
IMAGE = "image"
TABULAR = "tabular"
class Section(BaseModel):
@@ -70,6 +72,18 @@ class ImageSection(Section):
return sys.getsizeof(self.image_file_id) + sys.getsizeof(self.link)
class TabularSection(Section):
"""Section containing tabular data (csv/tsv content, or one sheet of
an xlsx workbook rendered as CSV)."""
type: Literal[SectionType.TABULAR] = SectionType.TABULAR
text: str # CSV representation in a string
link: str
def __sizeof__(self) -> int:
return sys.getsizeof(self.text) + sys.getsizeof(self.link)
class BasicExpertInfo(BaseModel):
"""Basic Information for the owner of a document, any of the fields can be left as None
Display fallback goes as follows:
@@ -171,7 +185,7 @@ class DocumentBase(BaseModel):
"""Used for Onyx ingestion api, the ID is inferred before use if not provided"""
id: str | None = None
sections: list[TextSection | ImageSection]
sections: Sequence[TextSection | ImageSection | TabularSection]
source: DocumentSource | None = None
semantic_identifier: str # displayed in the UI as the main identifier for the doc
# TODO(andrei): Ideally we could improve this to where each value is just a
@@ -381,12 +395,9 @@ class IndexingDocument(Document):
)
else:
section_len = sum(
(
len(section.text)
if isinstance(section, TextSection) and section.text is not None
else 0
)
len(section.text) if section.text is not None else 0
for section in self.sections
if isinstance(section, (TextSection, TabularSection))
)
return title_len + section_len

View File

@@ -750,31 +750,3 @@ def resync_cc_pair(
)
db_session.commit()
# ── Metrics query helpers ──────────────────────────────────────────────
def get_connector_health_for_metrics(
    db_session: Session,
) -> list:  # Returns list of Row tuples
    """Return connector health data for Prometheus metrics.

    Each row is (cc_pair_id, status, in_repeated_error_state,
    last_successful_index_time, name, source).
    """
    return (
        db_session.query(
            ConnectorCredentialPair.id,
            ConnectorCredentialPair.status,
            ConnectorCredentialPair.in_repeated_error_state,
            ConnectorCredentialPair.last_successful_index_time,
            ConnectorCredentialPair.name,
            Connector.source,
        )
        # Inner join: cc_pairs whose connector row is missing are excluded.
        .join(
            Connector,
            ConnectorCredentialPair.connector_id == Connector.id,
        )
        .all()
    )

View File

@@ -2,8 +2,6 @@ from collections.abc import Sequence
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from typing import NamedTuple
from typing import TYPE_CHECKING
from typing import TypeVarTuple
from sqlalchemy import and_
@@ -30,17 +28,6 @@ from onyx.utils.logger import setup_logger
from onyx.utils.telemetry import optional_telemetry
from onyx.utils.telemetry import RecordType
if TYPE_CHECKING:
from onyx.configs.constants import DocumentSource
# from sqlalchemy.sql.selectable import Select
# Comment out unused imports that cause mypy errors
# from onyx.auth.models import UserRole
# from onyx.configs.constants import MAX_LAST_VALID_CHECKPOINT_AGE_SECONDS
# from onyx.db.connector_credential_pair import ConnectorCredentialPairIdentifier
# from onyx.db.engine import async_query_for_dms
logger = setup_logger()
@@ -981,104 +968,48 @@ def get_index_attempt_errors_for_cc_pair(
return list(db_session.scalars(stmt).all())
# ── Metrics query helpers ──────────────────────────────────────────────
class ActiveIndexAttemptMetric(NamedTuple):
"""Row returned by get_active_index_attempts_for_metrics."""
status: IndexingStatus
source: "DocumentSource"
cc_pair_id: int
cc_pair_name: str | None
attempt_count: int
def get_active_index_attempts_for_metrics(
def get_index_attempt_errors_across_connectors(
db_session: Session,
) -> list[ActiveIndexAttemptMetric]:
"""Return non-terminal index attempts grouped by status, source, and connector.
cc_pair_id: int | None = None,
error_type: str | None = None,
start_time: datetime | None = None,
end_time: datetime | None = None,
unresolved_only: bool = True,
page: int = 0,
page_size: int = 25,
) -> tuple[list[IndexAttemptError], int]:
"""Query index attempt errors across all connectors with optional filters.
Each row is (status, source, cc_pair_id, cc_pair_name, attempt_count).
Returns (errors, total_count) for pagination.
"""
from onyx.db.models import Connector
stmt = select(IndexAttemptError)
count_stmt = select(func.count()).select_from(IndexAttemptError)
terminal_statuses = [s for s in IndexingStatus if s.is_terminal()]
rows = (
db_session.query(
IndexAttempt.status,
Connector.source,
ConnectorCredentialPair.id,
ConnectorCredentialPair.name,
func.count(),
if cc_pair_id is not None:
stmt = stmt.where(IndexAttemptError.connector_credential_pair_id == cc_pair_id)
count_stmt = count_stmt.where(
IndexAttemptError.connector_credential_pair_id == cc_pair_id
)
.join(
ConnectorCredentialPair,
IndexAttempt.connector_credential_pair_id == ConnectorCredentialPair.id,
)
.join(
Connector,
ConnectorCredentialPair.connector_id == Connector.id,
)
.filter(IndexAttempt.status.notin_(terminal_statuses))
.group_by(
IndexAttempt.status,
Connector.source,
ConnectorCredentialPair.id,
ConnectorCredentialPair.name,
)
.all()
)
return [ActiveIndexAttemptMetric(*row) for row in rows]
if error_type is not None:
stmt = stmt.where(IndexAttemptError.error_type == error_type)
count_stmt = count_stmt.where(IndexAttemptError.error_type == error_type)
def get_failed_attempt_counts_by_cc_pair(
db_session: Session,
since: datetime | None = None,
) -> dict[int, int]:
"""Return {cc_pair_id: failed_attempt_count} for all connectors.
if unresolved_only:
stmt = stmt.where(IndexAttemptError.is_resolved.is_(False))
count_stmt = count_stmt.where(IndexAttemptError.is_resolved.is_(False))
When ``since`` is provided, only attempts created after that timestamp
are counted. Defaults to the last 90 days to avoid unbounded historical
aggregation.
"""
if since is None:
since = datetime.now(timezone.utc) - timedelta(days=90)
if start_time is not None:
stmt = stmt.where(IndexAttemptError.time_created >= start_time)
count_stmt = count_stmt.where(IndexAttemptError.time_created >= start_time)
rows = (
db_session.query(
IndexAttempt.connector_credential_pair_id,
func.count(),
)
.filter(IndexAttempt.status == IndexingStatus.FAILED)
.filter(IndexAttempt.time_created >= since)
.group_by(IndexAttempt.connector_credential_pair_id)
.all()
)
return {cc_id: count for cc_id, count in rows}
if end_time is not None:
stmt = stmt.where(IndexAttemptError.time_created <= end_time)
count_stmt = count_stmt.where(IndexAttemptError.time_created <= end_time)
stmt = stmt.order_by(desc(IndexAttemptError.time_created))
stmt = stmt.offset(page * page_size).limit(page_size)
def get_docs_indexed_by_cc_pair(
db_session: Session,
since: datetime | None = None,
) -> dict[int, int]:
"""Return {cc_pair_id: total_new_docs_indexed} across successful attempts.
Only counts attempts with status SUCCESS to avoid inflating counts with
partial results from failed attempts. When ``since`` is provided, only
attempts created after that timestamp are included.
"""
if since is None:
since = datetime.now(timezone.utc) - timedelta(days=90)
query = (
db_session.query(
IndexAttempt.connector_credential_pair_id,
func.sum(func.coalesce(IndexAttempt.new_docs_indexed, 0)),
)
.filter(IndexAttempt.status == IndexingStatus.SUCCESS)
.filter(IndexAttempt.time_created >= since)
.group_by(IndexAttempt.connector_credential_pair_id)
)
rows = query.all()
return {cc_id: int(total or 0) for cc_id, total in rows}
total = db_session.scalar(count_stmt) or 0
errors = list(db_session.scalars(stmt).all())
return errors, total

View File

@@ -379,13 +379,25 @@ def _worksheet_to_matrix(
worksheet: Worksheet,
) -> list[list[str]]:
"""
Converts a singular worksheet to a matrix of values
Converts a singular worksheet to a matrix of values.
Rows are padded to a uniform width. In openpyxl's read_only mode,
iter_rows can yield rows of differing lengths (trailing empty cells
are sometimes omitted), and downstream column cleanup assumes a
rectangular matrix.
"""
rows: list[list[str]] = []
max_len = 0
for worksheet_row in worksheet.iter_rows(min_row=1, values_only=True):
row = ["" if cell is None else str(cell) for cell in worksheet_row]
if len(row) > max_len:
max_len = len(row)
rows.append(row)
for row in rows:
if len(row) < max_len:
row.extend([""] * (max_len - len(row)))
return rows
@@ -463,29 +475,13 @@ def _remove_empty_runs(
return result
def xlsx_to_text(file: IO[Any], file_name: str = "") -> str:
# TODO: switch back to this approach in a few months when markitdown
# fixes their handling of excel files
def xlsx_sheet_extraction(file: IO[Any], file_name: str = "") -> list[tuple[str, str]]:
"""
Converts each sheet in the excel file to a csv condensed string.
Returns a string and the worksheet title for each worksheet
# md = get_markitdown_converter()
# stream_info = StreamInfo(
# mimetype=SPREADSHEET_MIME_TYPE, filename=file_name or None, extension=".xlsx"
# )
# try:
# workbook = md.convert(to_bytesio(file), stream_info=stream_info)
# except (
# BadZipFile,
# ValueError,
# FileConversionException,
# UnsupportedFormatException,
# ) as e:
# error_str = f"Failed to extract text from {file_name or 'xlsx file'}: {e}"
# if file_name.startswith("~"):
# logger.debug(error_str + " (this is expected for files with ~)")
# else:
# logger.warning(error_str)
# return ""
# return workbook.markdown
Returns a list of (csv_text, sheet)
"""
try:
workbook = openpyxl.load_workbook(file, read_only=True)
except BadZipFile as e:
@@ -494,23 +490,30 @@ def xlsx_to_text(file: IO[Any], file_name: str = "") -> str:
logger.debug(error_str + " (this is expected for files with ~)")
else:
logger.warning(error_str)
return ""
return []
except Exception as e:
if any(s in str(e) for s in KNOWN_OPENPYXL_BUGS):
logger.error(
f"Failed to extract text from {file_name or 'xlsx file'}. This happens due to a bug in openpyxl. {e}"
)
return ""
return []
raise
text_content = []
sheets: list[tuple[str, str]] = []
for sheet in workbook.worksheets:
sheet_matrix = _clean_worksheet_matrix(_worksheet_to_matrix(sheet))
buf = io.StringIO()
writer = csv.writer(buf, lineterminator="\n")
writer.writerows(sheet_matrix)
text_content.append(buf.getvalue().rstrip("\n"))
return TEXT_SECTION_SEPARATOR.join(text_content)
csv_text = buf.getvalue().rstrip("\n")
if csv_text.strip():
sheets.append((csv_text, sheet.title))
return sheets
def xlsx_to_text(file: IO[Any], file_name: str = "") -> str:
sheets = xlsx_sheet_extraction(file, file_name)
return TEXT_SECTION_SEPARATOR.join(csv_text for csv_text, _title in sheets)
def eml_to_text(file: IO[Any]) -> str:

View File

@@ -7,6 +7,7 @@ from onyx.indexing.chunking.image_section_chunker import ImageChunker
from onyx.indexing.chunking.section_chunker import AccumulatorState
from onyx.indexing.chunking.section_chunker import ChunkPayload
from onyx.indexing.chunking.section_chunker import SectionChunker
from onyx.indexing.chunking.tabular_section_chunker import TabularChunker
from onyx.indexing.chunking.text_section_chunker import TextChunker
from onyx.indexing.models import DocAwareChunk
from onyx.natural_language_processing.utils import BaseTokenizer
@@ -38,6 +39,7 @@ class DocumentChunker:
chunk_splitter=chunk_splitter,
),
SectionType.IMAGE: ImageChunker(),
SectionType.TABULAR: TabularChunker(tokenizer=tokenizer),
}
def chunk(
@@ -99,7 +101,9 @@ class DocumentChunker:
payloads.extend(result.payloads)
accumulator = result.accumulator
# Final flush — any leftover buffered text becomes one last payload.
payloads.extend(accumulator.flush_to_list())
return payloads
def _select_chunker(self, section: Section) -> SectionChunker:

View File

@@ -0,0 +1,272 @@
import csv
import io
from collections.abc import Iterable
from pydantic import BaseModel
from onyx.connectors.models import Section
from onyx.indexing.chunking.section_chunker import AccumulatorState
from onyx.indexing.chunking.section_chunker import ChunkPayload
from onyx.indexing.chunking.section_chunker import SectionChunker
from onyx.indexing.chunking.section_chunker import SectionChunkerOutput
from onyx.natural_language_processing.utils import BaseTokenizer
from onyx.natural_language_processing.utils import count_tokens
from onyx.natural_language_processing.utils import split_text_by_tokens
from onyx.utils.logger import setup_logger
logger = setup_logger()
# Prefix for the synthesized line that names a sheet's columns.
COLUMNS_MARKER = "Columns:"
# Separator between field=value pairs within one formatted row.
FIELD_VALUE_SEPARATOR = ", "
# Separator between formatted rows within a chunk.
ROW_JOIN = "\n"
# Token cost charged for each joining newline (additive approximation).
NEWLINE_TOKENS = 1
class _ParsedRow(BaseModel):
    # One CSV data row paired with the header row of its sheet.
    header: list[str]
    row: list[str]
class _TokenizedText(BaseModel):
    # A text fragment together with its precomputed token count.
    text: str
    token_count: int
def format_row(header: list[str], row: list[str]) -> str:
    """
    A header-row combination is formatted like this:
    field1=value1, field2=value2, field3=value3
    (empty-valued fields are dropped by _row_to_pairs)
    """
    return FIELD_VALUE_SEPARATOR.join(
        f"{field}={value}" for field, value in _row_to_pairs(header, row)
    )
def format_columns_header(headers: list[str]) -> str:
    """
    Format the column header line. Underscored headers get a
    space-substituted friendly alias in parens.

    Example:
        headers = ["id", "MTTR_hours"]
        => "Columns: id, MTTR_hours (MTTR hours)"
    """
    rendered = [
        f'{name} ({name.replace("_", " ")})' if "_" in name else name
        for name in headers
    ]
    return f"{COLUMNS_MARKER} " + FIELD_VALUE_SEPARATOR.join(rendered)
def parse_section(section: Section) -> list[_ParsedRow]:
    """Parse CSV into headers + rows. First non-empty row is the header;
    blank rows are skipped."""
    raw_text = section.text or ""
    if not raw_text.strip():
        return []
    kept_rows: list[list[str]] = []
    for parsed in csv.reader(io.StringIO(raw_text)):
        # Keep only rows with at least one non-whitespace cell.
        if any(cell.strip() for cell in parsed):
            kept_rows.append(parsed)
    if not kept_rows:
        return []
    header = kept_rows[0]
    return [_ParsedRow(header=header, row=data) for data in kept_rows[1:]]
def _row_to_pairs(headers: list[str], row: list[str]) -> list[tuple[str, str]]:
return [(h, v) for h, v in zip(headers, row) if v.strip()]
def pack_chunk(chunk: str, new_row: str) -> str:
    """Append a formatted row to an existing chunk on a new line."""
    return "\n".join((chunk, new_row))
def _split_row_by_pairs(
    pairs: list[tuple[str, str]],
    tokenizer: BaseTokenizer,
    max_tokens: int,
) -> list[_TokenizedText]:
    """Greedily pack pairs into max-sized pieces. Any single pair that
    itself exceeds ``max_tokens`` is token-split at id boundaries.
    No headers."""
    separator_tokens = count_tokens(FIELD_VALUE_SEPARATOR, tokenizer)
    pieces: list[_TokenizedText] = []
    current_parts: list[str] = []
    current_tokens = 0
    for pair in pairs:
        pair_str = f"{pair[0]}={pair[1]}"
        pair_tokens = count_tokens(pair_str, tokenizer)
        # First pair in a piece pays no separator cost.
        increment = pair_tokens if not current_parts else separator_tokens + pair_tokens
        if current_tokens + increment <= max_tokens:
            current_parts.append(pair_str)
            current_tokens += increment
            continue
        # Pair doesn't fit — close out the piece built so far.
        if current_parts:
            pieces.append(
                _TokenizedText(
                    text=FIELD_VALUE_SEPARATOR.join(current_parts),
                    token_count=current_tokens,
                )
            )
            current_parts = []
            current_tokens = 0
        if pair_tokens > max_tokens:
            # Oversized single pair: hard-split by tokens. Each fragment
            # becomes its own piece; nothing carries into the next loop turn.
            for split_text in split_text_by_tokens(pair_str, tokenizer, max_tokens):
                pieces.append(
                    _TokenizedText(
                        text=split_text,
                        token_count=count_tokens(split_text, tokenizer),
                    )
                )
        else:
            # Start a fresh piece with the pair that didn't fit.
            current_parts = [pair_str]
            current_tokens = pair_tokens
    # Flush the final partially-filled piece.
    if current_parts:
        pieces.append(
            _TokenizedText(
                text=FIELD_VALUE_SEPARATOR.join(current_parts),
                token_count=current_tokens,
            )
        )
    return pieces
def _build_chunk_from_scratch(
    pairs: list[tuple[str, str]],
    formatted_row: str,
    row_tokens: int,
    column_header: str,
    column_header_tokens: int,
    sheet_header: str,
    sheet_header_tokens: int,
    tokenizer: BaseTokenizer,
    max_tokens: int,
) -> list[_TokenizedText]:
    """Turn one formatted row into chunk piece(s), prepending the column
    header and then the sheet header when each still fits in ``max_tokens``.
    Header token counts are passed in precomputed to avoid re-tokenizing
    per row."""
    # 1. Row alone is too large — split by pairs, no headers.
    if row_tokens > max_tokens:
        return _split_row_by_pairs(pairs, tokenizer, max_tokens)
    chunk = formatted_row
    chunk_tokens = row_tokens
    # 2. Attempt to add column header
    candidate_tokens = column_header_tokens + NEWLINE_TOKENS + chunk_tokens
    if candidate_tokens <= max_tokens:
        chunk = column_header + ROW_JOIN + chunk
        chunk_tokens = candidate_tokens
    # 3. Attempt to add sheet header
    if sheet_header:
        candidate_tokens = sheet_header_tokens + NEWLINE_TOKENS + chunk_tokens
        if candidate_tokens <= max_tokens:
            chunk = sheet_header + ROW_JOIN + chunk
            chunk_tokens = candidate_tokens
    return [_TokenizedText(text=chunk, token_count=chunk_tokens)]
def parse_to_chunks(
    rows: Iterable[_ParsedRow],
    sheet_header: str,
    tokenizer: BaseTokenizer,
    max_tokens: int,
) -> list[str]:
    """Pack formatted rows into newline-joined chunk strings, each kept
    within ``max_tokens`` using additive token-count approximations."""
    rows_list = list(rows)
    if not rows_list:
        return []
    # Header metadata comes from the first row; rows from one CSV section
    # carry the same header (see parse_section).
    column_header = format_columns_header(rows_list[0].header)
    column_header_tokens = count_tokens(column_header, tokenizer)
    sheet_header_tokens = count_tokens(sheet_header, tokenizer) if sheet_header else 0
    chunks: list[str] = []
    current_chunk = ""
    current_chunk_tokens = 0
    for row in rows_list:
        pairs: list[tuple[str, str]] = _row_to_pairs(row.header, row.row)
        formatted = format_row(row.header, row.row)
        row_tokens = count_tokens(formatted, tokenizer)
        if current_chunk:
            # Attempt to pack it in (additive approximation)
            if current_chunk_tokens + NEWLINE_TOKENS + row_tokens <= max_tokens:
                current_chunk = pack_chunk(current_chunk, formatted)
                current_chunk_tokens += NEWLINE_TOKENS + row_tokens
                continue
            # Doesn't fit — flush and start new
            chunks.append(current_chunk)
            current_chunk = ""
            current_chunk_tokens = 0
        # Build chunk from scratch
        for piece in _build_chunk_from_scratch(
            pairs=pairs,
            formatted_row=formatted,
            row_tokens=row_tokens,
            column_header=column_header,
            column_header_tokens=column_header_tokens,
            sheet_header=sheet_header,
            sheet_header_tokens=sheet_header_tokens,
            tokenizer=tokenizer,
            max_tokens=max_tokens,
        ):
            # Multiple pieces only occur for oversized rows; flush the
            # open chunk before adopting each new piece as current.
            if current_chunk:
                chunks.append(current_chunk)
            current_chunk = piece.text
            current_chunk_tokens = piece.token_count
    # Flush remaining
    if current_chunk:
        chunks.append(current_chunk)
    return chunks
class TabularChunker(SectionChunker):
    """Chunker for tabular sections: parses CSV text, renders rows as
    field=value lines, and packs them into token-limited chunk payloads."""

    def __init__(self, tokenizer: BaseTokenizer) -> None:
        # Tokenizer used to budget chunk sizes against the token limit.
        self.tokenizer = tokenizer

    def chunk_section(
        self,
        section: Section,
        accumulator: AccumulatorState,
        content_token_limit: int,
    ) -> SectionChunkerOutput:
        # Start from whatever payloads the accumulator already holds, so
        # previously buffered content is emitted ahead of tabular chunks.
        payloads = accumulator.flush_to_list()
        parsed_rows = parse_section(section)
        if not parsed_rows:
            logger.warning(
                f"TabularChunker: skipping unparseable section (link={section.link})"
            )
            return SectionChunkerOutput(
                payloads=payloads, accumulator=AccumulatorState()
            )
        # The section link doubles as the sheet header prepended to chunks
        # (e.g. "workbook.xlsx :: Sheet1" — see tabular_file_to_sections).
        sheet_header = section.link or ""
        chunk_texts = parse_to_chunks(
            rows=parsed_rows,
            sheet_header=sheet_header,
            tokenizer=self.tokenizer,
            max_tokens=content_token_limit,
        )
        for i, text in enumerate(chunk_texts):
            payloads.append(
                ChunkPayload(
                    text=text,
                    links={0: section.link or ""},
                    # Chunks after the first continue the same section.
                    is_continuation=(i > 0),
                )
            )
        # Always hand back a fresh accumulator: tabular content never
        # merges with text from following sections.
        return SectionChunkerOutput(payloads=payloads, accumulator=AccumulatorState())

View File

@@ -10,6 +10,7 @@ from onyx.indexing.chunking.section_chunker import SectionChunker
from onyx.indexing.chunking.section_chunker import SectionChunkerOutput
from onyx.natural_language_processing.utils import BaseTokenizer
from onyx.natural_language_processing.utils import count_tokens
from onyx.natural_language_processing.utils import split_text_by_tokens
from onyx.utils.text_processing import clean_text
from onyx.utils.text_processing import shared_precompare_cleanup
from shared_configs.configs import STRICT_CHUNK_TOKEN_LIMIT
@@ -90,8 +91,8 @@ class TextChunker(SectionChunker):
STRICT_CHUNK_TOKEN_LIMIT
and count_tokens(split_text, self.tokenizer) > content_token_limit
):
smaller_chunks = self._split_oversized_chunk(
split_text, content_token_limit
smaller_chunks = split_text_by_tokens(
split_text, self.tokenizer, content_token_limit
)
for j, small_chunk in enumerate(smaller_chunks):
payloads.append(
@@ -114,16 +115,3 @@ class TextChunker(SectionChunker):
payloads=payloads,
accumulator=AccumulatorState(),
)
def _split_oversized_chunk(self, text: str, content_token_limit: int) -> list[str]:
tokens = self.tokenizer.tokenize(text)
chunks: list[str] = []
start = 0
total_tokens = len(tokens)
while start < total_tokens:
end = min(start + content_token_limit, total_tokens)
token_chunk = tokens[start:end]
chunk_text = " ".join(token_chunk)
chunks.append(chunk_text)
start = end
return chunks

View File

@@ -3,6 +3,8 @@ from abc import ABC
from abc import abstractmethod
from collections import defaultdict
import sentry_sdk
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import ConnectorStopSignal
from onyx.connectors.models import DocumentFailure
@@ -291,6 +293,13 @@ def embed_chunks_with_failure_handling(
)
embedded_chunks.extend(doc_embedded_chunks)
except Exception as e:
with sentry_sdk.new_scope() as scope:
scope.set_tag("stage", "embedding")
scope.set_tag("doc_id", doc_id)
if tenant_id:
scope.set_tag("tenant_id", tenant_id)
scope.fingerprint = ["embedding-failure", type(e).__name__]
sentry_sdk.capture_exception(e)
logger.exception(f"Failed to embed chunks for document '{doc_id}'")
failures.append(
ConnectorFailure(

View File

@@ -5,6 +5,7 @@ from collections.abc import Iterator
from contextlib import contextmanager
from typing import Protocol
import sentry_sdk
from pydantic import BaseModel
from pydantic import ConfigDict
from sqlalchemy.orm import Session
@@ -332,6 +333,13 @@ def index_doc_batch_with_handler(
except Exception as e:
# don't log the batch directly, it's too much text
document_ids = [doc.id for doc in document_batch]
with sentry_sdk.new_scope() as scope:
scope.set_tag("stage", "indexing_pipeline")
scope.set_tag("tenant_id", tenant_id)
scope.set_tag("batch_size", str(len(document_batch)))
scope.set_extra("document_ids", document_ids)
scope.fingerprint = ["indexing-pipeline-failure", type(e).__name__]
sentry_sdk.capture_exception(e)
logger.exception(f"Failed to index document batch: {document_ids}")
index_pipeline_result = IndexingPipelineResult(

View File

@@ -6,6 +6,7 @@ from itertools import chain
from itertools import groupby
import httpx
import sentry_sdk
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import DocumentFailure
@@ -88,6 +89,12 @@ def write_chunks_to_vector_db_with_backoff(
)
)
except Exception as e:
with sentry_sdk.new_scope() as scope:
scope.set_tag("stage", "vector_db_write")
scope.set_tag("doc_id", doc_id)
scope.set_tag("tenant_id", index_batch_params.tenant_id)
scope.fingerprint = ["vector-db-write-failure", type(e).__name__]
sentry_sdk.capture_exception(e)
logger.exception(
f"Failed to write document chunks for '{doc_id}' to vector db"
)

View File

@@ -434,11 +434,14 @@ def get_application(lifespan_override: Lifespan | None = None) -> FastAPI:
lifespan=lifespan_override or lifespan,
)
if SENTRY_DSN:
from onyx.configs.sentry import _add_instance_tags
sentry_sdk.init(
dsn=SENTRY_DSN,
integrations=[StarletteIntegration(), FastApiIntegration()],
traces_sample_rate=0.1,
release=__version__,
before_send=_add_instance_tags,
)
logger.info("Sentry initialized")
else:

View File

@@ -201,6 +201,33 @@ def count_tokens(
return total
def split_text_by_tokens(
    text: str,
    tokenizer: BaseTokenizer,
    max_tokens: int,
) -> list[str]:
    """Best-effort split of ``text`` into pieces of ≤ ``max_tokens`` tokens.

    The text is encoded (in fixed-size character windows to bound per-call
    cost) and the resulting token ids are decoded back in ``max_tokens``-sized
    windows.

    Caveat: pieces are not strictly guaranteed to re-tokenize to
    ≤ ``max_tokens`` — BPE merges at window boundaries may drift by a few
    tokens, and a cut landing mid multi-byte UTF-8 character yields
    replacement characters on decode. Suitable for best-effort splitting
    of oversized content, not hard limit enforcement.
    """
    if not text:
        return []

    # Encode the text window-by-window and accumulate all token ids.
    all_ids: list[int] = []
    for offset in range(0, len(text), _ENCODE_CHUNK_SIZE):
        window = text[offset : offset + _ENCODE_CHUNK_SIZE]
        all_ids.extend(tokenizer.encode(window))

    # Decode fixed-size token-id windows back into text pieces.
    pieces: list[str] = []
    cursor = 0
    while cursor < len(all_ids):
        pieces.append(tokenizer.decode(all_ids[cursor : cursor + max_tokens]))
        cursor += max_tokens
    return pieces
def tokenizer_trim_content(
content: str, desired_length: int, tokenizer: BaseTokenizer
) -> str:

View File

@@ -185,6 +185,10 @@ class MinimalPersonaSnapshot(BaseModel):
for doc_set in persona.document_sets:
for cc_pair in doc_set.connector_credential_pairs:
sources.add(cc_pair.connector.source)
for fed_ds in doc_set.federated_connectors:
non_fed = fed_ds.federated_connector.source.to_non_federated_source()
if non_fed is not None:
sources.add(non_fed)
# Sources from hierarchy nodes
for node in persona.hierarchy_nodes:
@@ -195,6 +199,9 @@ class MinimalPersonaSnapshot(BaseModel):
if doc.parent_hierarchy_node:
sources.add(doc.parent_hierarchy_node.source)
if persona.user_files:
sources.add(DocumentSource.USER_FILE)
return MinimalPersonaSnapshot(
# Core fields actually used by ChatPage
id=persona.id,

View File

@@ -11,6 +11,7 @@ from sqlalchemy.orm import Session
from onyx.auth.permissions import require_permission
from onyx.auth.users import current_curator_or_admin_user
from onyx.background.celery.versioned_apps.client import app as client_app
from onyx.background.indexing.models import IndexAttemptErrorPydantic
from onyx.configs.app_configs import GENERATIVE_MODEL_ACCESS_CHECK_FREQ
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import KV_GEN_AI_KEY_CHECK_TIME
@@ -28,6 +29,7 @@ from onyx.db.feedback import fetch_docs_ranked_by_boost_for_user
from onyx.db.feedback import update_document_boost_for_user
from onyx.db.feedback import update_document_hidden_for_user
from onyx.db.index_attempt import cancel_indexing_attempts_for_ccpair
from onyx.db.index_attempt import get_index_attempt_errors_across_connectors
from onyx.db.models import User
from onyx.file_store.file_store import get_default_file_store
from onyx.key_value_store.factory import get_kv_store
@@ -35,6 +37,7 @@ from onyx.key_value_store.interface import KvKeyNotFoundError
from onyx.llm.factory import get_default_llm
from onyx.llm.utils import test_llm
from onyx.server.documents.models import ConnectorCredentialPairIdentifier
from onyx.server.documents.models import PaginatedReturn
from onyx.server.manage.models import BoostDoc
from onyx.server.manage.models import BoostUpdateRequest
from onyx.server.manage.models import HiddenUpdateRequest
@@ -206,3 +209,40 @@ def create_deletion_attempt_for_connector_id(
file_store = get_default_file_store()
for file_id in connector.connector_specific_config.get("file_locations", []):
file_store.delete_file(file_id)
@router.get("/admin/indexing/failed-documents")
def get_failed_documents(
cc_pair_id: int | None = None,
error_type: str | None = None,
start_time: datetime | None = None,
end_time: datetime | None = None,
include_resolved: bool = False,
page_num: int = 0,
page_size: int = 25,
_: User = Depends(require_permission(Permission.FULL_ADMIN_PANEL_ACCESS)),
db_session: Session = Depends(get_session),
) -> PaginatedReturn[IndexAttemptErrorPydantic]:
"""Get indexing errors across all connectors with optional filters.
Provides a cross-connector view of document indexing failures.
Defaults to last 30 days if no start_time is provided to avoid
unbounded count queries.
"""
if start_time is None:
start_time = datetime.now(tz=timezone.utc) - timedelta(days=30)
errors, total = get_index_attempt_errors_across_connectors(
db_session=db_session,
cc_pair_id=cc_pair_id,
error_type=error_type,
start_time=start_time,
end_time=end_time,
unresolved_only=not include_resolved,
page=page_num,
page_size=page_size,
)
return PaginatedReturn(
items=[IndexAttemptErrorPydantic.from_model(e) for e in errors],
total_items=total,
)

View File

@@ -183,6 +183,9 @@ def generate_ollama_display_name(model_name: str) -> str:
"qwen2.5:7b""Qwen 2.5 7B"
"mistral:latest""Mistral"
"deepseek-r1:14b""DeepSeek R1 14B"
"gemma4:e4b""Gemma 4 E4B"
"deepseek-v3.1:671b-cloud""DeepSeek V3.1 671B Cloud"
"qwen3-vl:235b-instruct-cloud""Qwen 3-vl 235B Instruct Cloud"
"""
# Split into base name and tag
if ":" in model_name:
@@ -209,13 +212,24 @@ def generate_ollama_display_name(model_name: str) -> str:
# Default: Title case with dashes converted to spaces
display_name = base.replace("-", " ").title()
# Process tag to extract size info (skip "latest")
# Process tag (skip "latest")
if tag and tag.lower() != "latest":
# Extract size like "7b", "70b", "14b"
size_match = re.match(r"^(\d+(?:\.\d+)?[bBmM])", tag)
# Check for size prefix like "7b", "70b", optionally followed by modifiers
size_match = re.match(r"^(\d+(?:\.\d+)?[bBmM])(-.+)?$", tag)
if size_match:
size = size_match.group(1).upper()
display_name = f"{display_name} {size}"
remainder = size_match.group(2)
if remainder:
# Format modifiers like "-cloud", "-instruct-cloud"
modifiers = " ".join(
p.title() for p in remainder.strip("-").split("-") if p
)
display_name = f"{display_name} {size} {modifiers}"
else:
display_name = f"{display_name} {size}"
else:
# Non-size tags like "e4b", "q4_0", "fp16", "cloud"
display_name = f"{display_name} {tag.upper()}"
return display_name

View File

@@ -1,13 +1,14 @@
import json
import secrets
from collections.abc import AsyncIterator
from fastapi import APIRouter
from fastapi import Depends
from fastapi import File
from fastapi import Query
from fastapi import UploadFile
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from pydantic import Field
from sqlalchemy.orm import Session
from onyx.auth.permissions import require_permission
@@ -113,28 +114,47 @@ async def transcribe_audio(
) from exc
def _extract_provider_error(exc: Exception) -> str:
"""Extract a human-readable message from a provider exception.
Provider errors often embed JSON from upstream APIs (e.g. ElevenLabs).
This tries to parse a readable ``message`` field out of common JSON
error shapes; falls back to ``str(exc)`` if nothing better is found.
"""
raw = str(exc)
try:
# Many providers embed JSON after a prefix like "ElevenLabs TTS failed: {...}"
json_start = raw.find("{")
if json_start == -1:
return raw
parsed = json.loads(raw[json_start:])
# Shape: {"detail": {"message": "..."}} (ElevenLabs)
detail = parsed.get("detail", parsed)
if isinstance(detail, dict):
return detail.get("message") or detail.get("error") or raw
if isinstance(detail, str):
return detail
except (json.JSONDecodeError, AttributeError, TypeError):
pass
return raw
class SynthesizeRequest(BaseModel):
    """Request body for the /synthesize TTS endpoint."""
    # Text to synthesize; validation rejects empty strings.
    text: str = Field(..., min_length=1)
    # Optional voice ID; when omitted the provider's default voice is used.
    voice: str | None = None
    # Optional playback speed multiplier; must be within [0.5, 2.0].
    speed: float | None = Field(default=None, ge=0.5, le=2.0)
@router.post("/synthesize")
async def synthesize_speech(
text: str | None = Query(
default=None, description="Text to synthesize", max_length=4096
),
voice: str | None = Query(default=None, description="Voice ID to use"),
speed: float | None = Query(
default=None, description="Playback speed (0.5-2.0)", ge=0.5, le=2.0
),
body: SynthesizeRequest,
user: User = Depends(require_permission(Permission.BASIC_ACCESS)),
) -> StreamingResponse:
"""
Synthesize text to speech using the default TTS provider.
Accepts parameters via query string for streaming compatibility.
"""
logger.info(
f"TTS request: text length={len(text) if text else 0}, voice={voice}, speed={speed}"
)
if not text:
raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, "Text is required")
"""Synthesize text to speech using the default TTS provider."""
text = body.text
voice = body.voice
speed = body.speed
logger.info(f"TTS request: text length={len(text)}, voice={voice}, speed={speed}")
# Use short-lived session to fetch provider config, then release connection
# before starting the long-running streaming response
@@ -177,31 +197,36 @@ async def synthesize_speech(
logger.error(f"Failed to get voice provider: {exc}")
raise OnyxError(OnyxErrorCode.INTERNAL_ERROR, str(exc)) from exc
# Session is now closed - streaming response won't hold DB connection
# Pull the first chunk before returning the StreamingResponse. If the
# provider rejects the request (e.g. text too long), the error surfaces
# as a proper HTTP error instead of a broken audio stream.
stream_iter = provider.synthesize_stream(
text=text, voice=final_voice, speed=final_speed
)
try:
first_chunk = await stream_iter.__anext__()
except StopAsyncIteration:
raise OnyxError(OnyxErrorCode.INTERNAL_ERROR, "TTS provider returned no audio")
except Exception as exc:
raise OnyxError(
OnyxErrorCode.BAD_GATEWAY, _extract_provider_error(exc)
) from exc
async def audio_stream() -> AsyncIterator[bytes]:
try:
chunk_count = 0
async for chunk in provider.synthesize_stream(
text=text, voice=final_voice, speed=final_speed
):
chunk_count += 1
yield chunk
logger.info(f"TTS streaming complete: {chunk_count} chunks sent")
except NotImplementedError as exc:
logger.error(f"TTS not implemented: {exc}")
raise
except Exception as exc:
logger.error(f"Synthesis failed: {exc}")
raise
yield first_chunk
chunk_count = 1
async for chunk in stream_iter:
chunk_count += 1
yield chunk
logger.info(f"TTS streaming complete: {chunk_count} chunks sent")
return StreamingResponse(
audio_stream(),
media_type="audio/mpeg",
headers={
"Content-Disposition": "inline; filename=speech.mp3",
# Allow streaming by not setting content-length
"Cache-Control": "no-cache",
"X-Accel-Buffering": "no", # Disable nginx buffering
"X-Accel-Buffering": "no",
},
)

View File

@@ -0,0 +1,110 @@
"""Prometheus metrics for connector health and index attempts.
Emitted by docfetching and docprocessing workers when connector or
index attempt state changes. All functions silently catch exceptions
to avoid disrupting the caller's business logic.
Gauge metrics (error state, last success timestamp) are per-process.
With multiple worker pods, use max() aggregation in PromQL to get the
correct value across instances, e.g.:
max by (cc_pair_id) (onyx_connector_in_error_state)
"""
from prometheus_client import Counter
from prometheus_client import Gauge
from onyx.utils.logger import setup_logger
logger = setup_logger()
# --- Index attempt lifecycle ---
# Counter incremented once per index-attempt status transition.
INDEX_ATTEMPT_STATUS = Counter(
"onyx_index_attempt_transitions_total",
"Index attempt status transitions",
["tenant_id", "source", "cc_pair_id", "status"],
)
# --- Connector health ---
# Per-process gauge (see module docstring: aggregate with max() in PromQL).
CONNECTOR_IN_ERROR_STATE = Gauge(
"onyx_connector_in_error_state",
"Whether the connector is in a repeated error state (1=yes, 0=no)",
["tenant_id", "source", "cc_pair_id"],
)
# Per-process gauge of the last successful indexing time (unix seconds).
CONNECTOR_LAST_SUCCESS_TIMESTAMP = Gauge(
"onyx_connector_last_success_timestamp_seconds",
"Unix timestamp of last successful indexing for this connector",
["tenant_id", "source", "cc_pair_id"],
)
# Monotonic counter of documents indexed; incremented on successful runs.
CONNECTOR_DOCS_INDEXED = Counter(
"onyx_connector_docs_indexed_total",
"Total documents indexed per connector (monotonic)",
["tenant_id", "source", "cc_pair_id"],
)
# Monotonic counter of failed index attempts; incremented on "failed" status.
CONNECTOR_INDEXING_ERRORS = Counter(
"onyx_connector_indexing_errors_total",
"Total failed index attempts per connector (monotonic)",
["tenant_id", "source", "cc_pair_id"],
)
def on_index_attempt_status_change(
    tenant_id: str,
    source: str,
    cc_pair_id: int,
    status: str,
) -> None:
    """Record an index attempt status transition.

    A "failed" status additionally bumps the per-connector error counter.
    Never raises — metric errors are logged at debug level so the caller's
    business logic is unaffected.
    """
    try:
        base_labels = dict(
            tenant_id=tenant_id,
            source=source,
            cc_pair_id=str(cc_pair_id),
        )
        INDEX_ATTEMPT_STATUS.labels(status=status, **base_labels).inc()
        if status == "failed":
            CONNECTOR_INDEXING_ERRORS.labels(**base_labels).inc()
    except Exception:
        logger.debug("Failed to record index attempt status metric", exc_info=True)
def on_connector_error_state_change(
    tenant_id: str,
    source: str,
    cc_pair_id: int,
    in_error: bool,
) -> None:
    """Update the error-state gauge when a connector's
    ``in_repeated_error_state`` flips.

    Never raises — metric errors are logged at debug level only.
    """
    gauge_value = 1.0 if in_error else 0.0
    try:
        CONNECTOR_IN_ERROR_STATE.labels(
            tenant_id=tenant_id,
            source=source,
            cc_pair_id=str(cc_pair_id),
        ).set(gauge_value)
    except Exception:
        logger.debug("Failed to record connector error state metric", exc_info=True)
def on_connector_indexing_success(
    tenant_id: str,
    source: str,
    cc_pair_id: int,
    docs_indexed: int,
    success_timestamp: float,
) -> None:
    """Record a successful indexing run.

    Sets the last-success timestamp gauge and, when any documents were
    indexed, increments the monotonic docs-indexed counter. Never raises —
    metric errors are logged at debug level only.
    """
    try:
        label_values = {
            "tenant_id": tenant_id,
            "source": source,
            "cc_pair_id": str(cc_pair_id),
        }
        CONNECTOR_LAST_SUCCESS_TIMESTAMP.labels(**label_values).set(success_timestamp)
        if docs_indexed > 0:
            CONNECTOR_DOCS_INDEXED.labels(**label_values).inc(docs_indexed)
    except Exception:
        logger.debug("Failed to record connector success metric", exc_info=True)

View File

@@ -1,25 +1,30 @@
"""Prometheus collectors for Celery queue depths and indexing pipeline state.
"""Prometheus collectors for Celery queue depths and infrastructure health.
These collectors query Redis and Postgres at scrape time (the Collector pattern),
These collectors query Redis at scrape time (the Collector pattern),
so metrics are always fresh when Prometheus scrapes /metrics. They run inside the
monitoring celery worker which already has Redis and DB access.
monitoring celery worker which already has Redis access.
To avoid hammering Redis/Postgres on every 15s scrape, results are cached with
To avoid hammering Redis on every 15s scrape, results are cached with
a configurable TTL (default 30s). This means metrics may be up to TTL seconds
stale, which is fine for monitoring dashboards.
Note: connector health and index attempt metrics are push-based (emitted by
workers at state-change time) and live in connector_health_metrics.py.
"""
from __future__ import annotations
import concurrent.futures
import json
import threading
import time
from datetime import datetime
from datetime import timezone
from typing import Any
from prometheus_client.core import GaugeMetricFamily
from prometheus_client.registry import Collector
from redis import Redis
from onyx.background.celery.celery_redis import celery_get_broker_client
from onyx.background.celery.celery_redis import celery_get_queue_length
from onyx.background.celery.celery_redis import celery_get_unacked_task_ids
from onyx.configs.constants import OnyxCeleryQueues
@@ -31,6 +36,11 @@ logger = setup_logger()
# the previous result without re-querying Redis/Postgres.
_DEFAULT_CACHE_TTL = 30.0
# Maximum time (seconds) a single _collect_fresh() call may take before
# the collector gives up and returns stale/empty results. Prevents the
# /metrics endpoint from hanging indefinitely when a DB or Redis query stalls.
_DEFAULT_COLLECT_TIMEOUT = 120.0
_QUEUE_LABEL_MAP: dict[str, str] = {
OnyxCeleryQueues.PRIMARY: "primary",
OnyxCeleryQueues.DOCPROCESSING: "docprocessing",
@@ -62,18 +72,32 @@ _UNACKED_QUEUES: list[str] = [
class _CachedCollector(Collector):
"""Base collector with TTL-based caching.
"""Base collector with TTL-based caching and timeout protection.
Subclasses implement ``_collect_fresh()`` to query the actual data source.
The base ``collect()`` returns cached results if the TTL hasn't expired,
avoiding repeated queries when Prometheus scrapes frequently.
A per-collection timeout prevents a slow DB or Redis query from blocking
the /metrics endpoint indefinitely. If _collect_fresh() exceeds the
timeout, stale cached results are returned instead.
"""
def __init__(self, cache_ttl: float = _DEFAULT_CACHE_TTL) -> None:
def __init__(
self,
cache_ttl: float = _DEFAULT_CACHE_TTL,
collect_timeout: float = _DEFAULT_COLLECT_TIMEOUT,
) -> None:
self._cache_ttl = cache_ttl
self._collect_timeout = collect_timeout
self._cached_result: list[GaugeMetricFamily] | None = None
self._last_collect_time: float = 0.0
self._lock = threading.Lock()
self._executor = concurrent.futures.ThreadPoolExecutor(
max_workers=1,
thread_name_prefix=type(self).__name__,
)
self._inflight: concurrent.futures.Future | None = None
def collect(self) -> list[GaugeMetricFamily]:
with self._lock:
@@ -84,12 +108,28 @@ class _CachedCollector(Collector):
):
return self._cached_result
# If a previous _collect_fresh() is still running, wait on it
# rather than queuing another. This prevents unbounded task
# accumulation in the executor during extended DB outages.
if self._inflight is not None and not self._inflight.done():
future = self._inflight
else:
future = self._executor.submit(self._collect_fresh)
self._inflight = future
try:
result = self._collect_fresh()
result = future.result(timeout=self._collect_timeout)
self._inflight = None
self._cached_result = result
self._last_collect_time = now
return result
except concurrent.futures.TimeoutError:
logger.warning(
f"{type(self).__name__}._collect_fresh() timed out after {self._collect_timeout}s, returning stale cache"
)
return self._cached_result if self._cached_result is not None else []
except Exception:
self._inflight = None
logger.exception(f"Error in {type(self).__name__}.collect()")
# Return stale cache on error rather than nothing — avoids
# metrics disappearing during transient failures.
@@ -117,8 +157,6 @@ class QueueDepthCollector(_CachedCollector):
if self._celery_app is None:
return []
from onyx.background.celery.celery_redis import celery_get_broker_client
redis_client = celery_get_broker_client(self._celery_app)
depth = GaugeMetricFamily(
@@ -194,208 +232,6 @@ class QueueDepthCollector(_CachedCollector):
return None
class IndexAttemptCollector(_CachedCollector):
"""Queries Postgres for index attempt state on each scrape."""
def __init__(self, cache_ttl: float = _DEFAULT_CACHE_TTL) -> None:
super().__init__(cache_ttl)
# Collection is a no-op until configure() is called after DB init.
self._configured: bool = False
self._terminal_statuses: list = []
def configure(self) -> None:
"""Call once DB engine is initialized."""
# Imported here (not module level) — presumably to defer DB-adjacent
# imports until the engine is ready; TODO confirm.
from onyx.db.enums import IndexingStatus
self._terminal_statuses = [s for s in IndexingStatus if s.is_terminal()]
self._configured = True
def _collect_fresh(self) -> list[GaugeMetricFamily]:
# Returning [] before configure() avoids touching the DB too early.
if not self._configured:
return []
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.engine.tenant_utils import get_all_tenant_ids
from onyx.db.index_attempt import get_active_index_attempts_for_metrics
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
attempts_gauge = GaugeMetricFamily(
"onyx_index_attempts_active",
"Number of non-terminal index attempts",
labels=[
"status",
"source",
"tenant_id",
"connector_name",
"cc_pair_id",
],
)
tenant_ids = get_all_tenant_ids()
for tid in tenant_ids:
# Defensive guard — get_all_tenant_ids() should never yield None,
# but we guard here for API stability in case the contract changes.
if tid is None:
continue
# Set the tenant contextvar so the session below targets this
# tenant; always reset in the finally to avoid leaking it.
token = CURRENT_TENANT_ID_CONTEXTVAR.set(tid)
try:
with get_session_with_current_tenant() as session:
rows = get_active_index_attempts_for_metrics(session)
for status, source, cc_id, cc_name, count in rows:
# Fall back to a synthetic name when the cc pair is unnamed.
name_val = cc_name or f"cc_pair_{cc_id}"
attempts_gauge.add_metric(
[
status.value,
source.value,
tid,
name_val,
str(cc_id),
],
count,
)
finally:
CURRENT_TENANT_ID_CONTEXTVAR.reset(token)
return [attempts_gauge]
class ConnectorHealthCollector(_CachedCollector):
"""Queries Postgres for connector health state on each scrape."""
def __init__(self, cache_ttl: float = _DEFAULT_CACHE_TTL) -> None:
super().__init__(cache_ttl)
# Collection is a no-op until configure() is called after DB init.
self._configured: bool = False
def configure(self) -> None:
"""Call once DB engine is initialized."""
self._configured = True
def _collect_fresh(self) -> list[GaugeMetricFamily]:
# Returning [] before configure() avoids touching the DB too early.
if not self._configured:
return []
# Imported here (not module level) — presumably to defer DB-adjacent
# imports until the engine is ready; TODO confirm.
from onyx.db.connector_credential_pair import (
get_connector_health_for_metrics,
)
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.engine.tenant_utils import get_all_tenant_ids
from onyx.db.index_attempt import get_docs_indexed_by_cc_pair
from onyx.db.index_attempt import get_failed_attempt_counts_by_cc_pair
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
# Per-connector gauges emitted once per scrape.
staleness_gauge = GaugeMetricFamily(
"onyx_connector_last_success_age_seconds",
"Seconds since last successful index for this connector",
labels=["tenant_id", "source", "cc_pair_id", "connector_name"],
)
error_state_gauge = GaugeMetricFamily(
"onyx_connector_in_error_state",
"Whether the connector is in a repeated error state (1=yes, 0=no)",
labels=["tenant_id", "source", "cc_pair_id", "connector_name"],
)
# Per-tenant aggregate gauges.
by_status_gauge = GaugeMetricFamily(
"onyx_connectors_by_status",
"Number of connectors grouped by status",
labels=["tenant_id", "status"],
)
error_total_gauge = GaugeMetricFamily(
"onyx_connectors_in_error_total",
"Total number of connectors in repeated error state",
labels=["tenant_id"],
)
per_connector_labels = [
"tenant_id",
"source",
"cc_pair_id",
"connector_name",
]
docs_success_gauge = GaugeMetricFamily(
"onyx_connector_docs_indexed",
"Total new documents indexed (90-day rolling sum) per connector",
labels=per_connector_labels,
)
docs_error_gauge = GaugeMetricFamily(
"onyx_connector_error_count",
"Total number of failed index attempts per connector",
labels=per_connector_labels,
)
# Single timestamp per scrape so all staleness ages share a reference.
now = datetime.now(tz=timezone.utc)
tenant_ids = get_all_tenant_ids()
for tid in tenant_ids:
# Defensive guard — get_all_tenant_ids() should never yield None,
# but we guard here for API stability in case the contract changes.
if tid is None:
continue
# Set the tenant contextvar so the session below targets this
# tenant; always reset in the finally to avoid leaking it.
token = CURRENT_TENANT_ID_CONTEXTVAR.set(tid)
try:
with get_session_with_current_tenant() as session:
pairs = get_connector_health_for_metrics(session)
error_counts_by_cc = get_failed_attempt_counts_by_cc_pair(session)
docs_by_cc = get_docs_indexed_by_cc_pair(session)
status_counts: dict[str, int] = {}
error_count = 0
for (
cc_id,
status,
in_error,
last_success,
cc_name,
source,
) in pairs:
cc_id_str = str(cc_id)
source_val = source.value
# Fall back to a synthetic name when the cc pair is unnamed.
name_val = cc_name or f"cc_pair_{cc_id}"
label_vals = [tid, source_val, cc_id_str, name_val]
# Connectors that never succeeded simply omit the staleness metric.
if last_success is not None:
# Both `now` and `last_success` are timezone-aware
# (the DB column uses DateTime(timezone=True)),
# so subtraction is safe.
age = (now - last_success).total_seconds()
staleness_gauge.add_metric(label_vals, age)
error_state_gauge.add_metric(
label_vals,
1.0 if in_error else 0.0,
)
if in_error:
error_count += 1
# Missing entries default to 0 rather than omitting the series.
docs_success_gauge.add_metric(
label_vals,
docs_by_cc.get(cc_id, 0),
)
docs_error_gauge.add_metric(
label_vals,
error_counts_by_cc.get(cc_id, 0),
)
status_val = status.value
status_counts[status_val] = status_counts.get(status_val, 0) + 1
# Emit per-tenant aggregates after scanning all pairs.
for status_val, count in status_counts.items():
by_status_gauge.add_metric([tid, status_val], count)
error_total_gauge.add_metric([tid], error_count)
finally:
CURRENT_TENANT_ID_CONTEXTVAR.reset(token)
return [
staleness_gauge,
error_state_gauge,
by_status_gauge,
error_total_gauge,
docs_success_gauge,
docs_error_gauge,
]
class RedisHealthCollector(_CachedCollector):
"""Collects Redis server health metrics (memory, clients, etc.)."""
@@ -411,8 +247,6 @@ class RedisHealthCollector(_CachedCollector):
if self._celery_app is None:
return []
from onyx.background.celery.celery_redis import celery_get_broker_client
redis_client = celery_get_broker_client(self._celery_app)
memory_used = GaugeMetricFamily(
@@ -495,7 +329,9 @@ class WorkerHeartbeatMonitor:
},
)
recv.capture(
limit=None, timeout=self._HEARTBEAT_TIMEOUT_SECONDS, wakeup=True
limit=None,
timeout=self._HEARTBEAT_TIMEOUT_SECONDS,
wakeup=True,
)
except Exception:
if self._running:

View File

@@ -6,8 +6,6 @@ Called once by the monitoring celery worker after Redis and DB are ready.
from celery import Celery
from prometheus_client.registry import REGISTRY
from onyx.server.metrics.indexing_pipeline import ConnectorHealthCollector
from onyx.server.metrics.indexing_pipeline import IndexAttemptCollector
from onyx.server.metrics.indexing_pipeline import QueueDepthCollector
from onyx.server.metrics.indexing_pipeline import RedisHealthCollector
from onyx.server.metrics.indexing_pipeline import WorkerHealthCollector
@@ -21,8 +19,6 @@ logger = setup_logger()
# module level ensures they survive the lifetime of the worker process and are
# only registered with the Prometheus registry once.
_queue_collector = QueueDepthCollector()
_attempt_collector = IndexAttemptCollector()
_connector_collector = ConnectorHealthCollector()
_redis_health_collector = RedisHealthCollector()
_worker_health_collector = WorkerHealthCollector()
_heartbeat_monitor: WorkerHeartbeatMonitor | None = None
@@ -34,6 +30,9 @@ def setup_indexing_pipeline_metrics(celery_app: Celery) -> None:
Args:
celery_app: The Celery application instance. Used to obtain a
broker Redis client on each scrape for queue depth metrics.
Note: connector health and index attempt metrics are push-based
(see connector_health_metrics.py) and do not use collectors.
"""
_queue_collector.set_celery_app(celery_app)
_redis_health_collector.set_celery_app(celery_app)
@@ -47,13 +46,8 @@ def setup_indexing_pipeline_metrics(celery_app: Celery) -> None:
_heartbeat_monitor.start()
_worker_health_collector.set_monitor(_heartbeat_monitor)
_attempt_collector.configure()
_connector_collector.configure()
for collector in (
_queue_collector,
_attempt_collector,
_connector_collector,
_redis_health_collector,
_worker_health_collector,
):

View File

@@ -299,7 +299,7 @@ h11==0.16.0
# uvicorn
h2==4.3.0
# via httpx
hf-xet==1.2.0 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
hf-xet==1.4.3 ; platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
# via huggingface-hub
hpack==4.1.0
# via h2
@@ -322,6 +322,7 @@ httpx==0.28.1
# fastmcp
# google-genai
# httpx-oauth
# huggingface-hub
# langfuse
# langsmith
# litellm
@@ -334,7 +335,7 @@ httpx-sse==0.4.3
# cohere
# mcp
hubspot-api-client==11.1.0
huggingface-hub==0.35.3
huggingface-hub==1.10.2
# via tokenizers
humanfriendly==10.0
# via coloredlogs
@@ -589,7 +590,7 @@ platformdirs==4.5.0
# via
# fastmcp
# zeep
playwright==1.55.0
playwright==1.58.0
# via pytest-playwright
pluggy==1.6.0
# via pytest
@@ -784,7 +785,6 @@ requests==2.33.0
# google-api-core
# google-genai
# hubspot-api-client
# huggingface-hub
# jira
# jsonschema-path
# kubernetes
@@ -911,7 +911,7 @@ tiktoken==0.7.0
timeago==1.0.16
tld==0.13.1
# via courlan
tokenizers==0.21.4
tokenizers==0.22.2
# via
# chonkie
# cohere
@@ -933,7 +933,9 @@ tqdm==4.67.1
# unstructured
trafilatura==1.12.2
typer==0.20.0
# via mcp
# via
# huggingface-hub
# mcp
types-awscrt==0.28.4
# via botocore-stubs
types-openpyxl==3.0.4.7

View File

@@ -82,6 +82,7 @@ click==8.3.1
# via
# black
# litellm
# typer
# uvicorn
cohere==5.6.1
# via onyx
@@ -153,7 +154,7 @@ h11==0.16.0
# httpcore
# uvicorn
hatchling==1.28.0
hf-xet==1.2.0 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
hf-xet==1.4.3 ; platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
# via huggingface-hub
httpcore==1.0.9
# via httpx
@@ -161,6 +162,7 @@ httpx==0.28.1
# via
# cohere
# google-genai
# huggingface-hub
# litellm
# mcp
# openai
@@ -168,7 +170,7 @@ httpx-sse==0.4.3
# via
# cohere
# mcp
huggingface-hub==0.35.3
huggingface-hub==1.10.2
# via tokenizers
identify==2.6.15
# via pre-commit
@@ -219,6 +221,8 @@ litellm==1.81.6
mako==1.2.4
# via alembic
manygo==0.2.0
markdown-it-py==4.0.0
# via rich
markupsafe==3.0.3
# via
# jinja2
@@ -230,6 +234,8 @@ matplotlib-inline==0.2.1
# ipython
mcp==1.26.0
# via claude-agent-sdk
mdurl==0.1.2
# via markdown-it-py
multidict==6.7.0
# via
# aiobotocore
@@ -340,6 +346,7 @@ pygments==2.20.0
# ipython
# ipython-pygments-lexers
# pytest
# rich
pyjwt==2.12.0
# via mcp
pyparsing==3.2.5
@@ -395,7 +402,6 @@ requests==2.33.0
# via
# cohere
# google-genai
# huggingface-hub
# kubernetes
# requests-oauthlib
# tiktoken
@@ -404,6 +410,8 @@ requests-oauthlib==1.3.1
# via kubernetes
retry==0.9.2
# via onyx
rich==14.2.0
# via typer
rpds-py==0.29.0
# via
# jsonschema
@@ -415,6 +423,8 @@ s3transfer==0.13.1
# via boto3
sentry-sdk==2.14.0
# via onyx
shellingham==1.5.4
# via typer
six==1.17.0
# via
# kubernetes
@@ -442,7 +452,7 @@ tenacity==9.1.2
# voyageai
tiktoken==0.7.0
# via litellm
tokenizers==0.21.4
tokenizers==0.22.2
# via
# cohere
# litellm
@@ -463,6 +473,8 @@ traitlets==5.14.3
# matplotlib-inline
trove-classifiers==2025.12.1.14
# via hatchling
typer==0.20.0
# via huggingface-hub
types-beautifulsoup4==4.12.0.3
types-html5lib==1.1.11.13
# via types-beautifulsoup4
@@ -500,6 +512,7 @@ typing-extensions==4.15.0
# referencing
# sqlalchemy
# starlette
# typer
# typing-inspection
typing-inspection==0.4.2
# via

View File

@@ -69,6 +69,7 @@ claude-agent-sdk==0.1.19
click==8.3.1
# via
# litellm
# typer
# uvicorn
cohere==5.6.1
# via onyx
@@ -112,7 +113,7 @@ h11==0.16.0
# via
# httpcore
# uvicorn
hf-xet==1.2.0 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
hf-xet==1.4.3 ; platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
# via huggingface-hub
httpcore==1.0.9
# via httpx
@@ -120,6 +121,7 @@ httpx==0.28.1
# via
# cohere
# google-genai
# huggingface-hub
# litellm
# mcp
# openai
@@ -127,7 +129,7 @@ httpx-sse==0.4.3
# via
# cohere
# mcp
huggingface-hub==0.35.3
huggingface-hub==1.10.2
# via tokenizers
idna==3.11
# via
@@ -156,10 +158,14 @@ kubernetes==31.0.0
# via onyx
litellm==1.81.6
# via onyx
markdown-it-py==4.0.0
# via rich
markupsafe==3.0.3
# via jinja2
mcp==1.26.0
# via claude-agent-sdk
mdurl==0.1.2
# via markdown-it-py
monotonic==1.6
# via posthog
multidict==6.7.0
@@ -217,6 +223,8 @@ pydantic-core==2.33.2
# via pydantic
pydantic-settings==2.12.0
# via mcp
pygments==2.20.0
# via rich
pyjwt==2.12.0
# via mcp
python-dateutil==2.8.2
@@ -247,7 +255,6 @@ requests==2.33.0
# via
# cohere
# google-genai
# huggingface-hub
# kubernetes
# posthog
# requests-oauthlib
@@ -257,6 +264,8 @@ requests-oauthlib==1.3.1
# via kubernetes
retry==0.9.2
# via onyx
rich==14.2.0
# via typer
rpds-py==0.29.0
# via
# jsonschema
@@ -267,6 +276,8 @@ s3transfer==0.13.1
# via boto3
sentry-sdk==2.14.0
# via onyx
shellingham==1.5.4
# via typer
six==1.17.0
# via
# kubernetes
@@ -289,7 +300,7 @@ tenacity==9.1.2
# voyageai
tiktoken==0.7.0
# via litellm
tokenizers==0.21.4
tokenizers==0.22.2
# via
# cohere
# litellm
@@ -297,6 +308,8 @@ tqdm==4.67.1
# via
# huggingface-hub
# openai
typer==0.20.0
# via huggingface-hub
types-requests==2.32.0.20250328
# via cohere
typing-extensions==4.15.0
@@ -313,6 +326,7 @@ typing-extensions==4.15.0
# pydantic-core
# referencing
# starlette
# typer
# typing-inspection
typing-inspection==0.4.2
# via

View File

@@ -78,6 +78,7 @@ click==8.3.1
# click-plugins
# click-repl
# litellm
# typer
# uvicorn
click-didyoumean==0.3.1
# via celery
@@ -116,7 +117,6 @@ filelock==3.20.3
# via
# huggingface-hub
# torch
# transformers
frozenlist==1.8.0
# via
# aiohttp
@@ -135,7 +135,7 @@ h11==0.16.0
# via
# httpcore
# uvicorn
hf-xet==1.2.0 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
hf-xet==1.4.3 ; platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
# via huggingface-hub
httpcore==1.0.9
# via httpx
@@ -143,6 +143,7 @@ httpx==0.28.1
# via
# cohere
# google-genai
# huggingface-hub
# litellm
# mcp
# openai
@@ -150,7 +151,7 @@ httpx-sse==0.4.3
# via
# cohere
# mcp
huggingface-hub==0.35.3
huggingface-hub==1.10.2
# via
# accelerate
# sentence-transformers
@@ -189,10 +190,14 @@ kubernetes==31.0.0
# via onyx
litellm==1.81.6
# via onyx
markdown-it-py==4.0.0
# via rich
markupsafe==3.0.3
# via jinja2
mcp==1.26.0
# via claude-agent-sdk
mdurl==0.1.2
# via markdown-it-py
mpmath==1.3.0
# via sympy
multidict==6.7.0
@@ -207,6 +212,7 @@ numpy==2.4.1
# accelerate
# scikit-learn
# scipy
# sentence-transformers
# transformers
# voyageai
nvidia-cublas-cu12==12.8.4.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
@@ -264,8 +270,6 @@ packaging==24.2
# transformers
parameterized==0.9.0
# via cohere
pillow==12.2.0
# via sentence-transformers
prometheus-client==0.23.1
# via
# onyx
@@ -305,6 +309,8 @@ pydantic-core==2.33.2
# via pydantic
pydantic-settings==2.12.0
# via mcp
pygments==2.20.0
# via rich
pyjwt==2.12.0
# via mcp
python-dateutil==2.8.2
@@ -339,16 +345,16 @@ requests==2.33.0
# via
# cohere
# google-genai
# huggingface-hub
# kubernetes
# requests-oauthlib
# tiktoken
# transformers
# voyageai
requests-oauthlib==1.3.1
# via kubernetes
retry==0.9.2
# via onyx
rich==14.2.0
# via typer
rpds-py==0.29.0
# via
# jsonschema
@@ -367,11 +373,13 @@ scipy==1.16.3
# via
# scikit-learn
# sentence-transformers
sentence-transformers==4.0.2
sentence-transformers==5.4.1
sentry-sdk==2.14.0
# via onyx
setuptools==80.9.0 ; python_full_version >= '3.12'
# via torch
shellingham==1.5.4
# via typer
six==1.17.0
# via
# kubernetes
@@ -398,7 +406,7 @@ threadpoolctl==3.6.0
# via scikit-learn
tiktoken==0.7.0
# via litellm
tokenizers==0.21.4
tokenizers==0.22.2
# via
# cohere
# litellm
@@ -413,10 +421,14 @@ tqdm==4.67.1
# openai
# sentence-transformers
# transformers
transformers==4.53.0
transformers==5.5.4
# via sentence-transformers
triton==3.5.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
# via torch
typer==0.20.0
# via
# huggingface-hub
# transformers
types-requests==2.32.0.20250328
# via cohere
typing-extensions==4.15.0
@@ -435,6 +447,7 @@ typing-extensions==4.15.0
# sentence-transformers
# starlette
# torch
# typer
# typing-inspection
typing-inspection==0.4.2
# via

View File

@@ -4,6 +4,7 @@ from unittest.mock import patch
from urllib.parse import urlparse
from onyx.connectors.google_drive.connector import GoogleDriveConnector
from onyx.connectors.google_utils.google_utils import execute_paginated_retrieval
from tests.daily.connectors.google_drive.consts_and_utils import _pick
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_EMAIL
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_FILE_IDS
@@ -699,3 +700,43 @@ def test_specific_user_email_shared_with_me(
doc_titles = set(doc.semantic_identifier for doc in output.documents)
assert doc_titles == set(expected)
@patch(
"onyx.file_processing.extract_file_text.get_unstructured_api_key",
return_value=None,
)
def test_slim_retrieval_does_not_call_permissions_list(
mock_get_api_key: MagicMock, # noqa: ARG001
google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
"""retrieve_all_slim_docs() must not call permissions().list for any file.
Pruning only needs file IDs — fetching permissions per file causes O(N) API
calls that time out for tenants with large numbers of externally-owned files.
"""
connector = google_drive_service_acct_connector_factory(
primary_admin_email=ADMIN_EMAIL,
include_shared_drives=True,
include_my_drives=True,
include_files_shared_with_me=False,
shared_folder_urls=None,
shared_drive_urls=None,
my_drive_emails=None,
)
with patch(
"onyx.connectors.google_drive.connector.execute_paginated_retrieval",
wraps=execute_paginated_retrieval,
) as mock_paginated:
for batch in connector.retrieve_all_slim_docs():
pass
permissions_calls = [
c
for c in mock_paginated.call_args_list
if "permissions" in str(c.kwargs.get("retrieval_function", ""))
]
assert (
len(permissions_calls) == 0
), f"permissions().list was called {len(permissions_calls)} time(s) during pruning"

View File

@@ -12,6 +12,7 @@ from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.connectors.models import ImageSection
from onyx.connectors.models import TabularSection
from onyx.connectors.models import TextSection
_ITERATION_LIMIT = 100_000
@@ -141,13 +142,15 @@ def load_all_from_connector(
def to_sections(
documents: list[Document],
) -> Iterator[TextSection | ImageSection]:
) -> Iterator[TextSection | ImageSection | TabularSection]:
for doc in documents:
for section in doc.sections:
yield section
def to_text_sections(sections: Iterator[TextSection | ImageSection]) -> Iterator[str]:
def to_text_sections(
sections: Iterator[TextSection | ImageSection | TabularSection],
) -> Iterator[str]:
for section in sections:
if isinstance(section, TextSection):
yield section.text

View File

@@ -12,7 +12,7 @@ from onyx.db.models import DocumentByConnectorCredentialPair
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.constants import NUM_DOCS
from tests.integration.common_utils.managers.api_key import DATestAPIKey
from tests.integration.common_utils.managers.cc_pair import DATestCCPair
from tests.integration.common_utils.test_models import DATestCCPair
from tests.integration.common_utils.test_models import DATestUser
from tests.integration.common_utils.test_models import SimpleTestDocument
from tests.integration.common_utils.vespa import vespa_fixture

View File

@@ -14,7 +14,6 @@ from onyx.db.search_settings import get_current_search_settings
from tests.integration.common_utils.constants import ADMIN_USER_NAME
from tests.integration.common_utils.constants import GENERAL_HEADERS
from tests.integration.common_utils.managers.api_key import APIKeyManager
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.document import DocumentManager
from tests.integration.common_utils.managers.image_generation import (
ImageGenerationConfigManager,
@@ -196,6 +195,9 @@ def image_generation_config(
@pytest.fixture
def document_builder(admin_user: DATestUser) -> DocumentBuilderType:
# HACK: Avoid importing generated OpenAPI client modules unless this fixture is used.
from tests.integration.common_utils.managers.cc_pair import CCPairManager
api_key: DATestAPIKey = APIKeyManager.create(
user_performing_action=admin_user,
)

View File

@@ -1,4 +1,4 @@
FROM python:3.11.7-slim-bookworm
FROM python:3.11-slim-bookworm@sha256:9c6f90801e6b68e772b7c0ca74260cbf7af9f320acec894e26fccdaccfbe3b47
WORKDIR /app

View File

@@ -108,12 +108,12 @@ def current_head_rev() -> str:
["alembic", "heads", "--resolve-dependencies"],
cwd=_BACKEND_DIR,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
stderr=subprocess.PIPE,
text=True,
)
assert (
result.returncode == 0
), f"alembic heads failed (exit {result.returncode}):\n{result.stdout}"
), f"alembic heads failed (exit {result.returncode}):\n{result.stdout}\n{result.stderr}"
# Output looks like "d5c86e2c6dc6 (head)\n"
rev = result.stdout.strip().split()[0]
assert len(rev) > 0

View File

@@ -0,0 +1,83 @@
"""
Integration tests verifying the knowledge_sources field on MinimalPersonaSnapshot.
The GET /persona endpoint returns MinimalPersonaSnapshot, which includes a
knowledge_sources list derived from the persona's document sets, hierarchy
nodes, attached documents, and user files. These tests verify that the
field is populated correctly.
"""
import requests
from onyx.configs.constants import DocumentSource
from tests.integration.common_utils.constants import API_SERVER_URL
from tests.integration.common_utils.managers.file import FileManager
from tests.integration.common_utils.managers.persona import PersonaManager
from tests.integration.common_utils.test_file_utils import create_test_text_file
from tests.integration.common_utils.test_models import DATestLLMProvider
from tests.integration.common_utils.test_models import DATestUser
def _get_minimal_persona(
persona_id: int,
user: DATestUser,
) -> dict:
"""Fetch personas from the list endpoint and find the one with the given id."""
response = requests.get(
f"{API_SERVER_URL}/persona",
params={"persona_ids": persona_id},
headers=user.headers,
)
response.raise_for_status()
personas = response.json()
matches = [p for p in personas if p["id"] == persona_id]
assert (
len(matches) == 1
), f"Expected 1 persona with id={persona_id}, got {len(matches)}"
return matches[0]
def test_persona_with_user_files_includes_user_file_source(
admin_user: DATestUser,
llm_provider: DATestLLMProvider, # noqa: ARG001
) -> None:
"""When a persona has user files attached, knowledge_sources includes 'user_file'."""
text_file = create_test_text_file("test content for knowledge source verification")
file_descriptors, error = FileManager.upload_files(
files=[("test_ks.txt", text_file)],
user_performing_action=admin_user,
)
assert not error, f"File upload failed: {error}"
user_file_id = file_descriptors[0]["user_file_id"] or ""
persona = PersonaManager.create(
user_performing_action=admin_user,
name="KS User File Agent",
description="Agent with user files for knowledge_sources test",
system_prompt="You are a helpful assistant.",
user_file_ids=[user_file_id],
)
minimal = _get_minimal_persona(persona.id, admin_user)
assert (
DocumentSource.USER_FILE.value in minimal["knowledge_sources"]
), f"Expected 'user_file' in knowledge_sources, got: {minimal['knowledge_sources']}"
def test_persona_without_user_files_excludes_user_file_source(
admin_user: DATestUser,
llm_provider: DATestLLMProvider, # noqa: ARG001
) -> None:
"""When a persona has no user files, knowledge_sources should not contain 'user_file'."""
persona = PersonaManager.create(
user_performing_action=admin_user,
name="KS No Files Agent",
description="Agent without files for knowledge_sources test",
system_prompt="You are a helpful assistant.",
)
minimal = _get_minimal_persona(persona.id, admin_user)
assert (
DocumentSource.USER_FILE.value not in minimal["knowledge_sources"]
), f"Unexpected 'user_file' in knowledge_sources: {minimal['knowledge_sources']}"

View File

@@ -0,0 +1,88 @@
from typing import cast
from unittest.mock import MagicMock
from unittest.mock import patch
from sentry_sdk.types import Event
import onyx.configs.sentry as sentry_module
from onyx.configs.sentry import _add_instance_tags
def _event(data: dict) -> Event:
"""Helper to create a Sentry Event from a plain dict for testing."""
return cast(Event, data)
def _reset_state() -> None:
"""Reset the module-level resolved flag between tests."""
sentry_module._instance_id_resolved = False
class TestAddInstanceTags:
def setup_method(self) -> None:
_reset_state()
@patch("onyx.utils.telemetry.get_or_generate_uuid", return_value="test-uuid-1234")
@patch("sentry_sdk.set_tag")
def test_first_event_sets_instance_id(
self, mock_set_tag: MagicMock, mock_uuid: MagicMock
) -> None:
result = _add_instance_tags(_event({"message": "test error"}), {})
assert result is not None
assert result["tags"]["instance_id"] == "test-uuid-1234"
mock_set_tag.assert_called_once_with("instance_id", "test-uuid-1234")
mock_uuid.assert_called_once()
@patch("onyx.utils.telemetry.get_or_generate_uuid", return_value="test-uuid-1234")
@patch("sentry_sdk.set_tag")
def test_second_event_skips_resolution(
self, _mock_set_tag: MagicMock, mock_uuid: MagicMock
) -> None:
_add_instance_tags(_event({"message": "first"}), {})
result = _add_instance_tags(_event({"message": "second"}), {})
assert result is not None
assert "tags" not in result # second event not modified
mock_uuid.assert_called_once() # only resolved once
@patch(
"onyx.utils.telemetry.get_or_generate_uuid",
side_effect=Exception("DB unavailable"),
)
@patch("sentry_sdk.set_tag")
def test_resolution_failure_still_returns_event(
self, _mock_set_tag: MagicMock, _mock_uuid: MagicMock
) -> None:
result = _add_instance_tags(_event({"message": "test error"}), {})
assert result is not None
assert result["message"] == "test error"
assert "tags" not in result or "instance_id" not in result.get("tags", {})
@patch(
"onyx.utils.telemetry.get_or_generate_uuid",
side_effect=Exception("DB unavailable"),
)
@patch("sentry_sdk.set_tag")
def test_resolution_failure_retries_on_next_event(
self, _mock_set_tag: MagicMock, mock_uuid: MagicMock
) -> None:
"""If resolution fails (e.g. DB not ready), retry on the next event."""
_add_instance_tags(_event({"message": "first"}), {})
_add_instance_tags(_event({"message": "second"}), {})
assert mock_uuid.call_count == 2 # retried on second event
@patch("onyx.utils.telemetry.get_or_generate_uuid", return_value="test-uuid-1234")
@patch("sentry_sdk.set_tag")
def test_preserves_existing_tags(
self, _mock_set_tag: MagicMock, _mock_uuid: MagicMock
) -> None:
result = _add_instance_tags(
_event({"message": "test", "tags": {"existing": "tag"}}), {}
)
assert result is not None
assert result["tags"]["existing"] == "tag"
assert result["tags"]["instance_id"] == "test-uuid-1234"

View File

@@ -8,14 +8,23 @@ from unittest.mock import patch
import pytest
from onyx.access.models import ExternalAccess
from onyx.configs.constants import DocumentSource
from onyx.connectors.canvas.client import CanvasApiClient
from onyx.connectors.canvas.connector import _in_time_window
from onyx.connectors.canvas.connector import _parse_canvas_dt
from onyx.connectors.canvas.connector import _unix_to_canvas_time
from onyx.connectors.canvas.connector import CanvasConnector
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.canvas.connector import CanvasConnectorCheckpoint
from onyx.connectors.canvas.connector import CanvasStage
from onyx.connectors.exceptions import CredentialExpiredError
from onyx.connectors.exceptions import InsufficientPermissionsError
from onyx.connectors.exceptions import UnexpectedValidationError
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
from onyx.connectors.models import HierarchyNode
from onyx.error_handling.error_codes import OnyxErrorCode
from onyx.error_handling.exceptions import OnyxError
# ---------------------------------------------------------------------------
@@ -111,6 +120,56 @@ def _mock_response(
return resp
def _make_url_dispatcher(
courses: list[dict[str, Any]] | None = None,
pages: list[dict[str, Any]] | None = None,
assignments: list[dict[str, Any]] | None = None,
announcements: list[dict[str, Any]] | None = None,
page_error: bool = False,
) -> Any:
"""Return a callable that dispatches mock responses based on the request URL.
Meant to be assigned to ``mock_requests.get.side_effect``.
"""
api_prefix = f"{FAKE_BASE_URL}/api/v1"
def _dispatcher(url: str, **_kwargs: Any) -> MagicMock:
if page_error:
return _mock_response(500, {})
if url == f"{api_prefix}/courses":
return _mock_response(json_data=courses or [])
if "/pages" in url:
return _mock_response(json_data=pages or [])
if "/assignments" in url:
return _mock_response(json_data=assignments or [])
if "announcements" in url:
return _mock_response(json_data=announcements or [])
return _mock_response(json_data=[])
return _dispatcher
def _run_checkpoint(
connector: CanvasConnector,
checkpoint: CanvasConnectorCheckpoint,
start: float = 0.0,
end: float = datetime(2099, 1, 1, tzinfo=timezone.utc).timestamp(),
) -> tuple[
list[Document | HierarchyNode | ConnectorFailure], CanvasConnectorCheckpoint
]:
"""Run load_from_checkpoint once and collect yielded items + returned checkpoint."""
gen = connector.load_from_checkpoint(start, end, checkpoint)
items: list[Document | HierarchyNode | ConnectorFailure] = []
new_checkpoint: CanvasConnectorCheckpoint | None = None
try:
while True:
items.append(next(gen))
except StopIteration as e:
new_checkpoint = e.value
assert new_checkpoint is not None
return items, new_checkpoint
# ---------------------------------------------------------------------------
# CanvasApiClient.__init__ tests
# ---------------------------------------------------------------------------
@@ -269,15 +328,6 @@ class TestGet:
assert exc_info.value.status_code == 404
@patch("onyx.connectors.canvas.client.rl_requests")
def test_raises_on_429(self, mock_requests: MagicMock) -> None:
mock_requests.get.return_value = _mock_response(429, {})
with pytest.raises(OnyxError) as exc_info:
self.client.get("courses")
assert exc_info.value.status_code == 429
@patch("onyx.connectors.canvas.client.rl_requests")
def test_skips_params_when_using_full_url(self, mock_requests: MagicMock) -> None:
mock_requests.get.return_value = _mock_response(json_data=[])
@@ -454,6 +504,149 @@ class TestPaginate:
assert pages == []
@patch("onyx.connectors.canvas.client.rl_requests")
def test_error_extracts_message_from_error_dict(
self, mock_requests: MagicMock
) -> None:
"""Shape 1: {"error": {"message": "Not authorized"}}"""
mock_requests.get.return_value = _mock_response(
403, {"error": {"message": "Not authorized"}}
)
client = CanvasApiClient(
bearer_token=FAKE_TOKEN,
canvas_base_url=FAKE_BASE_URL,
)
with pytest.raises(OnyxError) as exc_info:
client.get("courses")
result = exc_info.value.detail
expected = "Not authorized"
assert result == expected
@patch("onyx.connectors.canvas.client.rl_requests")
def test_error_extracts_message_from_error_string(
self, mock_requests: MagicMock
) -> None:
"""Shape 2: {"error": "Invalid access token"}"""
mock_requests.get.return_value = _mock_response(
401, {"error": "Invalid access token"}
)
client = CanvasApiClient(
bearer_token=FAKE_TOKEN,
canvas_base_url=FAKE_BASE_URL,
)
with pytest.raises(OnyxError) as exc_info:
client.get("courses")
result = exc_info.value.detail
expected = "Invalid access token"
assert result == expected
@patch("onyx.connectors.canvas.client.rl_requests")
def test_error_extracts_message_from_errors_list(
self, mock_requests: MagicMock
) -> None:
"""Shape 3: {"errors": [{"message": "Invalid query"}]}"""
mock_requests.get.return_value = _mock_response(
400, {"errors": [{"message": "Invalid query"}]}
)
client = CanvasApiClient(
bearer_token=FAKE_TOKEN,
canvas_base_url=FAKE_BASE_URL,
)
with pytest.raises(OnyxError) as exc_info:
client.get("courses")
result = exc_info.value.detail
expected = "Invalid query"
assert result == expected
@patch("onyx.connectors.canvas.client.rl_requests")
def test_error_dict_takes_priority_over_errors_list(
self, mock_requests: MagicMock
) -> None:
"""When both error shapes are present, error dict wins."""
mock_requests.get.return_value = _mock_response(
403, {"error": "Specific error", "errors": [{"message": "Generic"}]}
)
client = CanvasApiClient(
bearer_token=FAKE_TOKEN,
canvas_base_url=FAKE_BASE_URL,
)
with pytest.raises(OnyxError) as exc_info:
client.get("courses")
result = exc_info.value.detail
expected = "Specific error"
assert result == expected
@patch("onyx.connectors.canvas.client.rl_requests")
def test_error_falls_back_to_reason_when_no_json_message(
self, mock_requests: MagicMock
) -> None:
"""Empty error body falls back to response.reason."""
mock_requests.get.return_value = _mock_response(500, {})
client = CanvasApiClient(
bearer_token=FAKE_TOKEN,
canvas_base_url=FAKE_BASE_URL,
)
with pytest.raises(OnyxError) as exc_info:
client.get("courses")
result = exc_info.value.detail
expected = "Error" # from _mock_response's reason for >= 300
assert result == expected
@patch("onyx.connectors.canvas.client.rl_requests")
def test_invalid_json_on_success_raises(self, mock_requests: MagicMock) -> None:
"""Invalid JSON on a 2xx response raises OnyxError."""
resp = MagicMock()
resp.status_code = 200
resp.json.side_effect = ValueError("No JSON")
resp.headers = {"Link": ""}
mock_requests.get.return_value = resp
client = CanvasApiClient(
bearer_token=FAKE_TOKEN,
canvas_base_url=FAKE_BASE_URL,
)
with pytest.raises(OnyxError, match="Invalid JSON"):
client.get("courses")
@patch("onyx.connectors.canvas.client.rl_requests")
def test_invalid_json_on_error_falls_back_to_reason(
self, mock_requests: MagicMock
) -> None:
"""Invalid JSON on a 4xx response falls back to response.reason."""
resp = MagicMock()
resp.status_code = 500
resp.reason = "Internal Server Error"
resp.json.side_effect = ValueError("No JSON")
resp.headers = {"Link": ""}
mock_requests.get.return_value = resp
client = CanvasApiClient(
bearer_token=FAKE_TOKEN,
canvas_base_url=FAKE_BASE_URL,
)
with pytest.raises(OnyxError) as exc_info:
client.get("courses")
result = exc_info.value.detail
expected = "Internal Server Error"
assert result == expected
# ---------------------------------------------------------------------------
# CanvasApiClient._parse_next_link tests
@@ -588,6 +781,16 @@ class TestConnectorUrlNormalization:
assert result == expected
@patch("onyx.connectors.canvas.client.rl_requests")
def test_load_credentials_insufficient_permissions(
self, mock_requests: MagicMock
) -> None:
mock_requests.get.return_value = _mock_response(403, {})
connector = CanvasConnector(canvas_base_url=FAKE_BASE_URL)
with pytest.raises(InsufficientPermissionsError):
connector.load_credentials({"canvas_access_token": FAKE_TOKEN})
# ---------------------------------------------------------------------------
# CanvasConnector — document conversion
@@ -766,10 +969,6 @@ class TestValidateConnectorSettings:
def test_validate_insufficient_permissions(self, mock_requests: MagicMock) -> None:
self._assert_validate_raises(403, InsufficientPermissionsError, mock_requests)
@patch("onyx.connectors.canvas.client.rl_requests")
def test_validate_rate_limited(self, mock_requests: MagicMock) -> None:
self._assert_validate_raises(429, ConnectorValidationError, mock_requests)
@patch("onyx.connectors.canvas.client.rl_requests")
def test_validate_unexpected_error(self, mock_requests: MagicMock) -> None:
self._assert_validate_raises(500, UnexpectedValidationError, mock_requests)
@@ -874,3 +1073,652 @@ class TestListAnnouncements:
result = connector._list_announcements(course_id=1)
assert result == []
class TestCheckpoint:
def test_build_dummy_checkpoint(self) -> None:
connector = _build_connector()
cp = connector.build_dummy_checkpoint()
assert cp.has_more is True
assert cp.course_ids == []
assert cp.current_course_index == 0
assert cp.stage == CanvasStage.PAGES
def test_validate_checkpoint_json(self) -> None:
connector = _build_connector()
cp = CanvasConnectorCheckpoint(
has_more=True,
course_ids=[1, 2],
current_course_index=1,
stage=CanvasStage.ASSIGNMENTS,
)
json_str = cp.model_dump_json()
restored = connector.validate_checkpoint_json(json_str)
assert restored.course_ids == [1, 2]
assert restored.current_course_index == 1
assert restored.stage == CanvasStage.ASSIGNMENTS
assert restored.has_more is True
# ---------------------------------------------------------------------------
# load_from_checkpoint tests
# ---------------------------------------------------------------------------
class TestLoadFromCheckpoint:
@patch("onyx.connectors.canvas.client.rl_requests")
def test_first_call_materializes_courses(self, mock_requests: MagicMock) -> None:
"""First call should populate course_ids and yield no documents."""
mock_requests.get.side_effect = _make_url_dispatcher(
courses=[_mock_course(1), _mock_course(2, "Data Structures", "CS201")]
)
connector = _build_connector()
cp = connector.build_dummy_checkpoint()
items, new_cp = _run_checkpoint(connector, cp)
assert items == []
assert new_cp.course_ids == [1, 2]
assert new_cp.current_course_index == 0
assert new_cp.stage == CanvasStage.PAGES
assert new_cp.has_more is True
@patch("onyx.connectors.canvas.client.rl_requests")
def test_processes_pages_stage(self, mock_requests: MagicMock) -> None:
"""Pages stage yields page documents within the time window."""
mock_requests.get.side_effect = _make_url_dispatcher(
pages=[_mock_page(10, "Syllabus", "2025-06-15T12:00:00Z")]
)
connector = _build_connector()
cp = CanvasConnectorCheckpoint(
has_more=True,
course_ids=[1],
current_course_index=0,
stage=CanvasStage.PAGES,
)
start = datetime(2025, 6, 1, 0, 0, tzinfo=timezone.utc).timestamp()
end = datetime(2025, 6, 30, 0, 0, tzinfo=timezone.utc).timestamp()
items, new_cp = _run_checkpoint(connector, cp, start, end)
expected_count = 1
expected_id = "canvas-page-1-10"
assert len(items) == expected_count
assert isinstance(items[0], Document)
assert items[0].id == expected_id
assert new_cp.stage == CanvasStage.ASSIGNMENTS
@patch("onyx.connectors.canvas.client.rl_requests")
def test_advances_through_all_stages(self, mock_requests: MagicMock) -> None:
"""Calling checkpoint 3 times advances pages -> assignments -> announcements -> next course."""
page = _mock_page(10, updated_at="2025-06-15T12:00:00Z")
assignment = _mock_assignment(20, updated_at="2025-06-15T12:00:00Z")
announcement = _mock_announcement(30, posted_at="2025-06-15T12:00:00Z")
mock_requests.get.side_effect = _make_url_dispatcher(
pages=[page], assignments=[assignment], announcements=[announcement]
)
connector = _build_connector()
start = datetime(2025, 6, 1, tzinfo=timezone.utc).timestamp()
end = datetime(2025, 6, 30, tzinfo=timezone.utc).timestamp()
cp = CanvasConnectorCheckpoint(
has_more=True,
course_ids=[1],
current_course_index=0,
stage=CanvasStage.PAGES,
)
# Stage 1: pages
items1, cp = _run_checkpoint(connector, cp, start, end)
assert cp.stage == CanvasStage.ASSIGNMENTS
assert len(items1) == 1
# Stage 2: assignments
mock_requests.get.side_effect = _make_url_dispatcher(assignments=[assignment])
items2, cp = _run_checkpoint(connector, cp, start, end)
assert cp.stage == CanvasStage.ANNOUNCEMENTS
assert len(items2) == 1
# Stage 3: announcements -> advances course index
mock_requests.get.side_effect = _make_url_dispatcher(
announcements=[announcement]
)
items3, cp = _run_checkpoint(connector, cp, start, end)
assert cp.current_course_index == 1
assert cp.stage == CanvasStage.PAGES
assert cp.has_more is False
@patch("onyx.connectors.canvas.client.rl_requests")
def test_filters_by_time_window(self, mock_requests: MagicMock) -> None:
"""Only documents within (start, end] are yielded."""
old_page = _mock_page(10, updated_at="2025-01-01T00:00:00Z")
new_page = _mock_page(11, title="New Page", updated_at="2025-06-15T12:00:00Z")
mock_requests.get.side_effect = _make_url_dispatcher(pages=[new_page, old_page])
connector = _build_connector()
cp = CanvasConnectorCheckpoint(
has_more=True,
course_ids=[1],
current_course_index=0,
stage=CanvasStage.PAGES,
)
start = datetime(2025, 6, 1, tzinfo=timezone.utc).timestamp()
end = datetime(2025, 6, 30, tzinfo=timezone.utc).timestamp()
items, _ = _run_checkpoint(connector, cp, start, end)
expected_count = 1
expected_id = "canvas-page-1-11"
assert len(items) == expected_count
assert isinstance(items[0], Document)
assert items[0].id == expected_id
@patch("onyx.connectors.canvas.client.rl_requests")
def test_skips_announcement_without_posted_at(
self, mock_requests: MagicMock
) -> None:
announcement = _mock_announcement()
announcement["posted_at"] = None
mock_requests.get.side_effect = _make_url_dispatcher(
announcements=[announcement]
)
connector = _build_connector()
cp = CanvasConnectorCheckpoint(
has_more=True,
course_ids=[1],
current_course_index=0,
stage=CanvasStage.ANNOUNCEMENTS,
)
items, _ = _run_checkpoint(connector, cp)
assert len(items) == 0
def test_stage_failure_advances_stage_and_yields_failure(self) -> None:
"""A 500 on a stage fetch yields a stage-level ConnectorFailure and
advances to the next stage, so the framework doesn't loop on the
same failing state forever."""
connector = _build_connector()
cp = CanvasConnectorCheckpoint(
has_more=True,
course_ids=[1, 2],
current_course_index=0,
stage=CanvasStage.PAGES,
)
with patch.object(
connector,
"_fetch_stage_page",
side_effect=OnyxError(
OnyxErrorCode.INTERNAL_ERROR,
"boom",
status_code_override=500,
),
):
items, new_cp = _run_checkpoint(connector, cp)
expected_entity_id = "canvas-pages-1"
assert len(items) == 1
assert isinstance(items[0], ConnectorFailure)
assert items[0].failed_entity is not None
assert items[0].failed_entity.entity_id == expected_entity_id
assert new_cp.stage == CanvasStage.ASSIGNMENTS
assert new_cp.current_course_index == 0
assert new_cp.next_url is None
assert new_cp.has_more is True
def test_course_404_advances_course_and_yields_failure(self) -> None:
    """A 404 on a stage fetch means the whole course is inaccessible —
    yield a course-level ConnectorFailure and skip to the next course
    instead of burning API calls on every stage of a missing course."""
    connector = _build_connector()
    cp = CanvasConnectorCheckpoint(
        has_more=True,
        course_ids=[1, 2],
        current_course_index=0,
        stage=CanvasStage.PAGES,
    )
    # Any stage fetch for the course reports NOT_FOUND (404).
    with patch.object(
        connector,
        "_fetch_stage_page",
        side_effect=OnyxError(
            OnyxErrorCode.NOT_FOUND,
            "course gone",
            status_code_override=404,
        ),
    ):
        items, new_cp = _run_checkpoint(connector, cp)
    expected_entity_id = "canvas-course-1"
    expected_next_course_index = 1
    # A single course-level failure: the entity id names the course,
    # not an individual stage.
    assert len(items) == 1
    assert isinstance(items[0], ConnectorFailure)
    assert items[0].failed_entity is not None
    assert items[0].failed_entity.entity_id == expected_entity_id
    # Checkpoint jumps to the next course and resets to the first stage.
    assert new_cp.current_course_index == expected_next_course_index
    assert new_cp.stage == CanvasStage.PAGES
    assert new_cp.next_url is None
    assert new_cp.has_more is True
def test_fatal_auth_failure_during_stage_fetch_propagates(self) -> None:
    """A 401 from the Canvas API surfaces as CredentialExpiredError."""
    connector = _build_connector()
    checkpoint = CanvasConnectorCheckpoint(
        has_more=True,
        course_ids=[1],
        current_course_index=0,
        stage=CanvasStage.PAGES,
    )
    with patch("onyx.connectors.canvas.client.rl_requests") as mock_requests:
        mock_requests.get.return_value = _mock_response(401, {})
        with pytest.raises(CredentialExpiredError):
            _run_checkpoint(connector, checkpoint)
def test_security_failure_during_stage_fetch_propagates(self) -> None:
    """An OnyxError with no special handling propagates unchanged."""
    connector = _build_connector()
    checkpoint = CanvasConnectorCheckpoint(
        has_more=True,
        course_ids=[1],
        current_course_index=0,
        stage=CanvasStage.PAGES,
    )
    failure = OnyxError(OnyxErrorCode.BAD_GATEWAY, "bad next link")
    with patch.object(connector, "_fetch_stage_page", side_effect=failure):
        with pytest.raises(OnyxError, match="bad next link"):
            _run_checkpoint(connector, checkpoint)
@patch("onyx.connectors.canvas.client.rl_requests")
def test_per_document_conversion_failure_yields_connector_failure(
    self, mock_requests: MagicMock
) -> None:
    """Bad data for one page yields ConnectorFailure, doesn't stop processing."""
    # "bad-date" in updated_at makes per-document conversion blow up
    # after the page itself was fetched successfully.
    bad_page = {
        "page_id": 10,
        "url": "test",
        "title": "Test",
        "body": None,
        "created_at": "2025-06-15T12:00:00Z",
        "updated_at": "bad-date",
    }
    mock_requests.get.side_effect = _make_url_dispatcher(pages=[bad_page])
    connector = _build_connector()
    cp = CanvasConnectorCheckpoint(
        has_more=True,
        course_ids=[1],
        current_course_index=0,
        stage=CanvasStage.PAGES,
    )
    items, new_cp = _run_checkpoint(connector, cp)
    # The broken document becomes a failure item, and the checkpoint
    # still advances to the next stage.
    assert len(items) == 1
    assert isinstance(items[0], ConnectorFailure)
    assert new_cp.stage == CanvasStage.ASSIGNMENTS
@patch("onyx.connectors.canvas.client.rl_requests")
def test_all_courses_done_sets_has_more_false(
    self, mock_requests: MagicMock
) -> None:
    """Once the course index walks past the last course, the run ends
    with no items and has_more flipped to False."""
    mock_requests.get.side_effect = _make_url_dispatcher()
    connector = _build_connector()
    exhausted = CanvasConnectorCheckpoint(
        has_more=True, course_ids=[1], current_course_index=1
    )
    produced, final_cp = _run_checkpoint(connector, exhausted)
    assert produced == []
    assert final_cp.has_more is False
def test_invalid_stage_raises_value_error(self) -> None:
    """A checkpoint carrying an unknown stage value is rejected loudly."""
    connector = _build_connector()
    checkpoint = CanvasConnectorCheckpoint(
        has_more=True,
        course_ids=[1],
        current_course_index=0,
        stage=CanvasStage.PAGES,
    )
    checkpoint.stage = "invalid"  # type: ignore[assignment]
    with pytest.raises(ValueError, match="Invalid checkpoint stage"):
        _run_checkpoint(connector, checkpoint)
# ---------------------------------------------------------------------------
# load_from_checkpoint_with_perm_sync tests
# ---------------------------------------------------------------------------
class TestLoadFromCheckpointWithPermSync:
    """Tests for the permission-syncing checkpoint load path."""

    @patch("onyx.connectors.canvas.connector.get_course_permissions")
    @patch("onyx.connectors.canvas.client.rl_requests")
    def test_documents_have_external_access(
        self, mock_requests: MagicMock, mock_perms: MagicMock
    ) -> None:
        """load_from_checkpoint_with_perm_sync attaches ExternalAccess to documents."""
        expected_access = ExternalAccess(
            external_user_emails={"student@school.edu"},
            external_user_group_ids=set(),
            is_public=False,
        )
        mock_perms.return_value = expected_access
        mock_requests.get.side_effect = _make_url_dispatcher(
            pages=[_mock_page(10, "Syllabus", "2025-06-15T12:00:00Z")]
        )
        connector = _build_connector()
        cp = CanvasConnectorCheckpoint(
            has_more=True,
            course_ids=[1],
            current_course_index=0,
            stage=CanvasStage.PAGES,
        )
        start = datetime(2025, 6, 1, tzinfo=timezone.utc).timestamp()
        end = datetime(2025, 6, 30, tzinfo=timezone.utc).timestamp()
        gen = connector.load_from_checkpoint_with_perm_sync(start, end, cp)
        items: list[Document | HierarchyNode | ConnectorFailure] = []
        new_cp: CanvasConnectorCheckpoint | None = None
        # Drain the generator manually: the next checkpoint is returned
        # via StopIteration.value, which a plain `list(gen)` would drop.
        try:
            while True:
                items.append(next(gen))
        except StopIteration as e:
            new_cp = e.value
        assert new_cp is not None
        assert len(items) == 1
        assert isinstance(items[0], Document)
        # The permissions fetched for the course are attached verbatim.
        assert items[0].external_access == expected_access
        assert new_cp.stage == CanvasStage.ASSIGNMENTS
        mock_perms.assert_called_once()
# ---------------------------------------------------------------------------
# Helper function tests
# ---------------------------------------------------------------------------
class TestParseCanvasDt:
    """_parse_canvas_dt turns Canvas API timestamps into aware UTC datetimes."""

    def test_z_suffix_parsed_as_utc(self) -> None:
        parsed = _parse_canvas_dt("2025-06-15T12:00:00Z")
        assert parsed == datetime(2025, 6, 15, 12, 0, 0, tzinfo=timezone.utc)

    def test_plus_offset_parsed_as_utc(self) -> None:
        parsed = _parse_canvas_dt("2025-06-15T12:00:00+00:00")
        assert parsed == datetime(2025, 6, 15, 12, 0, 0, tzinfo=timezone.utc)

    def test_result_is_timezone_aware(self) -> None:
        assert _parse_canvas_dt("2025-01-01T00:00:00Z").tzinfo is not None
class TestUnixToCanvasTime:
    """_unix_to_canvas_time renders epoch seconds as a Z-suffixed ISO string."""

    def test_known_epoch_produces_expected_string(self) -> None:
        moment = datetime(2025, 6, 15, 12, 0, 0, tzinfo=timezone.utc)
        assert _unix_to_canvas_time(moment.timestamp()) == "2025-06-15T12:00:00Z"

    def test_round_trips_with_parse_canvas_dt(self) -> None:
        moment = datetime(2025, 3, 10, 8, 30, 0, tzinfo=timezone.utc)
        round_tripped = _parse_canvas_dt(_unix_to_canvas_time(moment.timestamp()))
        assert round_tripped == moment
class TestInTimeWindow:
    """_in_time_window: start boundary is exclusive, end boundary inclusive."""

    @staticmethod
    def _june_window() -> tuple[float, float]:
        # Shared June 2025 window used by every case below.
        return (
            datetime(2025, 6, 1, tzinfo=timezone.utc).timestamp(),
            datetime(2025, 6, 30, tzinfo=timezone.utc).timestamp(),
        )

    def test_inside_window(self) -> None:
        start, end = self._june_window()
        assert _in_time_window("2025-06-15T12:00:00Z", start, end) is True

    def test_before_window(self) -> None:
        start, end = self._june_window()
        assert _in_time_window("2025-05-01T12:00:00Z", start, end) is False

    def test_after_window(self) -> None:
        start, end = self._june_window()
        assert _in_time_window("2025-07-15T12:00:00Z", start, end) is False

    def test_start_boundary_is_exclusive(self) -> None:
        start, end = self._june_window()
        assert _in_time_window("2025-06-01T00:00:00Z", start, end) is False

    def test_end_boundary_is_inclusive(self) -> None:
        start, end = self._june_window()
        assert _in_time_window("2025-06-30T00:00:00Z", start, end) is True
class TestFetchStagePage:
    """_fetch_stage_page prefers a pagination next_url over endpoint+params."""

    def test_uses_full_url_when_next_url_set(self) -> None:
        """When next_url is present, the client is called with that full URL
        and the endpoint/params arguments are ignored."""
        connector = _build_connector()
        with patch.object(
            connector.canvas_client, "get", return_value=([{"id": 1}], None)
        ) as mock_get:
            result, next_url = connector._fetch_stage_page(
                next_url="https://myschool.instructure.com/api/v1/courses?page=2",
                endpoint="courses/1/pages",
                params={"per_page": "100"},
            )
        mock_get.assert_called_once_with(
            full_url="https://myschool.instructure.com/api/v1/courses?page=2"
        )
        assert result == [{"id": 1}]

    def test_uses_endpoint_and_params_when_no_next_url(self) -> None:
        """Without a next_url, the first-page endpoint and params are used."""
        connector = _build_connector()
        with patch.object(
            connector.canvas_client, "get", return_value=([{"id": 1}], None)
        ) as mock_get:
            result, next_url = connector._fetch_stage_page(
                next_url=None,
                endpoint="courses/1/pages",
                params={"per_page": "100"},
            )
        mock_get.assert_called_once_with(
            endpoint="courses/1/pages", params={"per_page": "100"}
        )

    def test_returns_empty_list_for_none_response(self) -> None:
        """A None body from the client is normalized to an empty list."""
        connector = _build_connector()
        with patch.object(connector.canvas_client, "get", return_value=(None, None)):
            result, next_url = connector._fetch_stage_page(
                next_url=None,
                endpoint="courses/1/pages",
                params={},
            )
        assert result == []
        assert next_url is None
class TestProcessItems:
    """_process_items converts raw Canvas payloads into Documents,
    filters by the time window, and reports early exit for sorted feeds."""

    def test_pages_in_window_converted(self) -> None:
        """A page updated inside [start, end] becomes a Document."""
        connector = _build_connector()
        start = datetime(2025, 6, 1, tzinfo=timezone.utc).timestamp()
        end = datetime(2025, 6, 30, tzinfo=timezone.utc).timestamp()
        results, early_exit = connector._process_items(
            response=[_mock_page(10, "Syllabus", "2025-06-15T12:00:00Z")],
            stage=CanvasStage.PAGES,
            course_id=1,
            start=start,
            end=end,
            include_permissions=False,
        )
        assert len(results) == 1
        assert isinstance(results[0], Document)
        assert early_exit is False

    def test_pages_outside_window_skipped(self) -> None:
        """A page older than the window is dropped and triggers early exit."""
        connector = _build_connector()
        start = datetime(2025, 6, 1, tzinfo=timezone.utc).timestamp()
        end = datetime(2025, 6, 30, tzinfo=timezone.utc).timestamp()
        results, early_exit = connector._process_items(
            response=[_mock_page(10, "Old", "2025-01-01T12:00:00Z")],
            stage=CanvasStage.PAGES,
            course_id=1,
            start=start,
            end=end,
            include_permissions=False,
        )
        assert results == []
        assert early_exit is True

    def test_assignments_in_window_converted(self) -> None:
        """An in-window assignment becomes a Document."""
        connector = _build_connector()
        start = datetime(2025, 6, 1, tzinfo=timezone.utc).timestamp()
        end = datetime(2025, 6, 30, tzinfo=timezone.utc).timestamp()
        results, early_exit = connector._process_items(
            response=[_mock_assignment(20, "HW1", 1, "2025-06-15T12:00:00Z")],
            stage=CanvasStage.ASSIGNMENTS,
            course_id=1,
            start=start,
            end=end,
            include_permissions=False,
        )
        assert len(results) == 1
        assert isinstance(results[0], Document)
        assert early_exit is False

    def test_announcements_in_window_converted(self) -> None:
        """An in-window announcement becomes a Document."""
        connector = _build_connector()
        start = datetime(2025, 6, 1, tzinfo=timezone.utc).timestamp()
        end = datetime(2025, 6, 30, tzinfo=timezone.utc).timestamp()
        results, early_exit = connector._process_items(
            response=[_mock_announcement(30, "News", 1, "2025-06-15T12:00:00Z")],
            stage=CanvasStage.ANNOUNCEMENTS,
            course_id=1,
            start=start,
            end=end,
            include_permissions=False,
        )
        assert len(results) == 1
        assert isinstance(results[0], Document)
        assert early_exit is False

    def test_bad_item_yields_connector_failure(self) -> None:
        """An unparseable item is reported as a ConnectorFailure, not raised."""
        connector = _build_connector()
        start = datetime(2025, 6, 1, tzinfo=timezone.utc).timestamp()
        end = datetime(2025, 6, 30, tzinfo=timezone.utc).timestamp()
        # "bad-date" in updated_at breaks per-item conversion.
        bad_page = {
            "page_id": 10,
            "url": "test",
            "title": "Test",
            "body": None,
            "created_at": "2025-06-15T12:00:00Z",
            "updated_at": "bad-date",
        }
        results, early_exit = connector._process_items(
            response=[bad_page],
            stage=CanvasStage.PAGES,
            course_id=1,
            start=start,
            end=end,
            include_permissions=False,
        )
        assert len(results) == 1
        assert isinstance(results[0], ConnectorFailure)

    def test_page_early_exit_on_old_item(self) -> None:
        """Pages sorted desc — item before start triggers early exit."""
        connector = _build_connector()
        start = datetime(2025, 6, 1, tzinfo=timezone.utc).timestamp()
        end = datetime(2025, 6, 30, tzinfo=timezone.utc).timestamp()
        results, early_exit = connector._process_items(
            response=[
                _mock_page(10, "New", "2025-06-15T12:00:00Z"),
                _mock_page(11, "Old", "2025-05-01T12:00:00Z"),
                _mock_page(12, "Older", "2025-04-01T12:00:00Z"),
            ],
            stage=CanvasStage.PAGES,
            course_id=1,
            start=start,
            end=end,
            include_permissions=False,
        )
        # Only the in-window page is converted; the first too-old page
        # stops processing of the remaining (even older) items.
        assert len(results) == 1
        assert early_exit is True
class TestMaybeAttachPermissions:
    """_maybe_attach_permissions only fetches/attaches access when asked."""

    def test_attaches_permissions_when_true(self) -> None:
        connector = _build_connector()
        document = MagicMock(spec=Document)
        document.external_access = None
        granted = ExternalAccess(
            external_user_emails={"student@school.edu"},
            external_user_group_ids=set(),
            is_public=False,
        )
        with patch.object(
            connector, "_get_course_permissions", return_value=granted
        ):
            updated = connector._maybe_attach_permissions(
                document, course_id=1, include_permissions=True
            )
        assert updated.external_access == granted

    def test_no_op_when_false(self) -> None:
        connector = _build_connector()
        document = MagicMock(spec=Document)
        document.external_access = None
        updated = connector._maybe_attach_permissions(
            document, course_id=1, include_permissions=False
        )
        assert updated.external_access is None

View File

@@ -0,0 +1,200 @@
"""Unit tests for GoogleDriveConnector slim retrieval routing.
Verifies that:
- GoogleDriveConnector implements SlimConnector so pruning takes the ID-only path
- retrieve_all_slim_docs() calls _extract_slim_docs_from_google_drive with include_permissions=False
- retrieve_all_slim_docs_perm_sync() calls _extract_slim_docs_from_google_drive with include_permissions=True
- celery_utils routing picks retrieve_all_slim_docs() for GoogleDriveConnector
"""
from unittest.mock import MagicMock
from unittest.mock import patch
from onyx.background.celery.celery_utils import extract_ids_from_runnable_connector
from onyx.connectors.google_drive.connector import GoogleDriveConnector
from onyx.connectors.google_drive.models import DriveRetrievalStage
from onyx.connectors.google_drive.models import GoogleDriveCheckpoint
from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.models import SlimDocument
from onyx.utils.threadpool_concurrency import ThreadSafeDict
def _make_done_checkpoint() -> GoogleDriveCheckpoint:
    """Checkpoint already at the DONE stage with nothing retrieved."""
    return GoogleDriveCheckpoint(
        completion_stage=DriveRetrievalStage.DONE,
        retrieved_folder_and_drive_ids=set(),
        completion_map=ThreadSafeDict(),
        all_retrieved_file_ids=set(),
        has_more=False,
    )
def _make_connector() -> GoogleDriveConnector:
    """Connector with canned credentials so no real Google auth happens."""
    drive_connector = GoogleDriveConnector(include_my_drives=True)
    drive_connector._creds = MagicMock()
    drive_connector._primary_admin_email = "admin@example.com"
    return drive_connector
class TestGoogleDriveSlimConnectorInterface:
    """GoogleDriveConnector must expose both slim-retrieval interfaces."""

    def test_implements_slim_connector(self) -> None:
        assert isinstance(_make_connector(), SlimConnector)

    def test_implements_slim_connector_with_perm_sync(self) -> None:
        assert isinstance(_make_connector(), SlimConnectorWithPermSync)

    def test_slim_connector_checked_before_perm_sync(self) -> None:
        """SlimConnector must appear before SlimConnectorWithPermSync in MRO
        so celery_utils isinstance check routes to retrieve_all_slim_docs()."""
        resolution_order = list(GoogleDriveConnector.__mro__)
        assert resolution_order.index(SlimConnector) < resolution_order.index(
            SlimConnectorWithPermSync
        )
class TestRetrieveAllSlimDocs:
    """retrieve_all_slim_docs loops until the checkpoint reaches DONE and
    delegates to _extract_slim_docs_from_google_drive without permissions."""

    def test_does_not_call_extract_when_checkpoint_is_done(self) -> None:
        """A checkpoint that starts at DONE short-circuits the loop."""
        connector = _make_connector()
        slim_doc = MagicMock(
            spec=SlimDocument, id="doc1", parent_hierarchy_raw_node_id=None
        )
        with patch.object(
            connector, "build_dummy_checkpoint", return_value=_make_done_checkpoint()
        ):
            with patch.object(
                connector,
                "_extract_slim_docs_from_google_drive",
                return_value=iter([[slim_doc]]),
            ) as mock_extract:
                list(connector.retrieve_all_slim_docs())
        mock_extract.assert_not_called()  # loop exits immediately since checkpoint is DONE

    def test_calls_extract_with_include_permissions_false_non_done_checkpoint(
        self,
    ) -> None:
        """A non-DONE checkpoint triggers one extract call with
        include_permissions=False (the pruning / ID-only path)."""
        connector = _make_connector()
        slim_doc = MagicMock(
            spec=SlimDocument, id="doc1", parent_hierarchy_raw_node_id=None
        )
        # Checkpoint starts at START, _extract advances it to DONE
        with patch.object(connector, "build_dummy_checkpoint") as mock_build:
            start_checkpoint = GoogleDriveCheckpoint(
                retrieved_folder_and_drive_ids=set(),
                completion_stage=DriveRetrievalStage.START,
                completion_map=ThreadSafeDict(),
                all_retrieved_file_ids=set(),
                has_more=False,
            )
            mock_build.return_value = start_checkpoint

            # Generator side effect: mutates the checkpoint to DONE so the
            # retrieval loop terminates after a single batch.
            def _advance_checkpoint(**_kwargs: object) -> object:
                start_checkpoint.completion_stage = DriveRetrievalStage.DONE
                yield [slim_doc]

            with patch.object(
                connector,
                "_extract_slim_docs_from_google_drive",
                side_effect=_advance_checkpoint,
            ) as mock_extract:
                list(connector.retrieve_all_slim_docs())
            mock_extract.assert_called_once()
            _, kwargs = mock_extract.call_args
            assert kwargs.get("include_permissions") is False

    def test_yields_slim_documents(self) -> None:
        """Batches produced by the extract helper are yielded to the caller."""
        connector = _make_connector()
        slim_doc = MagicMock(
            spec=SlimDocument, id="doc1", parent_hierarchy_raw_node_id=None
        )
        start_checkpoint = GoogleDriveCheckpoint(
            retrieved_folder_and_drive_ids=set(),
            completion_stage=DriveRetrievalStage.START,
            completion_map=ThreadSafeDict(),
            all_retrieved_file_ids=set(),
            has_more=False,
        )
        with patch.object(
            connector, "build_dummy_checkpoint", return_value=start_checkpoint
        ):

            # Same advance-to-DONE trick as above so the loop runs once.
            def _advance_and_yield(**_kwargs: object) -> object:
                start_checkpoint.completion_stage = DriveRetrievalStage.DONE
                yield [slim_doc]

            with patch.object(
                connector,
                "_extract_slim_docs_from_google_drive",
                side_effect=_advance_and_yield,
            ):
                batches = list(connector.retrieve_all_slim_docs())
        assert len(batches) == 1
        assert batches[0][0] is slim_doc
class TestRetrieveAllSlimDocsPermSync:
    """retrieve_all_slim_docs_perm_sync must take the permissions path."""

    def test_calls_extract_with_include_permissions_true(self) -> None:
        """The perm-sync variant requests permissions from the extractor."""
        connector = _make_connector()
        slim_doc = MagicMock(
            spec=SlimDocument, id="doc1", parent_hierarchy_raw_node_id=None
        )
        start_checkpoint = GoogleDriveCheckpoint(
            retrieved_folder_and_drive_ids=set(),
            completion_stage=DriveRetrievalStage.START,
            completion_map=ThreadSafeDict(),
            all_retrieved_file_ids=set(),
            has_more=False,
        )
        with patch.object(
            connector, "build_dummy_checkpoint", return_value=start_checkpoint
        ):

            # Advance the checkpoint to DONE so the retrieval loop runs once.
            def _advance_and_yield(**_kwargs: object) -> object:
                start_checkpoint.completion_stage = DriveRetrievalStage.DONE
                yield [slim_doc]

            with patch.object(
                connector,
                "_extract_slim_docs_from_google_drive",
                side_effect=_advance_and_yield,
            ) as mock_extract:
                list(connector.retrieve_all_slim_docs_perm_sync())
            mock_extract.assert_called_once()
            _, kwargs = mock_extract.call_args
            # NOTE(review): this deliberately accepts None as well as True —
            # presumably because the implementation may omit the kwarg and
            # rely on a default of True. If the implementation always passes
            # include_permissions=True explicitly, tighten this to `is True`.
            assert (
                kwargs.get("include_permissions") is None
                or kwargs.get("include_permissions") is True
            )
class TestCeleryUtilsRouting:
    def test_pruning_uses_retrieve_all_slim_docs(self) -> None:
        """extract_ids_from_runnable_connector must call retrieve_all_slim_docs,
        not retrieve_all_slim_docs_perm_sync, for GoogleDriveConnector."""
        connector = _make_connector()
        fake_doc = MagicMock(
            spec=SlimDocument, id="doc1", parent_hierarchy_raw_node_id=None
        )
        slim_patch = patch.object(
            connector, "retrieve_all_slim_docs", return_value=iter([[fake_doc]])
        )
        perm_patch = patch.object(connector, "retrieve_all_slim_docs_perm_sync")
        with slim_patch as mock_slim, perm_patch as mock_perm_sync:
            extract_ids_from_runnable_connector(
                connector, connector_type="google_drive"
            )
        mock_slim.assert_called_once()
        mock_perm_sync.assert_not_called()

View File

@@ -0,0 +1,86 @@
"""Tests for get_index_attempt_errors_across_connectors."""
from datetime import datetime
from datetime import timezone
from unittest.mock import MagicMock
from onyx.db.index_attempt import get_index_attempt_errors_across_connectors
from onyx.db.models import IndexAttemptError
def _make_error(
    id: int = 1,
    cc_pair_id: int = 1,
    error_type: str | None = "TimeoutError",
    is_resolved: bool = False,
) -> IndexAttemptError:
    """Create a mock IndexAttemptError."""
    mock_error = MagicMock(spec=IndexAttemptError)
    mock_error.id = id
    mock_error.connector_credential_pair_id = cc_pair_id
    mock_error.error_type = error_type
    mock_error.is_resolved = is_resolved
    return mock_error
class TestGetIndexAttemptErrorsAcrossConnectors:
    """DB-layer tests using a mocked session: `scalar` backs the count
    query, `scalars(...).all()` backs the paged result query."""

    def test_returns_errors_and_count(self) -> None:
        """Count and rows come back as a (rows, total) pair."""
        mock_session = MagicMock()
        mock_errors = [_make_error(id=1), _make_error(id=2)]
        mock_session.scalar.return_value = 2
        mock_session.scalars.return_value.all.return_value = mock_errors
        errors, total = get_index_attempt_errors_across_connectors(
            db_session=mock_session,
        )
        assert total == 2
        assert len(errors) == 2

    def test_returns_empty_when_no_errors(self) -> None:
        """No matching rows → empty list and zero total."""
        mock_session = MagicMock()
        mock_session.scalar.return_value = 0
        mock_session.scalars.return_value.all.return_value = []
        errors, total = get_index_attempt_errors_across_connectors(
            db_session=mock_session,
        )
        assert total == 0
        assert errors == []

    def test_null_count_returns_zero(self) -> None:
        """A NULL count from the DB is coerced to 0, not returned as None."""
        mock_session = MagicMock()
        mock_session.scalar.return_value = None
        mock_session.scalars.return_value.all.return_value = []
        errors, total = get_index_attempt_errors_across_connectors(
            db_session=mock_session,
        )
        assert total == 0

    def test_passes_filters_to_query(self) -> None:
        """Verify that filter parameters result in .where() calls on the statement."""
        mock_session = MagicMock()
        mock_session.scalar.return_value = 0
        mock_session.scalars.return_value.all.return_value = []
        start = datetime(2026, 1, 1, tzinfo=timezone.utc)
        end = datetime(2026, 12, 31, tzinfo=timezone.utc)
        # Should not raise — just verifying the function accepts all filter params
        get_index_attempt_errors_across_connectors(
            db_session=mock_session,
            cc_pair_id=42,
            error_type="TimeoutError",
            start_time=start,
            end_time=end,
            unresolved_only=True,
            page=2,
            page_size=10,
        )
        # The function should have called scalar (for count) and scalars (for results)
        assert mock_session.scalar.called
        assert mock_session.scalars.called

View File

@@ -1,9 +1,13 @@
import io
from typing import cast
from unittest.mock import MagicMock
import openpyxl
from openpyxl.worksheet.worksheet import Worksheet
from onyx.file_processing.extract_file_text import _clean_worksheet_matrix
from onyx.file_processing.extract_file_text import _worksheet_to_matrix
from onyx.file_processing.extract_file_text import xlsx_sheet_extraction
from onyx.file_processing.extract_file_text import xlsx_to_text
@@ -196,3 +200,182 @@ class TestXlsxToText:
assert "r1c1" in lines[0] and "r1c2" in lines[0]
assert "r2c1" in lines[1] and "r2c2" in lines[1]
assert "r3c1" in lines[2] and "r3c2" in lines[2]
class TestWorksheetToMatrixJaggedRows:
    """openpyxl read_only mode can yield rows of differing widths when
    trailing cells are empty. The matrix must be padded to a rectangle
    so downstream column cleanup can index safely."""

    @staticmethod
    def _fake_worksheet(rows: list[tuple[str, ...]]) -> MagicMock:
        # Stand-in worksheet whose iter_rows() yields the given jagged tuples.
        sheet = MagicMock()
        sheet.iter_rows.return_value = iter(rows)
        return sheet

    def test_pads_shorter_trailing_rows(self) -> None:
        sheet = self._fake_worksheet([("A", "B", "C"), ("X", "Y"), ("P",)])
        assert _worksheet_to_matrix(sheet) == [
            ["A", "B", "C"],
            ["X", "Y", ""],
            ["P", "", ""],
        ]

    def test_pads_when_first_row_is_shorter(self) -> None:
        sheet = self._fake_worksheet([("A",), ("X", "Y", "Z")])
        assert _worksheet_to_matrix(sheet) == [["A", "", ""], ["X", "Y", "Z"]]

    def test_clean_worksheet_matrix_no_index_error_on_jagged_rows(self) -> None:
        """Regression: previously raised IndexError when a later row was
        shorter than the first row and the out-of-range column on the
        first row was empty (so the short-circuit in `all()` did not
        save us)."""
        sheet = self._fake_worksheet([("A", "", "", "B"), ("X", "Y")])
        matrix = _worksheet_to_matrix(sheet)
        # Must not raise.
        assert _clean_worksheet_matrix(matrix) == [
            ["A", "", "", "B"],
            ["X", "Y", "", ""],
        ]
class TestXlsxSheetExtraction:
    """xlsx_sheet_extraction returns one (csv_text, sheet_title) tuple per
    non-empty sheet, in workbook order."""

    def test_one_tuple_per_sheet(self) -> None:
        """Two populated sheets → two tuples, in workbook order."""
        xlsx = _make_xlsx(
            {
                "Revenue": [["Month", "Amount"], ["Jan", "100"]],
                "Expenses": [["Category", "Cost"], ["Rent", "500"]],
            }
        )
        sheets = xlsx_sheet_extraction(xlsx)
        assert len(sheets) == 2
        # Order preserved from workbook sheet order
        titles = [title for _csv, title in sheets]
        assert titles == ["Revenue", "Expenses"]
        # Content present in the right tuple
        revenue_csv, _ = sheets[0]
        expenses_csv, _ = sheets[1]
        assert "Month" in revenue_csv
        assert "Jan" in revenue_csv
        assert "Category" in expenses_csv
        assert "Rent" in expenses_csv

    def test_tuple_structure_is_csv_text_then_title(self) -> None:
        """The tuple order is (csv_text, sheet_title) — pin it so callers
        that unpack positionally don't silently break."""
        xlsx = _make_xlsx({"MySheet": [["a", "b"]]})
        sheets = xlsx_sheet_extraction(xlsx)
        assert len(sheets) == 1
        csv_text, title = sheets[0]
        assert title == "MySheet"
        assert "a" in csv_text
        assert "b" in csv_text

    def test_empty_sheet_is_skipped(self) -> None:
        """A sheet whose CSV output is empty/whitespace-only should NOT
        appear in the result — the `if csv_text.strip():` guard filters
        it out."""
        xlsx = _make_xlsx(
            {
                "Data": [["a", "b"]],
                "Empty": [],
            }
        )
        sheets = xlsx_sheet_extraction(xlsx)
        assert len(sheets) == 1
        assert sheets[0][1] == "Data"

    def test_empty_workbook_returns_empty_list(self) -> None:
        """All sheets empty → empty list (not a list of empty tuples)."""
        xlsx = _make_xlsx({"Sheet1": [], "Sheet2": []})
        sheets = xlsx_sheet_extraction(xlsx)
        assert sheets == []

    def test_single_sheet(self) -> None:
        """A one-sheet workbook yields exactly one tuple."""
        xlsx = _make_xlsx({"Only": [["x", "y"], ["1", "2"]]})
        sheets = xlsx_sheet_extraction(xlsx)
        assert len(sheets) == 1
        csv_text, title = sheets[0]
        assert title == "Only"
        assert "x" in csv_text
        assert "1" in csv_text

    def test_bad_zip_returns_empty_list(self) -> None:
        """A corrupt (non-zip) xlsx payload fails soft with []."""
        bad_file = io.BytesIO(b"not a zip file")
        sheets = xlsx_sheet_extraction(bad_file, file_name="test.xlsx")
        assert sheets == []

    def test_bad_zip_tilde_file_returns_empty_list(self) -> None:
        """`~$`-prefixed files are Excel lock files; failure should log
        at debug (not warning) and still return []."""
        bad_file = io.BytesIO(b"not a zip file")
        sheets = xlsx_sheet_extraction(bad_file, file_name="~$temp.xlsx")
        assert sheets == []

    def test_csv_content_matches_xlsx_to_text_per_sheet(self) -> None:
        """For a single-sheet workbook, xlsx_to_text output should equal
        the csv_text from xlsx_sheet_extraction — they share the same
        per-sheet CSV-ification logic."""
        single_sheet_data = [["Name", "Age"], ["Alice", "30"]]
        expected_text = xlsx_to_text(_make_xlsx({"People": single_sheet_data}))
        sheets = xlsx_sheet_extraction(_make_xlsx({"People": single_sheet_data}))
        assert len(sheets) == 1
        csv_text, title = sheets[0]
        assert title == "People"
        assert csv_text.strip() == expected_text.strip()

    def test_commas_in_cells_are_quoted(self) -> None:
        """Cell values containing commas are CSV-quoted, not split."""
        xlsx = _make_xlsx({"S1": [["hello, world", "normal"]]})
        sheets = xlsx_sheet_extraction(xlsx)
        assert len(sheets) == 1
        csv_text, _ = sheets[0]
        assert '"hello, world"' in csv_text

    def test_long_empty_row_run_capped_within_sheet(self) -> None:
        """The matrix cleanup applies per-sheet: >2 empty rows collapse
        to 2, which keeps the sheet non-empty and it still appears in
        the result."""
        xlsx = _make_xlsx(
            {
                "S1": [
                    ["header"],
                    [""],
                    [""],
                    [""],
                    [""],
                    ["data"],
                ]
            }
        )
        sheets = xlsx_sheet_extraction(xlsx)
        assert len(sheets) == 1
        csv_text, _ = sheets[0]
        lines = csv_text.strip().split("\n")
        # header + 2 empty (capped) + data = 4 lines
        assert len(lines) == 4
        assert "header" in lines[0]
        assert "data" in lines[-1]

    def test_sheet_title_with_special_chars_preserved(self) -> None:
        """Spaces, punctuation, unicode in sheet titles are preserved
        verbatim — the title is used as a link anchor downstream."""
        xlsx = _make_xlsx(
            {
                "Q1 Revenue (USD)": [["a", "b"]],
                "Données": [["c", "d"]],
            }
        )
        sheets = xlsx_sheet_extraction(xlsx)
        titles = [title for _csv, title in sheets]
        assert "Q1 Revenue (USD)" in titles
        assert "Données" in titles

View File

@@ -0,0 +1,551 @@
"""End-to-end tests for `TabularChunker.chunk_section`.
Each test is structured as:
INPUT — the CSV text passed to the chunker + token budget + link
EXPECTED — the exact chunk texts the chunker should emit
ACT — a single call to `chunk_section`
ASSERT — literal equality against the expected chunk texts
A character-level tokenizer (1 char == 1 token) is used so token-budget
arithmetic is deterministic and expected chunks can be spelled out
exactly.
"""
from onyx.connectors.models import Section
from onyx.connectors.models import TabularSection
from onyx.indexing.chunking.section_chunker import AccumulatorState
from onyx.indexing.chunking.tabular_section_chunker import TabularChunker
from onyx.natural_language_processing.utils import BaseTokenizer
class CharTokenizer(BaseTokenizer):
    """Tokenizer where every character is exactly one token (its code point),
    making token-budget arithmetic in the tests deterministic."""

    def encode(self, string: str) -> list[int]:
        return list(map(ord, string))

    def tokenize(self, string: str) -> list[str]:
        return [*string]

    def decode(self, tokens: list[int]) -> str:
        return "".join(map(chr, tokens))
def _make_chunker() -> TabularChunker:
    """TabularChunker wired to the 1-char-per-token test tokenizer."""
    return TabularChunker(tokenizer=CharTokenizer())
def _tabular_section(text: str, link: str = "sheet:Test") -> Section:
    """Wrap CSV text in a TabularSection carrying the given source link."""
    return TabularSection(text=text, link=link)
class TestTabularChunkerChunkSection:
def test_simple_csv_all_rows_fit_one_chunk(self) -> None:
    """Everything fits under the budget → a single chunk containing the
    link, a Columns header line, and one field=value line per row."""
    # --- INPUT -----------------------------------------------------
    csv_text = "Name,Age,City\n" "Alice,30,NYC\n" "Bob,25,SF\n"
    link = "sheet:People"
    content_token_limit = 500
    # --- EXPECTED --------------------------------------------------
    expected_texts = [
        (
            "sheet:People\n"
            "Columns: Name, Age, City\n"
            "Name=Alice, Age=30, City=NYC\n"
            "Name=Bob, Age=25, City=SF"
        ),
    ]
    # --- ACT -------------------------------------------------------
    out = _make_chunker().chunk_section(
        _tabular_section(csv_text, link=link),
        AccumulatorState(),
        content_token_limit=content_token_limit,
    )
    # --- ASSERT ----------------------------------------------------
    assert [p.text for p in out.payloads] == expected_texts
    assert [p.is_continuation for p in out.payloads] == [False]
    assert all(p.links == {0: link} for p in out.payloads)
    assert out.accumulator.is_empty()
def test_overflow_splits_into_two_deterministic_chunks(self) -> None:
    """Rows that exceed the token budget split into multiple chunks,
    each re-carrying the link + Columns prelude."""
    # --- INPUT -----------------------------------------------------
    # prelude = "sheet:S\nColumns: col, val" (25 chars = 25 tokens)
    # At content_token_limit=57, row_budget = max(16, 57-31-1) = 25.
    # Each row "col=a, val=1" is 12 tokens; two rows + \n = 25 (fits),
    # three rows + 2×\n = 38 (overflows) → split after 2 rows.
    csv_text = "col,val\n" "a,1\n" "b,2\n" "c,3\n" "d,4\n"
    link = "sheet:S"
    content_token_limit = 57
    # --- EXPECTED --------------------------------------------------
    expected_texts = [
        ("sheet:S\n" "Columns: col, val\n" "col=a, val=1\n" "col=b, val=2"),
        ("sheet:S\n" "Columns: col, val\n" "col=c, val=3\n" "col=d, val=4"),
    ]
    # --- ACT -------------------------------------------------------
    out = _make_chunker().chunk_section(
        _tabular_section(csv_text, link=link),
        AccumulatorState(),
        content_token_limit=content_token_limit,
    )
    # --- ASSERT ----------------------------------------------------
    assert [p.text for p in out.payloads] == expected_texts
    # First chunk is fresh; subsequent chunks mark as continuations.
    assert [p.is_continuation for p in out.payloads] == [False, True]
    # Link carries through every chunk.
    assert all(p.links == {0: link} for p in out.payloads)
# Add back in shortly
# def test_header_only_csv_produces_single_prelude_chunk(self) -> None:
# # --- INPUT -----------------------------------------------------
# csv_text = "col1,col2\n"
# link = "sheet:Headers"
# # --- EXPECTED --------------------------------------------------
# expected_texts = [
# "sheet:Headers\nColumns: col1, col2",
# ]
# # --- ACT -------------------------------------------------------
# out = _make_chunker().chunk_section(
# _tabular_section(csv_text, link=link),
# AccumulatorState(),
# content_token_limit=500,
# )
# # --- ASSERT ----------------------------------------------------
# assert [p.text for p in out.payloads] == expected_texts
def test_empty_cells_dropped_from_chunk_text(self) -> None:
    """Empty cells are omitted entirely rather than emitted as `field=`."""
    # --- INPUT -----------------------------------------------------
    # Alice's Age is empty; Bob's City is empty. Empty cells should
    # not appear as `field=` pairs in the output.
    csv_text = "Name,Age,City\n" "Alice,,NYC\n" "Bob,25,\n"
    link = "sheet:P"
    # --- EXPECTED --------------------------------------------------
    expected_texts = [
        (
            "sheet:P\n"
            "Columns: Name, Age, City\n"
            "Name=Alice, City=NYC\n"
            "Name=Bob, Age=25"
        ),
    ]
    # --- ACT -------------------------------------------------------
    out = _make_chunker().chunk_section(
        _tabular_section(csv_text, link=link),
        AccumulatorState(),
        content_token_limit=500,
    )
    # --- ASSERT ----------------------------------------------------
    assert [p.text for p in out.payloads] == expected_texts
def test_quoted_commas_in_csv_preserved_as_one_field(self) -> None:
    """A quoted value containing a comma stays one field, quotes stripped."""
    raw_csv = 'Name,Notes\nAlice,"Hello, world"\n'
    want = ["sheet:P\nColumns: Name, Notes\nName=Alice, Notes=Hello, world"]
    result = _make_chunker().chunk_section(
        _tabular_section(raw_csv, link="sheet:P"),
        AccumulatorState(),
        content_token_limit=500,
    )
    assert [payload.text for payload in result.payloads] == want
def test_blank_rows_in_csv_are_skipped(self) -> None:
    """Stray blank rows (export artifacts) produce no ghost rows."""
    raw_csv = "A,B\n\n1,2\n\n\n3,4\n"
    want = ["sheet:S\nColumns: A, B\nA=1, B=2\nA=3, B=4"]
    result = _make_chunker().chunk_section(
        _tabular_section(raw_csv, link="sheet:S"),
        AccumulatorState(),
        content_token_limit=500,
    )
    assert [payload.text for payload in result.payloads] == want
def test_accumulator_flushes_before_tabular_chunks(self) -> None:
    """Pending text from a prior section is flushed as its own chunk first."""
    carried_text = "prior paragraph from an earlier text section"
    carried_link = "prev-link"
    sheet_link = "sheet:S"
    want = [carried_text, "sheet:S\nColumns: a, b\na=1, b=2"]
    result = _make_chunker().chunk_section(
        _tabular_section("a,b\n1,2\n", link=sheet_link),
        AccumulatorState(text=carried_text, link_offsets={0: carried_link}),
        content_token_limit=500,
    )
    assert [payload.text for payload in result.payloads] == want
    # Flushed chunk keeps the prior link; the tabular chunk gets its own.
    assert result.payloads[0].links == {0: carried_link}
    assert result.payloads[1].links == {0: sheet_link}
    # Tabular sections are structural boundaries: the accumulator resets.
    assert result.accumulator.is_empty()
def test_multi_row_packing_under_budget_emits_single_chunk(self) -> None:
    """Rows that jointly fit under the budget pack into ONE preluded chunk."""
    rows = ["a" * 18, "b" * 18, "c" * 18]
    raw_csv = "x\n" + "\n".join(rows) + "\n"
    budget = 100
    # sheet(1) + sep + cols(10) + sep + three 20-token rows with seps = 75 <= 100.
    want = ["S\nColumns: x\n" + "\n".join("x=" + row for row in rows)]
    result = _make_chunker().chunk_section(
        _tabular_section(raw_csv, link="S"),
        AccumulatorState(),
        content_token_limit=budget,
    )
    assert [payload.text for payload in result.payloads] == want
    assert [payload.is_continuation for payload in result.payloads] == [False]
    assert all(len(payload.text) <= budget for payload in result.payloads)
def test_packing_reserves_prelude_budget_so_every_chunk_has_full_prelude(
    self,
) -> None:
    """Packing reserves prelude space so every chunk keeps its full prelude."""
    # Prelude 'S\nColumns: x\n' = 13 tokens; each row "x=??" = 4 + 1 separator.
    # 3 rows: 13 + 14 = 27 <= 30; 4 rows: 32 > 30 → chunks of 3 then 2 rows.
    budget = 30
    want = [
        "S\nColumns: x\nx=aa\nx=bb\nx=cc",
        "S\nColumns: x\nx=dd\nx=ee",
    ]
    result = _make_chunker().chunk_section(
        _tabular_section("x\naa\nbb\ncc\ndd\nee\n", link="S"),
        AccumulatorState(),
        content_token_limit=budget,
    )
    assert [payload.text for payload in result.payloads] == want
    # Every chunk fits the budget AND carries the full prelude — the point
    # of this test.
    assert all(len(payload.text) <= budget for payload in result.payloads)
    assert all("Columns: x" in payload.text for payload in result.payloads)
def test_oversized_row_splits_into_field_pieces_no_prelude(self) -> None:
    """Rows over budget split at field boundaries; split pieces get no prelude."""
    # Formatted row is 53 tokens under a 20-token budget → greedy packing of
    # 9-token fields (", " sep = 2) yields three pieces, each <= 20.
    budget = 20
    want = [
        "field 1=1, field 2=2",
        "field 3=3, field 4=4",
        "field 5=5",
    ]
    result = _make_chunker().chunk_section(
        _tabular_section(
            "field 1,field 2,field 3,field 4,field 5\n1,2,3,4,5\n", link="S"
        ),
        AccumulatorState(),
        content_token_limit=budget,
    )
    assert [payload.text for payload in result.payloads] == want
    # Invariant: no chunk exceeds the token budget.
    assert all(len(payload.text) <= budget for payload in result.payloads)
    # First piece is fresh; the rest are continuations.
    assert [payload.is_continuation for payload in result.payloads] == [
        False,
        True,
        True,
    ]
def test_empty_tabular_section_flushes_accumulator_and_resets_it(
    self,
) -> None:
    """Even an empty tabular section flushes pending text and resets state."""
    carried_text = "prior paragraph"
    result = _make_chunker().chunk_section(
        _tabular_section("", link="sheet:Empty"),
        AccumulatorState(text=carried_text, link_offsets={0: "prev-link"}),
        content_token_limit=500,
    )
    assert [payload.text for payload in result.payloads] == [carried_text]
    assert result.accumulator.is_empty()
def test_single_oversized_field_token_splits_at_id_boundaries(self) -> None:
    """One `field=value` pair over budget falls back to token-window slicing."""
    # "x=" + 50 a's = 52 tokens; 10-token windows → 5 full pieces + 2-char tail.
    raw_csv = "x\n" + "a" * 50 + "\n"
    budget = 10
    want = ["x=" + "a" * 8] + ["a" * 10] * 4 + ["aa"]
    result = _make_chunker().chunk_section(
        _tabular_section(raw_csv, link="S"),
        AccumulatorState(),
        content_token_limit=budget,
    )
    assert [payload.text for payload in result.payloads] == want
    # The invariant the token-level fallback exists to enforce.
    assert all(len(payload.text) <= budget for payload in result.payloads)
def test_underscored_column_gets_friendly_alias_in_parens(self) -> None:
    """Underscored headers gain a spaced alias in parens; plain ones don't."""
    want = [
        "sheet:M\n"
        "Columns: MTTR_hours (MTTR hours), id, owner_name (owner name)\n"
        "MTTR_hours=3, id=42, owner_name=Alice"
    ]
    result = _make_chunker().chunk_section(
        _tabular_section("MTTR_hours,id,owner_name\n3,42,Alice\n", link="sheet:M"),
        AccumulatorState(),
        content_token_limit=500,
    )
    assert [payload.text for payload in result.payloads] == want
def test_oversized_row_between_small_rows_preserves_flanking_chunks(
    self,
) -> None:
    """State-machine check: small row, oversized row, small row."""
    # Full prelude (22 tokens) exceeds the 20-token budget, so chunks carry
    # a sheet-only prelude. The fat middle row (26 tokens) flushes "S\na=1"
    # and splits at pair boundaries; the trailing split fragment then
    # absorbs the final small row via the normal packing rules.
    raw_csv = "a,b,c,d\n1,,,\nxxx,yyy,zzz,www\n2,,,\n"
    budget = 20
    want = [
        "S\na=1",
        "a=xxx, b=yyy, c=zzz",
        "d=www\na=2",
    ]
    result = _make_chunker().chunk_section(
        _tabular_section(raw_csv, link="S"),
        AccumulatorState(),
        content_token_limit=budget,
    )
    assert [payload.text for payload in result.payloads] == want
    assert all(len(payload.text) <= budget for payload in result.payloads)
def test_prelude_layering_column_header_fits_but_sheet_header_does_not(
    self,
) -> None:
    """The Columns line fits the budget; the sheet header on top does not."""
    # cols+row: 10+1+3 = 14 <= 15; sheet+cols+row: 28 > 15 → drop sheet.
    want = ["Columns: x\nx=y"]
    result = _make_chunker().chunk_section(
        _tabular_section("x\ny\n", link="LongSheetName"),
        AccumulatorState(),
        content_token_limit=15,
    )
    assert [payload.text for payload in result.payloads] == want
def test_prelude_layering_sheet_header_fits_but_column_header_does_not(
    self,
) -> None:
    """When Columns can't fit, the short sheet header is still layered on."""
    # cols+row: 17+1+12 = 30 > 20; sheet+row: 1+1+12 = 14 <= 20.
    want = ["S\nABC=1, DEF=2"]
    result = _make_chunker().chunk_section(
        _tabular_section("ABC,DEF\n1,2\n", link="S"),
        AccumulatorState(),
        content_token_limit=20,
    )
    assert [payload.text for payload in result.payloads] == want

View File

@@ -0,0 +1,188 @@
"""Unit tests for MinimalPersonaSnapshot.from_model knowledge_sources aggregation."""
from unittest.mock import MagicMock
from unittest.mock import patch
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import FederatedConnectorSource
from onyx.server.features.document_set.models import DocumentSetSummary
from onyx.server.features.persona.models import MinimalPersonaSnapshot
# Shared stand-in returned by the patched DocumentSetSummary.from_model in
# every test below; the tests only inspect knowledge_sources, so the summary
# fields are minimal placeholders.
_STUB_DS_SUMMARY = DocumentSetSummary(
id=1,
name="stub",
description=None,
cc_pair_summaries=[],
is_up_to_date=True,
is_public=True,
users=[],
groups=[],
)
def _make_persona(**overrides: object) -> MagicMock:
"""Build a mock Persona with sensible defaults.
Every relationship defaults to empty so tests only need to set the
fields they care about.
"""
p = MagicMock()
p.id = 1
p.name = "test"
p.description = ""
p.tools = []
p.starter_messages = None
p.document_sets = []
p.hierarchy_nodes = []
p.attached_documents = []
p.user_files = []
p.llm_model_version_override = None
p.llm_model_provider_override = None
p.uploaded_image_id = None
p.icon_name = None
p.is_public = True
p.is_listed = True
p.display_priority = None
p.is_featured = False
p.builtin_persona = False
p.labels = []
p.user = None
for k, v in overrides.items():
setattr(p, k, v)
return p
def _make_cc_pair(source: DocumentSource) -> MagicMock:
    """Mock ConnectorCredentialPair wired to the given document source."""
    pair = MagicMock()
    pair.id = 1
    pair.access_type = "PUBLIC"
    pair.name = source.value
    pair.connector.source = source
    return pair
def _make_doc_set(
cc_pairs: list[MagicMock] | None = None,
fed_connectors: list[MagicMock] | None = None,
) -> MagicMock:
ds = MagicMock()
ds.id = 1
ds.name = "ds"
ds.description = None
ds.is_up_to_date = True
ds.is_public = True
ds.users = []
ds.groups = []
ds.connector_credential_pairs = cc_pairs or []
ds.federated_connectors = fed_connectors or []
return ds
def _make_federated_ds_mapping(
    source: FederatedConnectorSource,
) -> MagicMock:
    """Mock DocumentSet↔federated-connector mapping for the given source."""
    federated_mapping = MagicMock()
    federated_mapping.federated_connector_id = 1
    federated_mapping.federated_connector.source = source
    federated_mapping.entities = {}
    return federated_mapping
def _make_hierarchy_node(source: DocumentSource) -> MagicMock:
    """Mock hierarchy node tagged with the given document source."""
    hierarchy_node = MagicMock()
    hierarchy_node.source = source
    return hierarchy_node
def _make_attached_document(source: DocumentSource) -> MagicMock:
    """Mock attached document whose parent hierarchy node has the source."""
    attached = MagicMock()
    attached.parent_hierarchy_node = MagicMock()
    attached.parent_hierarchy_node.source = source
    return attached
@patch(
    "onyx.server.features.persona.models.DocumentSetSummary.from_model",
    return_value=_STUB_DS_SUMMARY,
)
def test_empty_persona_has_no_knowledge_sources(_mock_ds: MagicMock) -> None:
    """A persona with no attachments of any kind reports no sources."""
    snap = MinimalPersonaSnapshot.from_model(_make_persona())
    assert snap.knowledge_sources == []
@patch(
    "onyx.server.features.persona.models.DocumentSetSummary.from_model",
    return_value=_STUB_DS_SUMMARY,
)
def test_user_files_adds_user_file_source(_mock_ds: MagicMock) -> None:
    """Attached user files contribute DocumentSource.USER_FILE."""
    snap = MinimalPersonaSnapshot.from_model(
        _make_persona(user_files=[MagicMock()])
    )
    assert DocumentSource.USER_FILE in snap.knowledge_sources
@patch(
    "onyx.server.features.persona.models.DocumentSetSummary.from_model",
    return_value=_STUB_DS_SUMMARY,
)
def test_no_user_files_excludes_user_file_source(_mock_ds: MagicMock) -> None:
    """Without user files USER_FILE is absent, but cc-pair sources appear."""
    doc_set = _make_doc_set(cc_pairs=[_make_cc_pair(DocumentSource.CONFLUENCE)])
    snap = MinimalPersonaSnapshot.from_model(
        _make_persona(document_sets=[doc_set])
    )
    assert DocumentSource.USER_FILE not in snap.knowledge_sources
    assert DocumentSource.CONFLUENCE in snap.knowledge_sources
@patch(
    "onyx.server.features.persona.models.DocumentSetSummary.from_model",
    return_value=_STUB_DS_SUMMARY,
)
def test_federated_connector_in_doc_set(_mock_ds: MagicMock) -> None:
    """A federated Slack connector in a doc set maps to DocumentSource.SLACK."""
    mapping = _make_federated_ds_mapping(FederatedConnectorSource.FEDERATED_SLACK)
    snap = MinimalPersonaSnapshot.from_model(
        _make_persona(document_sets=[_make_doc_set(fed_connectors=[mapping])])
    )
    assert DocumentSource.SLACK in snap.knowledge_sources
@patch(
    "onyx.server.features.persona.models.DocumentSetSummary.from_model",
    return_value=_STUB_DS_SUMMARY,
)
def test_hierarchy_nodes_and_attached_documents(_mock_ds: MagicMock) -> None:
    """Hierarchy nodes and attached documents each contribute their source."""
    persona = _make_persona(
        hierarchy_nodes=[_make_hierarchy_node(DocumentSource.GOOGLE_DRIVE)],
        attached_documents=[_make_attached_document(DocumentSource.SHAREPOINT)],
    )
    snap = MinimalPersonaSnapshot.from_model(persona)
    assert DocumentSource.GOOGLE_DRIVE in snap.knowledge_sources
    assert DocumentSource.SHAREPOINT in snap.knowledge_sources
@patch(
    "onyx.server.features.persona.models.DocumentSetSummary.from_model",
    return_value=_STUB_DS_SUMMARY,
)
def test_all_source_types_combined(_mock_ds: MagicMock) -> None:
    """Every aggregation path contributes exactly one source to the set."""
    persona = _make_persona(
        document_sets=[
            _make_doc_set(
                cc_pairs=[_make_cc_pair(DocumentSource.CONFLUENCE)],
                fed_connectors=[
                    _make_federated_ds_mapping(
                        FederatedConnectorSource.FEDERATED_SLACK
                    )
                ],
            )
        ],
        hierarchy_nodes=[_make_hierarchy_node(DocumentSource.GOOGLE_DRIVE)],
        attached_documents=[_make_attached_document(DocumentSource.SHAREPOINT)],
        user_files=[MagicMock()],
    )
    snap = MinimalPersonaSnapshot.from_model(persona)
    assert set(snap.knowledge_sources) == {
        DocumentSource.CONFLUENCE,
        DocumentSource.SLACK,
        DocumentSource.GOOGLE_DRIVE,
        DocumentSource.SHAREPOINT,
        DocumentSource.USER_FILE,
    }

View File

@@ -100,6 +100,39 @@ class TestGenerateOllamaDisplayName:
result = generate_ollama_display_name("llama3.3:70b")
assert "3.3" in result or "3 3" in result # Either format is acceptable
def test_non_size_tag_shown(self) -> None:
    """Non-size tags such as 'e4b' survive into the display name."""
    display = generate_ollama_display_name("gemma4:e4b")
    for fragment in ("Gemma", "4", "E4B"):
        assert fragment in display
def test_size_with_cloud_modifier(self) -> None:
    """A size tag and its cloud modifier both appear in the display name."""
    display = generate_ollama_display_name("deepseek-v3.1:671b-cloud")
    for fragment in ("DeepSeek", "671B", "Cloud"):
        assert fragment in display
def test_size_with_multiple_modifiers(self) -> None:
    """A size tag followed by several modifiers keeps them all."""
    display = generate_ollama_display_name("qwen3-vl:235b-instruct-cloud")
    for fragment in ("Qwen", "235B", "Instruct", "Cloud"):
        assert fragment in display
def test_quantization_tag_shown(self) -> None:
    """Quantization tags (e.g. q4_0) are included in the display name."""
    display = generate_ollama_display_name("llama3:q4_0")
    assert "Llama" in display
    assert "Q4_0" in display
def test_cloud_only_tag(self) -> None:
    """A standalone cloud tag shows up (upper-cased, per the assertion)."""
    display = generate_ollama_display_name("glm-4.6:cloud")
    assert "CLOUD" in display
class TestStripOpenrouterVendorPrefix:
"""Tests for OpenRouter vendor prefix stripping."""

View File

@@ -1,16 +1,11 @@
"""Tests for indexing pipeline Prometheus collectors."""
from collections.abc import Iterator
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from unittest.mock import MagicMock
from unittest.mock import patch
import pytest
from onyx.server.metrics.indexing_pipeline import ConnectorHealthCollector
from onyx.server.metrics.indexing_pipeline import IndexAttemptCollector
from onyx.server.metrics.indexing_pipeline import QueueDepthCollector
@@ -18,7 +13,7 @@ from onyx.server.metrics.indexing_pipeline import QueueDepthCollector
def _mock_broker_client() -> Iterator[None]:
"""Patch celery_get_broker_client for all collector tests."""
with patch(
"onyx.background.celery.celery_redis.celery_get_broker_client",
"onyx.server.metrics.indexing_pipeline.celery_get_broker_client",
return_value=MagicMock(),
):
yield
@@ -137,212 +132,3 @@ class TestQueueDepthCollector:
stale_result = collector.collect()
assert stale_result is good_result
# Tests for IndexAttemptCollector, a Prometheus collector that reports active
# index attempts per tenant. NOTE(review): leading indentation was stripped in
# this view; the mock chains below mirror the collector's exact query shape.
class TestIndexAttemptCollector:
# collect() returns nothing until configure() has been called.
def test_returns_empty_when_not_configured(self) -> None:
collector = IndexAttemptCollector()
assert collector.collect() == []
# describe() stays empty — presumably so registry registration does not
# trigger a DB round-trip; confirm against the collector implementation.
def test_returns_empty_describe(self) -> None:
collector = IndexAttemptCollector()
assert collector.describe() == []
@patch("onyx.db.engine.tenant_utils.get_all_tenant_ids")
@patch("onyx.db.engine.sql_engine.get_session_with_current_tenant")
def test_collects_index_attempts(
self,
mock_get_session: MagicMock,
mock_get_tenants: MagicMock,
) -> None:
# cache_ttl=0 → no stale cache (see test_handles_db_error_gracefully).
collector = IndexAttemptCollector(cache_ttl=0)
collector.configure()
mock_get_tenants.return_value = ["public"]
mock_session = MagicMock()
# Emulate the context-manager protocol of the session factory.
mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)
mock_get_session.return_value.__exit__ = MagicMock(return_value=False)
from onyx.db.enums import IndexingStatus
# Row shape per the labels asserted below:
# (status, source, cc_pair_id, connector_name, count).
mock_row = (
IndexingStatus.IN_PROGRESS,
MagicMock(value="web"),
81,
"Table Tennis Blade Guide",
2,
)
# Mirrors the collector's query().join().join().filter().group_by().all()
# chain exactly — brittle by design; update if the query shape changes.
mock_session.query.return_value.join.return_value.join.return_value.filter.return_value.group_by.return_value.all.return_value = [
mock_row
]
families = collector.collect()
assert len(families) == 1
assert families[0].name == "onyx_index_attempts_active"
assert len(families[0].samples) == 1
sample = families[0].samples[0]
# Labels are stringified; the enum status is lower-cased to "in_progress".
assert sample.labels == {
"status": "in_progress",
"source": "web",
"tenant_id": "public",
"connector_name": "Table Tennis Blade Guide",
"cc_pair_id": "81",
}
assert sample.value == 2
@patch("onyx.db.engine.tenant_utils.get_all_tenant_ids")
def test_handles_db_error_gracefully(
self,
mock_get_tenants: MagicMock,
) -> None:
collector = IndexAttemptCollector(cache_ttl=0)
collector.configure()
# A tenant-lookup failure must not propagate out of collect().
mock_get_tenants.side_effect = Exception("DB down")
families = collector.collect()
# No stale cache, so returns empty
assert families == []
@patch("onyx.db.engine.tenant_utils.get_all_tenant_ids")
def test_skips_none_tenant_ids(
self,
mock_get_tenants: MagicMock,
) -> None:
collector = IndexAttemptCollector(cache_ttl=0)
collector.configure()
# None tenant ids are skipped rather than queried.
mock_get_tenants.return_value = [None]
families = collector.collect()
assert len(families) == 1  # Returns the gauge family, just with no samples
assert len(families[0].samples) == 0
# Tests for ConnectorHealthCollector, which emits six metric families
# describing connector status, staleness, error state, and doc/error counts.
class TestConnectorHealthCollector:
# collect() returns nothing until configure() has been called.
def test_returns_empty_when_not_configured(self) -> None:
collector = ConnectorHealthCollector()
assert collector.collect() == []
def test_returns_empty_describe(self) -> None:
collector = ConnectorHealthCollector()
assert collector.describe() == []
@patch("onyx.db.engine.tenant_utils.get_all_tenant_ids")
@patch("onyx.db.engine.sql_engine.get_session_with_current_tenant")
def test_collects_connector_health(
self,
mock_get_session: MagicMock,
mock_get_tenants: MagicMock,
) -> None:
# cache_ttl=0 → every collect() hits the (mocked) DB.
collector = ConnectorHealthCollector(cache_ttl=0)
collector.configure()
mock_get_tenants.return_value = ["public"]
mock_session = MagicMock()
mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)
mock_get_session.return_value.__exit__ = MagicMock(return_value=False)
# Last success 2h ago → staleness sample should be ~7200 seconds.
now = datetime.now(tz=timezone.utc)
last_success = now - timedelta(hours=2)
mock_status = MagicMock(value="ACTIVE")
mock_source = MagicMock(value="google_drive")
# Row: (id, status, in_error, last_success, name, source)
mock_row = (
42,
mock_status,
True,  # in_repeated_error_state
last_success,
"My GDrive Connector",
mock_source,
)
mock_session.query.return_value.join.return_value.all.return_value = [mock_row]
# Mock the index attempt queries (error counts + docs counts)
mock_session.query.return_value.filter.return_value.group_by.return_value.all.return_value = (
[]
)
families = collector.collect()
# One family per exported metric — six in total.
assert len(families) == 6
names = {f.name for f in families}
assert names == {
"onyx_connector_last_success_age_seconds",
"onyx_connector_in_error_state",
"onyx_connectors_by_status",
"onyx_connectors_in_error_total",
"onyx_connector_docs_indexed",
"onyx_connector_error_count",
}
staleness = next(
f for f in families if f.name == "onyx_connector_last_success_age_seconds"
)
assert len(staleness.samples) == 1
# Tolerance of 5s absorbs test runtime between now() and collect().
assert staleness.samples[0].value == pytest.approx(7200, abs=5)
error_state = next(
f for f in families if f.name == "onyx_connector_in_error_state"
)
assert error_state.samples[0].value == 1.0
by_status = next(f for f in families if f.name == "onyx_connectors_by_status")
assert by_status.samples[0].labels == {
"tenant_id": "public",
"status": "ACTIVE",
}
assert by_status.samples[0].value == 1
error_total = next(
f for f in families if f.name == "onyx_connectors_in_error_total"
)
assert error_total.samples[0].value == 1
@patch("onyx.db.engine.tenant_utils.get_all_tenant_ids")
@patch("onyx.db.engine.sql_engine.get_session_with_current_tenant")
def test_skips_staleness_when_no_last_success(
self,
mock_get_session: MagicMock,
mock_get_tenants: MagicMock,
) -> None:
collector = ConnectorHealthCollector(cache_ttl=0)
collector.configure()
mock_get_tenants.return_value = ["public"]
mock_session = MagicMock()
mock_get_session.return_value.__enter__ = MagicMock(return_value=mock_session)
mock_get_session.return_value.__exit__ = MagicMock(return_value=False)
mock_status = MagicMock(value="INITIAL_INDEXING")
mock_source = MagicMock(value="slack")
mock_row = (
10,
mock_status,
False,
None,  # no last_successful_index_time
0,
mock_source,
)
mock_session.query.return_value.join.return_value.all.return_value = [mock_row]
families = collector.collect()
# A connector that has never succeeded gets no staleness sample.
staleness = next(
f for f in families if f.name == "onyx_connector_last_success_age_seconds"
)
assert len(staleness.samples) == 0
@patch("onyx.db.engine.tenant_utils.get_all_tenant_ids")
def test_handles_db_error_gracefully(
self,
mock_get_tenants: MagicMock,
) -> None:
collector = ConnectorHealthCollector(cache_ttl=0)
collector.configure()
# Failures surface as an empty result, never an exception.
mock_get_tenants.side_effect = Exception("DB down")
families = collector.collect()
assert families == []

View File

@@ -0,0 +1,927 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": null,
"links": [],
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"description": " This chart shows how long it takes for Onyx to crawl each source connector and collect the current list of documents. The Y axis represents duration in seconds (bucketed), and each band shows how many enumerations completed within that time range.",
"fieldConfig": {
"defaults": {
"custom": {
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"scaleDistribution": {
"type": "linear"
}
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 0
},
"id": 1,
"options": {
"calculate": false,
"cellGap": 1,
"color": {
"exponent": 0.5,
"fill": "dark-orange",
"mode": "scheme",
"reverse": false,
"scale": "exponential",
"scheme": "Oranges",
"steps": 64
},
"exemplars": {
"color": "rgba(255,0,255,0.7)"
},
"filterValues": {
"le": 1e-09
},
"legend": {
"show": true
},
"rowsFrame": {
"layout": "auto"
},
"tooltip": {
"mode": "single",
"showColorScale": false,
"yHistogram": false
},
"yAxis": {
"axisPlacement": "left",
"reverse": false,
"unit": "s"
}
},
"pluginVersion": "10.4.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "sum(increase(onyx_pruning_enumeration_duration_seconds_bucket[30m])) by (le)",
"format": "heatmap",
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"title": "Pruning Enumeration Duration",
"type": "heatmap"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 0
},
"id": 7,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "histogram_quantile(0.95, sum(rate(onyx_pruning_enumeration_duration_seconds_bucket[1h])) by (le, connector_type))",
"instant": false,
"legendFormat": "{{connector_type}}",
"range": true,
"refId": "A"
}
],
"title": "Pruning Enumeration Duration p95 by Connector",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"description": "Shows how many pruning enumerations completed per hour, broken down by connector type. A low count means few connectors are successfully completing the enumeration phase. A count of 0 for a connector type that should be pruning indicates enumerations are timing out before completion.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 8
},
"id": 6,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "sum(increase(onyx_pruning_enumeration_duration_seconds_count[1h])) by (connector_type)",
"instant": false,
"legendFormat": "{{connector_type}}",
"range": true,
"refId": "A"
}
],
"title": "Pruning Enumeration Count",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"description": "Shows the 95th percentile execution duration of pruning tasks. A rising p95 indicates pruning jobs are taking longer over time, potentially approaching the 6-hour timeout limit. Sustained values near 21600s (6 hours) indicate connectors with too many documents to prune within the allowed window.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 8
},
"id": 5,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "histogram_quantile(0.95, sum(rate(onyx_celery_task_duration_seconds_bucket{task_name=~\"connector_pruning.*\"}[1h])) by (le, task_name))",
"instant": false,
"legendFormat": "{{task_name}}",
"range": true,
"refId": "A"
}
],
"title": "Pruning Task Duration p95",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"description": "Shows the number of currently executing pruning tasks on the heavy worker, broken down by task type. A value of 0 means no pruning is actively running. A sustained high count may indicate workers are saturated and new pruning jobs are queuing up.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"__systemRef": "hideSeriesFrom",
"matcher": {
"id": "byNames",
"options": {
"mode": "exclude",
"names": [
"connector_pruning_generator_task"
],
"prefix": "All except:",
"readOnly": true
}
},
"properties": [
{
"id": "custom.hideFrom",
"value": {
"legend": false,
"tooltip": false,
"viz": true
}
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 16
},
"id": 4,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "sum(onyx_celery_tasks_active{queue=~\"connector_pruning.*|connector_doc_permissions.*|connector_external_group.*|csv_generation|sandbox\"}) by (task_name)",
"instant": false,
"legendFormat": "{{task_name}}",
"range": true,
"refId": "A"
}
],
"title": "Heavy Worker - Active Tasks",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"description": "This chart shows how frequently Onyx hits rate limits from source connectors during the enumeration phase. Rate limit errors slow down or stall the document crawl, directly increasing enumeration duration. A spike here for a specific connector type indicates the source API is throttling Onyx's requests, which may explain long enumeration times for that connector.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 16
},
"id": 3,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "sum(rate(onyx_pruning_rate_limit_errors_total[5m])) by (connector_type)",
"instant": false,
"legendFormat": "{{connector_type}}",
"range": true,
"refId": "A"
}
],
"title": "Pruning Rate Limit Errors",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"description": "Shows the rate of pruning task failures and revocations per hour. Failures indicate crashed tasks (DB errors, timeouts). Revocations indicate cancelled tasks, typically from worker restarts or deployments. Both result in orphaned fences that block future pruning attempts for affected connectors.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 24
},
"id": 9,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "sum(increase(onyx_celery_task_revoked_total{task_name=~\"connector_pruning.*\"}[1h])) by (task_name)",
"hide": false,
"instant": false,
"legendFormat": "revoked",
"range": true,
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "sum(increase(onyx_celery_task_completed_total{task_name=~\"connector_pruning.*\", outcome=\"failure\"}[1h])) by (task_name)",
"hide": false,
"instant": false,
"legendFormat": "failure",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "sum(increase(onyx_celery_task_completed_total{task_name=~\"connector_pruning.*\", outcome=\"success\"}[1h])) by (task_name)",
"hide": false,
"instant": false,
"legendFormat": "success",
"range": true,
"refId": "C"
}
],
"title": "Heavy Worker - Pruning Task Success & Failures & Revocations",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"description": "Shows the ratio of successfully completed pruning tasks to total completed tasks. A value of 1.0 (100%) means all pruning jobs are completing cleanly. A drop indicates tasks are crashing or timing out, which leads to orphaned fences and connectors being blocked from future pruning attempts.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 24
},
"id": 8,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": " sum(rate(onyx_celery_task_completed_total{task_name=~\"connector_pruning.*\", outcome=\"success\"}[1h]))\n /\n sum(rate(onyx_celery_task_completed_total{task_name=~\"connector_pruning.*\"}[1h]))",
"instant": false,
"legendFormat": "Success Rate",
"range": true,
"refId": "A"
}
],
"title": "Heavy Worker - Pruning Task Success Rate",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"description": "This chart shows how long it takes Onyx to compare the list of documents fetched from the source connector against what is currently indexed. The diff computes the set difference \u2014 documents that exist in the index but no longer exist in the source are flagged for removal.",
"fieldConfig": {
"defaults": {
"custom": {
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"scaleDistribution": {
"type": "linear"
}
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 32
},
"id": 2,
"options": {
"calculate": false,
"cellGap": 1,
"color": {
"exponent": 0.5,
"fill": "dark-orange",
"mode": "scheme",
"reverse": false,
"scale": "exponential",
"scheme": "Oranges",
"steps": 64
},
"exemplars": {
"color": "rgba(255,0,255,0.7)"
},
"filterValues": {
"le": 1e-09
},
"legend": {
"show": true
},
"rowsFrame": {
"layout": "auto"
},
"tooltip": {
"mode": "single",
"showColorScale": false,
"yHistogram": false
},
"yAxis": {
"axisPlacement": "left",
"reverse": false,
"unit": "s"
}
},
"pluginVersion": "10.4.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "sum(increase(onyx_pruning_diff_duration_seconds_bucket[30m])) by (le)",
"format": "heatmap",
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"title": "Pruning Diff Duration",
"type": "heatmap"
}
],
"schemaVersion": 39,
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {},
"timezone": "browser",
"title": "Indexing - Pruning",
"uid": "onyx-indexing-pruning",
"version": 10,
"weekStart": ""
}

View File

@@ -38,4 +38,17 @@ metadata:
data:
onyx-redis-queues.json: |
{{- .Files.Get "dashboards/redis-queues.json" | nindent 4 }}
---
apiVersion: v1
kind: ConfigMap
metadata:
name: {{ include "onyx.fullname" . }}-indexing-pruning-dashboard
labels:
{{- include "onyx.labels" . | nindent 4 }}
grafana_dashboard: "1"
annotations:
grafana_folder: "Onyx"
data:
onyx-indexing-pruning.json: |
{{- .Files.Get "dashboards/indexing-pruning.json" | nindent 4 }}
{{- end }}

View File

@@ -217,11 +217,23 @@ Enriches docfetching and docprocessing tasks with connector-level labels. Silent
| `onyx_indexing_task_completed_total` | Counter | `task_name`, `source`, `tenant_id`, `cc_pair_id`, `outcome` | Indexing tasks completed per connector |
| `onyx_indexing_task_duration_seconds` | Histogram | `task_name`, `source`, `tenant_id` | Indexing task duration by connector type |
`connector_name` is intentionally excluded from these push-based counters to avoid unbounded cardinality (it's a free-form user string). The pull-based collectors on the monitoring worker include it since they have bounded cardinality (one series per connector).
`connector_name` is intentionally excluded from these push-based counters to avoid unbounded cardinality (it's a free-form user string).
### Connector Health Metrics (`onyx.server.metrics.connector_health_metrics`)
Push-based metrics emitted by docfetching and docprocessing workers at the point where connector state changes occur. Scales to any number of tenants (no schema iteration).
| Metric | Type | Labels | Description |
| ----------------------------------------------- | ------- | --------------------------------------------- | ------------------------------------------------------------- |
| `onyx_index_attempt_transitions_total` | Counter | `tenant_id`, `source`, `cc_pair_id`, `status` | Index attempt status transitions (in_progress, success, etc.) |
| `onyx_connector_in_error_state` | Gauge | `tenant_id`, `source`, `cc_pair_id` | Whether connector is in repeated error state (1=yes, 0=no) |
| `onyx_connector_last_success_timestamp_seconds` | Gauge | `tenant_id`, `source`, `cc_pair_id` | Unix timestamp of last successful indexing |
| `onyx_connector_docs_indexed_total` | Counter | `tenant_id`, `source`, `cc_pair_id` | Total documents indexed per connector (monotonic) |
| `onyx_connector_indexing_errors_total` | Counter | `tenant_id`, `source`, `cc_pair_id` | Total failed index attempts per connector (monotonic) |
### Pull-Based Collectors (`onyx.server.metrics.indexing_pipeline`)
Registered only in the **Monitoring** worker. Collectors query Redis/Postgres at scrape time with a 30-second TTL cache.
Registered only in the **Monitoring** worker. Collectors query Redis at scrape time with a 30-second TTL cache and a 120-second timeout to prevent the `/metrics` endpoint from hanging.
| Metric | Type | Labels | Description |
| ------------------------------------ | ----- | ------- | ----------------------------------- |
@@ -229,8 +241,6 @@ Registered only in the **Monitoring** worker. Collectors query Redis/Postgres at
| `onyx_queue_unacked` | Gauge | `queue` | Unacknowledged messages per queue |
| `onyx_queue_oldest_task_age_seconds` | Gauge | `queue` | Age of the oldest task in the queue |
Plus additional connector health, index attempt, and worker heartbeat metrics — see `indexing_pipeline.py` for the full list.
### Adding Metrics to a Worker
Currently only the docfetching and docprocessing workers have push-based task metrics wired up. To add metrics to another worker (e.g. heavy, light, primary):

43
profiling/README.md Normal file
View File

@@ -0,0 +1,43 @@
# Onyx Local Monitoring Stack
Prometheus + Grafana for local development. Pre-loaded with dashboards for the Onyx backend.
## Usage
```bash
cd profiling/
docker compose up -d
```
| Service | URL | Credentials |
|------------|------------------------------|---------------|
| Grafana | http://localhost:3001 | admin / admin |
| Prometheus | http://localhost:9090 | — |
## Dashboards
- **Onyx DB Pool Health** — PostgreSQL connection pool utilization
- **Onyx Indexing Pipeline v2** — Per-connector indexing throughput, queue depth, task latency
## Scrape targets
| Job | Port | Source |
|--------------------------|-------|-------------------------------|
| `onyx-api-server` | 8080 | FastAPI `/metrics` (matches `.vscode/launch.json`) |
| `onyx-monitoring-worker` | 9096 | Celery monitoring worker |
| `onyx-docfetching-worker`| 9092 | Celery docfetching worker |
| `onyx-docprocessing-worker`| 9093 | Celery docprocessing worker |
## Environment variables
Override defaults with a `.env` file in this directory or by setting them in your shell:
| Variable | Default | Description |
|---------------------|---------|---------------------------------|
| `PROMETHEUS_PORT` | `9090` | Host port for Prometheus UI |
| `GRAFANA_PORT` | `3001` | Host port for Grafana UI |
| `GF_ADMIN_PASSWORD` | `admin` | Grafana admin password |
## Editing dashboards
`allowUiUpdates: true` is set in the provisioning config, so you can edit dashboards in the Grafana UI. However, **changes don't persist** across `docker compose down` — to keep edits, export the dashboard JSON and overwrite the file in `grafana/dashboards/onyx/`.

View File

@@ -0,0 +1,38 @@
services:
# Prometheus - time series database for metrics
prometheus:
image: prom/prometheus:v3.2.1
ports:
- "${PROMETHEUS_PORT:-9090}:9090"
volumes:
- ./prometheus.yml:/etc/prometheus/prometheus.yml
- prometheus_data:/prometheus
command:
- '--config.file=/etc/prometheus/prometheus.yml'
- '--storage.tsdb.path=/prometheus'
- '--web.console.libraries=/etc/prometheus/console_libraries'
- '--web.console.templates=/etc/prometheus/consoles'
- '--storage.tsdb.retention.time=200h'
- '--web.enable-lifecycle'
extra_hosts:
- "host.docker.internal:host-gateway"
# Grafana - dashboards and visualization
grafana:
image: grafana/grafana:11.6.0
ports:
- "${GRAFANA_PORT:-3001}:3000"
environment:
- GF_SECURITY_ADMIN_PASSWORD=${GF_ADMIN_PASSWORD:-admin}
volumes:
- grafana_data:/var/lib/grafana
- ./grafana/provisioning:/etc/grafana/provisioning
- ./grafana/dashboards:/var/lib/grafana/dashboards
extra_hosts:
- "host.docker.internal:host-gateway"
depends_on:
- prometheus
volumes:
prometheus_data:
grafana_data:

View File

@@ -0,0 +1,561 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": { "type": "grafana", "uid": "-- Grafana --" },
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"id": null,
"links": [],
"liveNow": true,
"panels": [
{
"title": "Pool Connections Checked Out (sync)",
"description": "Number of connections currently held by application code. Should spike briefly then return to ~0 with the fix. Without the fix, this climbs to match concurrent streams and stays there.",
"type": "timeseries",
"gridPos": { "h": 10, "w": 12, "x": 0, "y": 0 },
"id": 1,
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisLabel": "connections",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 30,
"gradientMode": "scheme",
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": { "type": "linear" },
"showPoints": "never",
"spanNulls": false,
"stacking": { "group": "A", "mode": "none" },
"thresholdsStyle": { "mode": "dashed" }
},
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 20 },
{ "color": "red", "value": 40 }
]
},
"min": 0,
"max": 50
},
"overrides": [
{
"matcher": { "id": "byName", "options": "pool_size" },
"properties": [
{ "id": "custom.drawStyle", "value": "line" },
{ "id": "custom.lineStyle", "value": { "fill": "dash", "dash": [10, 10] } },
{ "id": "custom.fillOpacity", "value": 0 },
{ "id": "color", "value": { "fixedColor": "dark-red", "mode": "fixed" } }
]
}
]
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "onyx_db_pool_checked_out{engine=\"sync\"}",
"legendFormat": "checked_out",
"refId": "A"
},
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "onyx_db_pool_size{engine=\"sync\"}",
"legendFormat": "pool_size",
"refId": "B"
}
]
},
{
"title": "Pool Connections Checked Out (all engines)",
"description": "Checked out connections across sync, async, and readonly engines.",
"type": "timeseries",
"gridPos": { "h": 10, "w": 12, "x": 12, "y": 0 },
"id": 2,
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisLabel": "connections",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 20,
"lineInterpolation": "smooth",
"lineWidth": 2,
"pointSize": 5,
"showPoints": "never",
"stacking": { "group": "A", "mode": "normal" }
},
"min": 0
},
"overrides": []
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "onyx_db_pool_checked_out",
"legendFormat": "{{engine}}",
"refId": "A"
}
]
},
{
"title": "Connections Held by Endpoint",
"description": "Which API handlers are currently holding DB connections. The chat streaming endpoint should drop to 0 during Phase 2 with the fix.",
"type": "timeseries",
"gridPos": { "h": 10, "w": 12, "x": 0, "y": 10 },
"id": 3,
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisLabel": "connections",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 20,
"lineInterpolation": "smooth",
"lineWidth": 2,
"showPoints": "never",
"stacking": { "group": "A", "mode": "normal" }
},
"min": 0
},
"overrides": []
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "onyx_db_connections_held_by_endpoint{engine=\"sync\"} > 0",
"legendFormat": "{{handler}}",
"refId": "A"
}
]
},
{
"title": "Connection Hold Duration (p50 / p95 / p99)",
"description": "How long connections are held before being returned. With the fix, p95 should drop from stream-duration (30s+) to sub-second.",
"type": "timeseries",
"gridPos": { "h": 10, "w": 12, "x": 12, "y": 10 },
"id": 4,
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisLabel": "seconds",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 10,
"lineInterpolation": "smooth",
"lineWidth": 2,
"showPoints": "never"
},
"unit": "s",
"min": 0
},
"overrides": []
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "histogram_quantile(0.50, sum by (le)(rate(onyx_db_connection_hold_seconds_bucket{engine=\"sync\"}[1m])))",
"legendFormat": "sync p50",
"refId": "A"
},
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "histogram_quantile(0.95, sum by (le)(rate(onyx_db_connection_hold_seconds_bucket{engine=\"sync\"}[1m])))",
"legendFormat": "sync p95",
"refId": "B"
},
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "histogram_quantile(0.99, sum by (le)(rate(onyx_db_connection_hold_seconds_bucket{engine=\"sync\"}[1m])))",
"legendFormat": "sync p99",
"refId": "C"
},
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "histogram_quantile(0.99, sum by (le)(rate(onyx_db_connection_hold_seconds_bucket{engine=\"async\"}[1m])))",
"legendFormat": "async p99",
"refId": "D"
}
]
},
{
"title": "Async vs Sync Hold Duration (p99)",
"description": "Compares connection hold times between sync (our fix) and async (auth middleware). Sync should be sub-second after fix. Async stays high because FastAPI auth dependency holds a session for the entire StreamingResponse lifetime.",
"type": "timeseries",
"gridPos": { "h": 10, "w": 12, "x": 0, "y": 20 },
"id": 11,
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisLabel": "seconds",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 10,
"lineInterpolation": "smooth",
"lineWidth": 2,
"showPoints": "never"
},
"unit": "s",
"min": 0
},
"overrides": [
{
"matcher": { "id": "byName", "options": "sync p99" },
"properties": [
{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }
]
},
{
"matcher": { "id": "byName", "options": "async p99" },
"properties": [
{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }
]
}
]
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "histogram_quantile(0.99, sum by (le)(rate(onyx_db_connection_hold_seconds_bucket{engine=\"sync\"}[1m])))",
"legendFormat": "sync p99",
"refId": "A"
},
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "histogram_quantile(0.99, sum by (le)(rate(onyx_db_connection_hold_seconds_bucket{engine=\"async\"}[1m])))",
"legendFormat": "async p99",
"refId": "B"
}
]
},
{
"title": "Async Connections Held (auth middleware)",
"description": "Async engine connections checked out — these are held by FastAPI's auth dependency for the entire StreamingResponse lifetime. This is the NEXT bottleneck to fix after the sync session changes.",
"type": "timeseries",
"gridPos": { "h": 10, "w": 12, "x": 12, "y": 20 },
"id": 12,
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisLabel": "connections",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 30,
"gradientMode": "scheme",
"lineInterpolation": "smooth",
"lineWidth": 2,
"showPoints": "never",
"thresholdsStyle": { "mode": "dashed" }
},
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 15 },
{ "color": "red", "value": 30 }
]
},
"min": 0
},
"overrides": []
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "onyx_db_pool_checked_out{engine=\"async\"}",
"legendFormat": "async checked_out",
"refId": "A"
},
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "onyx_db_pool_size{engine=\"async\"}",
"legendFormat": "async pool_size",
"refId": "B"
}
]
},
{
"title": "Pool Checkout Rate (per second)",
"description": "Rate of connection checkouts. With the fix, each chat creates multiple short checkouts (Phase 1 + Phase 3) instead of one long one.",
"type": "timeseries",
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 30 },
"id": 5,
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisLabel": "checkouts/s",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 20,
"lineInterpolation": "smooth",
"lineWidth": 2,
"showPoints": "never"
},
"unit": "ops",
"min": 0
},
"overrides": []
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "rate(onyx_db_pool_checkout_total{engine=\"sync\"}[30s])",
"legendFormat": "sync checkouts/s",
"refId": "A"
},
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "rate(onyx_db_pool_checkin_total{engine=\"sync\"}[30s])",
"legendFormat": "sync checkins/s",
"refId": "B"
}
]
},
{
"title": "Pool Overflow & Timeouts",
"description": "Overflow = connections beyond pool_size. Timeouts = requests that couldn't get a connection. Any timeout is a user-facing error.",
"type": "timeseries",
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 30 },
"id": 6,
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisLabel": "",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 20,
"lineInterpolation": "smooth",
"lineWidth": 2,
"showPoints": "never"
},
"min": 0
},
"overrides": [
{
"matcher": { "id": "byName", "options": "timeouts" },
"properties": [
{ "id": "color", "value": { "fixedColor": "dark-red", "mode": "fixed" } },
{ "id": "custom.fillOpacity", "value": 50 }
]
}
]
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "onyx_db_pool_overflow{engine=\"sync\"}",
"legendFormat": "overflow (sync)",
"refId": "A"
},
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "increase(onyx_db_pool_checkout_timeout_total{engine=\"sync\"}[30s])",
"legendFormat": "timeouts",
"refId": "B"
}
]
},
{
"title": "Current Pool State",
"description": "Snapshot of pool health right now.",
"type": "stat",
"gridPos": { "h": 6, "w": 6, "x": 0, "y": 38 },
"id": 7,
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"fieldConfig": {
"defaults": {
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 15 },
{ "color": "red", "value": 35 }
]
}
},
"overrides": []
},
"options": {
"colorMode": "background",
"graphMode": "area",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"textMode": "auto"
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "onyx_db_pool_checked_out{engine=\"sync\"}",
"legendFormat": "Checked Out (sync)",
"refId": "A"
}
]
},
{
"title": "Total Checkout Timeouts",
"description": "Cumulative pool checkout timeouts — each one is a failed request.",
"type": "stat",
"gridPos": { "h": 6, "w": 6, "x": 6, "y": 38 },
"id": 8,
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"fieldConfig": {
"defaults": {
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "red", "value": 1 }
]
}
},
"overrides": []
},
"options": {
"colorMode": "background",
"graphMode": "none",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"textMode": "auto"
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "sum(onyx_db_pool_checkout_timeout_total)",
"legendFormat": "Timeouts",
"refId": "A"
}
]
},
{
"title": "Pool Utilization %",
"description": "Percentage of pool_size currently checked out.",
"type": "gauge",
"gridPos": { "h": 6, "w": 6, "x": 12, "y": 38 },
"id": 9,
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"fieldConfig": {
"defaults": {
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 50 },
{ "color": "red", "value": 80 }
]
},
"min": 0,
"max": 100,
"unit": "percent"
},
"overrides": []
},
"options": {
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"showThresholdLabels": false,
"showThresholdMarkers": true
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "onyx_db_pool_checked_out{engine=\"sync\"} / onyx_db_pool_size{engine=\"sync\"} * 100",
"legendFormat": "Utilization",
"refId": "A"
}
]
},
{
"title": "Total Checkouts",
"description": "Cumulative connection checkouts since server start.",
"type": "stat",
"gridPos": { "h": 6, "w": 6, "x": 18, "y": 38 },
"id": 10,
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"fieldConfig": {
"defaults": {
"thresholds": {
"mode": "absolute",
"steps": [{ "color": "blue", "value": null }]
}
},
"overrides": []
},
"options": {
"colorMode": "background",
"graphMode": "area",
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
"textMode": "auto"
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
"expr": "sum(onyx_db_pool_checkout_total{engine=\"sync\"})",
"legendFormat": "Total Checkouts",
"refId": "A"
}
]
}
],
"refresh": "5s",
"schemaVersion": 37,
"style": "dark",
"tags": ["onyx", "db-pool", "load-test"],
"templating": {
"list": [
{
"current": { "text": "Prometheus", "value": "prometheus" },
"includeAll": false,
"name": "DS_PROMETHEUS",
"options": [],
"query": "prometheus",
"refresh": 1,
"type": "datasource"
}
]
},
"time": { "from": "now-15m", "to": "now" },
"timepicker": {
"refresh_intervals": ["5s", "10s", "30s", "1m"]
},
"timezone": "",
"title": "Onyx DB Pool Health",
"uid": "onyx-db-pool-health",
"version": 0,
"weekStart": ""
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,12 @@
apiVersion: 1
providers:
- name: 'onyx-dashboards'
orgId: 1
folder: 'Onyx'
type: file
updateIntervalSeconds: 10
allowUiUpdates: true
options:
path: /var/lib/grafana/dashboards/onyx
foldersFromFilesStructure: false

View File

@@ -0,0 +1,10 @@
apiVersion: 1
datasources:
- name: Prometheus
type: prometheus
access: proxy
url: http://prometheus:9090
isDefault: true
uid: PBFA97CFB590B2093
editable: true

36
profiling/prometheus.yml Normal file
View File

@@ -0,0 +1,36 @@
global:
scrape_interval: 15s
evaluation_interval: 15s
scrape_configs:
# Prometheus self-monitoring
- job_name: 'prometheus'
static_configs:
- targets: ['localhost:9090']
# Onyx API server — exposes /metrics via prometheus-fastapi-instrumentator
# Port matches the API Server launch config in .vscode/launch.json
- job_name: 'onyx-api-server'
scrape_interval: 5s
metrics_path: /metrics
static_configs:
- targets: ['host.docker.internal:8080']
# Onyx celery workers — each exposes /metrics on a dedicated port
- job_name: 'onyx-monitoring-worker'
scrape_interval: 5s
metrics_path: /metrics
static_configs:
- targets: ['host.docker.internal:9096']
- job_name: 'onyx-docfetching-worker'
scrape_interval: 5s
metrics_path: /metrics
static_configs:
- targets: ['host.docker.internal:9092']
- job_name: 'onyx-docprocessing-worker'
scrape_interval: 5s
metrics_path: /metrics
static_configs:
- targets: ['host.docker.internal:9093']

View File

@@ -60,7 +60,7 @@ backend = [
"httpcore==1.0.9",
"httpx[http2]==0.28.1",
"httpx-oauth==0.15.1",
"huggingface-hub==0.35.3",
"huggingface-hub==1.10.2",
"inflection==0.5.1",
"jira==3.10.5",
"jsonref==1.1.0",
@@ -84,7 +84,7 @@ backend = [
"openpyxl==3.0.10",
"opensearch-py==3.0.0",
"passlib==1.7.4",
"playwright==1.55.0",
"playwright==1.58.0",
"psutil==7.1.3",
"psycopg2-binary==2.9.9",
"puremagic==1.28",
@@ -189,9 +189,9 @@ model_server = [
"einops==0.8.1",
"numpy==2.4.1",
"safetensors==0.5.3",
"sentence-transformers==4.0.2",
"sentence-transformers==5.4.1",
"torch==2.9.1",
"transformers==4.53.0",
"transformers==5.5.4",
"sentry-sdk[fastapi,celery,starlette]==2.14.0",
]
@@ -227,11 +227,6 @@ module = "generated.*"
follow_imports = "silent"
ignore_errors = true
[[tool.mypy.overrides]]
module = "transformers.*"
follow_imports = "skip"
ignore_errors = true
[tool.uv.workspace]
members = ["tools/ods"]

View File

@@ -154,11 +154,11 @@ func worktreeGitMount(root string) (string, bool) {
func sshAgentMount() (string, bool) {
sock := os.Getenv("SSH_AUTH_SOCK")
if sock == "" {
log.Debug("SSH_AUTH_SOCK not set — skipping SSH agent forwarding")
log.Warn("SSH_AUTH_SOCK not set — SSH agent forwarding disabled (git over SSH won't work inside the container)")
return "", false
}
if _, err := os.Stat(sock); err != nil {
log.Debugf("SSH_AUTH_SOCK=%s not accessible: %v", sock, err)
log.Warnf("SSH_AUTH_SOCK=%s not accessible — SSH agent forwarding disabled: %v", sock, err)
return "", false
}
mount := fmt.Sprintf("type=bind,source=%s,target=/tmp/ssh-agent.sock", sock)

136
uv.lock generated
View File

@@ -2301,31 +2301,34 @@ wheels = [
[[package]]
name = "hf-xet"
version = "1.2.0"
version = "1.4.3"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/5e/6e/0f11bacf08a67f7fb5ee09740f2ca54163863b07b70d579356e9222ce5d8/hf_xet-1.2.0.tar.gz", hash = "sha256:a8c27070ca547293b6890c4bf389f713f80e8c478631432962bb7f4bc0bd7d7f", size = 506020, upload-time = "2025-10-24T19:04:32.129Z" }
sdist = { url = "https://files.pythonhosted.org/packages/53/92/ec9ad04d0b5728dca387a45af7bc98fbb0d73b2118759f5f6038b61a57e8/hf_xet-1.4.3.tar.gz", hash = "sha256:8ddedb73c8c08928c793df2f3401ec26f95be7f7e516a7bee2fbb546f6676113", size = 670477, upload-time = "2026-03-31T22:40:07.874Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/9e/a5/85ef910a0aa034a2abcfadc360ab5ac6f6bc4e9112349bd40ca97551cff0/hf_xet-1.2.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:ceeefcd1b7aed4956ae8499e2199607765fbd1c60510752003b6cc0b8413b649", size = 2861870, upload-time = "2025-10-24T19:04:11.422Z" },
{ url = "https://files.pythonhosted.org/packages/ea/40/e2e0a7eb9a51fe8828ba2d47fe22a7e74914ea8a0db68a18c3aa7449c767/hf_xet-1.2.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b70218dd548e9840224df5638fdc94bd033552963cfa97f9170829381179c813", size = 2717584, upload-time = "2025-10-24T19:04:09.586Z" },
{ url = "https://files.pythonhosted.org/packages/a5/7d/daf7f8bc4594fdd59a8a596f9e3886133fdc68e675292218a5e4c1b7e834/hf_xet-1.2.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d40b18769bb9a8bc82a9ede575ce1a44c75eb80e7375a01d76259089529b5dc", size = 3315004, upload-time = "2025-10-24T19:04:00.314Z" },
{ url = "https://files.pythonhosted.org/packages/b1/ba/45ea2f605fbf6d81c8b21e4d970b168b18a53515923010c312c06cd83164/hf_xet-1.2.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:cd3a6027d59cfb60177c12d6424e31f4b5ff13d8e3a1247b3a584bf8977e6df5", size = 3222636, upload-time = "2025-10-24T19:03:58.111Z" },
{ url = "https://files.pythonhosted.org/packages/4a/1d/04513e3cab8f29ab8c109d309ddd21a2705afab9d52f2ba1151e0c14f086/hf_xet-1.2.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6de1fc44f58f6dd937956c8d304d8c2dea264c80680bcfa61ca4a15e7b76780f", size = 3408448, upload-time = "2025-10-24T19:04:20.951Z" },
{ url = "https://files.pythonhosted.org/packages/f0/7c/60a2756d7feec7387db3a1176c632357632fbe7849fce576c5559d4520c7/hf_xet-1.2.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f182f264ed2acd566c514e45da9f2119110e48a87a327ca271027904c70c5832", size = 3503401, upload-time = "2025-10-24T19:04:22.549Z" },
{ url = "https://files.pythonhosted.org/packages/4e/64/48fffbd67fb418ab07451e4ce641a70de1c40c10a13e25325e24858ebe5a/hf_xet-1.2.0-cp313-cp313t-win_amd64.whl", hash = "sha256:293a7a3787e5c95d7be1857358a9130694a9c6021de3f27fa233f37267174382", size = 2900866, upload-time = "2025-10-24T19:04:33.461Z" },
{ url = "https://files.pythonhosted.org/packages/e2/51/f7e2caae42f80af886db414d4e9885fac959330509089f97cccb339c6b87/hf_xet-1.2.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:10bfab528b968c70e062607f663e21e34e2bba349e8038db546646875495179e", size = 2861861, upload-time = "2025-10-24T19:04:19.01Z" },
{ url = "https://files.pythonhosted.org/packages/6e/1d/a641a88b69994f9371bd347f1dd35e5d1e2e2460a2e350c8d5165fc62005/hf_xet-1.2.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:2a212e842647b02eb6a911187dc878e79c4aa0aa397e88dd3b26761676e8c1f8", size = 2717699, upload-time = "2025-10-24T19:04:17.306Z" },
{ url = "https://files.pythonhosted.org/packages/df/e0/e5e9bba7d15f0318955f7ec3f4af13f92e773fbb368c0b8008a5acbcb12f/hf_xet-1.2.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:30e06daccb3a7d4c065f34fc26c14c74f4653069bb2b194e7f18f17cbe9939c0", size = 3314885, upload-time = "2025-10-24T19:04:07.642Z" },
{ url = "https://files.pythonhosted.org/packages/21/90/b7fe5ff6f2b7b8cbdf1bd56145f863c90a5807d9758a549bf3d916aa4dec/hf_xet-1.2.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:29c8fc913a529ec0a91867ce3d119ac1aac966e098cf49501800c870328cc090", size = 3221550, upload-time = "2025-10-24T19:04:05.55Z" },
{ url = "https://files.pythonhosted.org/packages/6f/cb/73f276f0a7ce46cc6a6ec7d6c7d61cbfe5f2e107123d9bbd0193c355f106/hf_xet-1.2.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e159cbfcfbb29f920db2c09ed8b660eb894640d284f102ada929b6e3dc410a", size = 3408010, upload-time = "2025-10-24T19:04:28.598Z" },
{ url = "https://files.pythonhosted.org/packages/b8/1e/d642a12caa78171f4be64f7cd9c40e3ca5279d055d0873188a58c0f5fbb9/hf_xet-1.2.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9c91d5ae931510107f148874e9e2de8a16052b6f1b3ca3c1b12f15ccb491390f", size = 3503264, upload-time = "2025-10-24T19:04:30.397Z" },
{ url = "https://files.pythonhosted.org/packages/17/b5/33764714923fa1ff922770f7ed18c2daae034d21ae6e10dbf4347c854154/hf_xet-1.2.0-cp314-cp314t-win_amd64.whl", hash = "sha256:210d577732b519ac6ede149d2f2f34049d44e8622bf14eb3d63bbcd2d4b332dc", size = 2901071, upload-time = "2025-10-24T19:04:37.463Z" },
{ url = "https://files.pythonhosted.org/packages/96/2d/22338486473df5923a9ab7107d375dbef9173c338ebef5098ef593d2b560/hf_xet-1.2.0-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:46740d4ac024a7ca9b22bebf77460ff43332868b661186a8e46c227fdae01848", size = 2866099, upload-time = "2025-10-24T19:04:15.366Z" },
{ url = "https://files.pythonhosted.org/packages/7f/8c/c5becfa53234299bc2210ba314eaaae36c2875e0045809b82e40a9544f0c/hf_xet-1.2.0-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:27df617a076420d8845bea087f59303da8be17ed7ec0cd7ee3b9b9f579dff0e4", size = 2722178, upload-time = "2025-10-24T19:04:13.695Z" },
{ url = "https://files.pythonhosted.org/packages/9a/92/cf3ab0b652b082e66876d08da57fcc6fa2f0e6c70dfbbafbd470bb73eb47/hf_xet-1.2.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3651fd5bfe0281951b988c0facbe726aa5e347b103a675f49a3fa8144c7968fd", size = 3320214, upload-time = "2025-10-24T19:04:03.596Z" },
{ url = "https://files.pythonhosted.org/packages/46/92/3f7ec4a1b6a65bf45b059b6d4a5d38988f63e193056de2f420137e3c3244/hf_xet-1.2.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:d06fa97c8562fb3ee7a378dd9b51e343bc5bc8190254202c9771029152f5e08c", size = 3229054, upload-time = "2025-10-24T19:04:01.949Z" },
{ url = "https://files.pythonhosted.org/packages/0b/dd/7ac658d54b9fb7999a0ccb07ad863b413cbaf5cf172f48ebcd9497ec7263/hf_xet-1.2.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:4c1428c9ae73ec0939410ec73023c4f842927f39db09b063b9482dac5a3bb737", size = 3413812, upload-time = "2025-10-24T19:04:24.585Z" },
{ url = "https://files.pythonhosted.org/packages/92/68/89ac4e5b12a9ff6286a12174c8538a5930e2ed662091dd2572bbe0a18c8a/hf_xet-1.2.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a55558084c16b09b5ed32ab9ed38421e2d87cf3f1f89815764d1177081b99865", size = 3508920, upload-time = "2025-10-24T19:04:26.927Z" },
{ url = "https://files.pythonhosted.org/packages/cb/44/870d44b30e1dcfb6a65932e3e1506c103a8a5aea9103c337e7a53180322c/hf_xet-1.2.0-cp37-abi3-win_amd64.whl", hash = "sha256:e6584a52253f72c9f52f9e549d5895ca7a471608495c4ecaa6cc73dba2b24d69", size = 2905735, upload-time = "2025-10-24T19:04:35.928Z" },
{ url = "https://files.pythonhosted.org/packages/72/43/724d307b34e353da0abd476e02f72f735cdd2bc86082dee1b32ea0bfee1d/hf_xet-1.4.3-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:7551659ba4f1e1074e9623996f28c3873682530aee0a846b7f2f066239228144", size = 3800935, upload-time = "2026-03-31T22:39:49.618Z" },
{ url = "https://files.pythonhosted.org/packages/2b/d2/8bee5996b699262edb87dbb54118d287c0e1b2fc78af7cdc41857ba5e3c4/hf_xet-1.4.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:bee693ada985e7045997f05f081d0e12c4c08bd7626dc397f8a7c487e6c04f7f", size = 3558942, upload-time = "2026-03-31T22:39:47.938Z" },
{ url = "https://files.pythonhosted.org/packages/c3/a1/e993d09cbe251196fb60812b09a58901c468127b7259d2bf0f68bf6088eb/hf_xet-1.4.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:21644b404bb0100fe3857892f752c4d09642586fd988e61501c95bbf44b393a3", size = 4207657, upload-time = "2026-03-31T22:39:39.69Z" },
{ url = "https://files.pythonhosted.org/packages/64/44/9eb6d21e5c34c63e5e399803a6932fa983cabdf47c0ecbcfe7ea97684b8c/hf_xet-1.4.3-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:987f09cfe418237812896a6736b81b1af02a3a6dcb4b4944425c4c4fca7a7cf8", size = 3986765, upload-time = "2026-03-31T22:39:37.936Z" },
{ url = "https://files.pythonhosted.org/packages/ea/7b/8ad6f16fdb82f5f7284a34b5ec48645bd575bdcd2f6f0d1644775909c486/hf_xet-1.4.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:60cf7fc43a99da0a853345cf86d23738c03983ee5249613a6305d3e57a5dca74", size = 4188162, upload-time = "2026-03-31T22:39:58.382Z" },
{ url = "https://files.pythonhosted.org/packages/1b/c4/39d6e136cbeea9ca5a23aad4b33024319222adbdc059ebcda5fc7d9d5ff4/hf_xet-1.4.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:2815a49a7a59f3e2edf0cf113ae88e8cb2ca2a221bf353fb60c609584f4884d4", size = 4424525, upload-time = "2026-03-31T22:40:00.225Z" },
{ url = "https://files.pythonhosted.org/packages/46/f2/adc32dae6bdbc367853118b9878139ac869419a4ae7ba07185dc31251b76/hf_xet-1.4.3-cp313-cp313t-win_amd64.whl", hash = "sha256:42ee323265f1e6a81b0e11094564fb7f7e0ec75b5105ffd91ae63f403a11931b", size = 3671610, upload-time = "2026-03-31T22:40:10.42Z" },
{ url = "https://files.pythonhosted.org/packages/e2/19/25d897dcc3f81953e0c2cde9ec186c7a0fee413eb0c9a7a9130d87d94d3a/hf_xet-1.4.3-cp313-cp313t-win_arm64.whl", hash = "sha256:27c976ba60079fb8217f485b9c5c7fcd21c90b0367753805f87cb9f3cdc4418a", size = 3528529, upload-time = "2026-03-31T22:40:09.106Z" },
{ url = "https://files.pythonhosted.org/packages/ec/36/3e8f85ca9fe09b8de2b2e10c63b3b3353d7dda88a0b3d426dffbe7b8313b/hf_xet-1.4.3-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:5251d5ece3a81815bae9abab41cf7ddb7bcb8f56411bce0827f4a3071c92fdc6", size = 3801019, upload-time = "2026-03-31T22:39:56.651Z" },
{ url = "https://files.pythonhosted.org/packages/b5/9c/defb6cb1de28bccb7bd8d95f6e60f72a3d3fa4cb3d0329c26fb9a488bfe7/hf_xet-1.4.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1feb0f3abeacee143367c326a128a2e2b60868ec12a36c225afb1d6c5a05e6d2", size = 3558746, upload-time = "2026-03-31T22:39:54.766Z" },
{ url = "https://files.pythonhosted.org/packages/c1/bd/8d001191893178ff8e826e46ad5299446e62b93cd164e17b0ffea08832ec/hf_xet-1.4.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8b301fc150290ca90b4fccd079829b84bb4786747584ae08b94b4577d82fb791", size = 4207692, upload-time = "2026-03-31T22:39:46.246Z" },
{ url = "https://files.pythonhosted.org/packages/ce/48/6790b402803250e9936435613d3a78b9aaeee7973439f0918848dde58309/hf_xet-1.4.3-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:d972fbe95ddc0d3c0fc49b31a8a69f47db35c1e3699bf316421705741aab6653", size = 3986281, upload-time = "2026-03-31T22:39:44.648Z" },
{ url = "https://files.pythonhosted.org/packages/51/56/ea62552fe53db652a9099eda600b032d75554d0e86c12a73824bfedef88b/hf_xet-1.4.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c5b48db1ee344a805a1b9bd2cda9b6b65fe77ed3787bd6e87ad5521141d317cd", size = 4187414, upload-time = "2026-03-31T22:40:04.951Z" },
{ url = "https://files.pythonhosted.org/packages/7d/f5/bc1456d4638061bea997e6d2db60a1a613d7b200e0755965ec312dc1ef79/hf_xet-1.4.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:22bdc1f5fb8b15bf2831440b91d1c9bbceeb7e10c81a12e8d75889996a5c9da8", size = 4424368, upload-time = "2026-03-31T22:40:06.347Z" },
{ url = "https://files.pythonhosted.org/packages/e4/76/ab597bae87e1f06d18d3ecb8ed7f0d3c9a37037fc32ce76233d369273c64/hf_xet-1.4.3-cp314-cp314t-win_amd64.whl", hash = "sha256:0392c79b7cf48418cd61478c1a925246cf10639f4cd9d94368d8ca1e8df9ea07", size = 3672280, upload-time = "2026-03-31T22:40:16.401Z" },
{ url = "https://files.pythonhosted.org/packages/62/05/2e462d34e23a09a74d73785dbed71cc5dbad82a72eee2ad60a72a554155d/hf_xet-1.4.3-cp314-cp314t-win_arm64.whl", hash = "sha256:681c92a07796325778a79d76c67011764ecc9042a8c3579332b61b63ae512075", size = 3528945, upload-time = "2026-03-31T22:40:14.995Z" },
{ url = "https://files.pythonhosted.org/packages/ac/9f/9c23e4a447b8f83120798f9279d0297a4d1360bdbf59ef49ebec78fe2545/hf_xet-1.4.3-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:d0da85329eaf196e03e90b84c2d0aca53bd4573d097a75f99609e80775f98025", size = 3805048, upload-time = "2026-03-31T22:39:53.105Z" },
{ url = "https://files.pythonhosted.org/packages/0b/f8/7aacb8e5f4a7899d39c787b5984e912e6c18b11be136ef13947d7a66d265/hf_xet-1.4.3-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:e23717ce4186b265f69afa66e6f0069fe7efbf331546f5c313d00e123dc84583", size = 3562178, upload-time = "2026-03-31T22:39:51.295Z" },
{ url = "https://files.pythonhosted.org/packages/df/9a/a24b26dc8a65f0ecc0fe5be981a19e61e7ca963b85e062c083f3a9100529/hf_xet-1.4.3-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fc360b70c815bf340ed56c7b8c63aacf11762a4b099b2fe2c9bd6d6068668c08", size = 4212320, upload-time = "2026-03-31T22:39:42.922Z" },
{ url = "https://files.pythonhosted.org/packages/53/60/46d493db155d2ee2801b71fb1b0fd67696359047fdd8caee2c914cc50c79/hf_xet-1.4.3-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:39f2d2e9654cd9b4319885733993807aab6de9dfbd34c42f0b78338d6617421f", size = 3991546, upload-time = "2026-03-31T22:39:41.335Z" },
{ url = "https://files.pythonhosted.org/packages/bc/f5/067363e1c96c6b17256910830d1b54099d06287e10f4ec6ec4e7e08371fc/hf_xet-1.4.3-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:49ad8a8cead2b56051aa84d7fce3e1335efe68df3cf6c058f22a65513885baac", size = 4193200, upload-time = "2026-03-31T22:40:01.936Z" },
{ url = "https://files.pythonhosted.org/packages/42/4b/53951592882d9c23080c7644542fda34a3813104e9e11fa1a7d82d419cb8/hf_xet-1.4.3-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:7716d62015477a70ea272d2d68cd7cad140f61c52ee452e133e139abfe2c17ba", size = 4429392, upload-time = "2026-03-31T22:40:03.492Z" },
{ url = "https://files.pythonhosted.org/packages/8a/21/75a6c175b4e79662ad8e62f46a40ce341d8d6b206b06b4320d07d55b188c/hf_xet-1.4.3-cp37-abi3-win_amd64.whl", hash = "sha256:6b591fcad34e272a5b02607485e4f2a1334aebf1bc6d16ce8eb1eb8978ac2021", size = 3677359, upload-time = "2026-03-31T22:40:13.619Z" },
{ url = "https://files.pythonhosted.org/packages/8a/7c/44314ecd0e89f8b2b51c9d9e5e7a60a9c1c82024ac471d415860557d3cd8/hf_xet-1.4.3-cp37-abi3-win_arm64.whl", hash = "sha256:7c2c7e20bcfcc946dc67187c203463f5e932e395845d098cc2a93f5b67ca0b47", size = 3533664, upload-time = "2026-03-31T22:40:12.152Z" },
]
[[package]]
@@ -2450,21 +2453,22 @@ wheels = [
[[package]]
name = "huggingface-hub"
version = "0.35.3"
version = "1.10.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "filelock" },
{ name = "fsspec" },
{ name = "hf-xet", marker = "platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" },
{ name = "hf-xet", marker = "platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" },
{ name = "httpx" },
{ name = "packaging" },
{ name = "pyyaml" },
{ name = "requests" },
{ name = "tqdm" },
{ name = "typer" },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/10/7e/a0a97de7c73671863ca6b3f61fa12518caf35db37825e43d63a70956738c/huggingface_hub-0.35.3.tar.gz", hash = "sha256:350932eaa5cc6a4747efae85126ee220e4ef1b54e29d31c3b45c5612ddf0b32a", size = 461798, upload-time = "2025-09-29T14:29:58.625Z" }
sdist = { url = "https://files.pythonhosted.org/packages/0c/4d/00734890c7fcfe2c7ff04f1c1a167186c42b19e370a2dd8cfd8c34fc92c4/huggingface_hub-1.10.2.tar.gz", hash = "sha256:4b276f820483b709dc86a53bcb8183ea496b8d8447c9f7f88a115a12b498a95f", size = 758428, upload-time = "2026-04-14T10:42:28.498Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/31/a0/651f93d154cb72323358bf2bbae3e642bdb5d2f1bfc874d096f7cb159fa0/huggingface_hub-0.35.3-py3-none-any.whl", hash = "sha256:0e3a01829c19d86d03793e4577816fe3bdfc1602ac62c7fb220d593d351224ba", size = 564262, upload-time = "2025-09-29T14:29:55.813Z" },
{ url = "https://files.pythonhosted.org/packages/5e/c9/4c1e1216b24bcab140c83acdf8bc89a846ea17cd8a06cd18e3fd308a297f/huggingface_hub-1.10.2-py3-none-any.whl", hash = "sha256:c26c908767cc711493978dc0b4f5747ba7841602997cc98bfd628450a28cf9bc", size = 642581, upload-time = "2026-04-14T10:42:26.563Z" },
]
[[package]]
@@ -4444,7 +4448,7 @@ backend = [
{ name = "httpx", extras = ["http2"], specifier = "==0.28.1" },
{ name = "httpx-oauth", specifier = "==0.15.1" },
{ name = "hubspot-api-client", specifier = "==11.1.0" },
{ name = "huggingface-hub", specifier = "==0.35.3" },
{ name = "huggingface-hub", specifier = "==1.10.2" },
{ name = "inflection", specifier = "==0.5.1" },
{ name = "jira", specifier = "==3.10.5" },
{ name = "jsonref", specifier = "==1.1.0" },
@@ -4467,7 +4471,7 @@ backend = [
{ name = "opensearch-py", specifier = "==3.0.0" },
{ name = "opentelemetry-proto", specifier = ">=1.39.0" },
{ name = "passlib", specifier = "==1.7.4" },
{ name = "playwright", specifier = "==1.55.0" },
{ name = "playwright", specifier = "==1.58.0" },
{ name = "psutil", specifier = "==7.1.3" },
{ name = "psycopg2-binary", specifier = "==2.9.9" },
{ name = "puremagic", specifier = "==1.28" },
@@ -4555,10 +4559,10 @@ model-server = [
{ name = "einops", specifier = "==0.8.1" },
{ name = "numpy", specifier = "==2.4.1" },
{ name = "safetensors", specifier = "==0.5.3" },
{ name = "sentence-transformers", specifier = "==4.0.2" },
{ name = "sentence-transformers", specifier = "==5.4.1" },
{ name = "sentry-sdk", extras = ["fastapi", "celery", "starlette"], specifier = "==2.14.0" },
{ name = "torch", specifier = "==2.9.1" },
{ name = "transformers", specifier = "==4.53.0" },
{ name = "transformers", specifier = "==5.5.4" },
]
[[package]]
@@ -5077,21 +5081,21 @@ wheels = [
[[package]]
name = "playwright"
version = "1.55.0"
version = "1.58.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "greenlet" },
{ name = "pyee" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/80/3a/c81ff76df266c62e24f19718df9c168f49af93cabdbc4608ae29656a9986/playwright-1.55.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:d7da108a95001e412effca4f7610de79da1637ccdf670b1ae3fdc08b9694c034", size = 40428109, upload-time = "2025-08-28T15:46:20.357Z" },
{ url = "https://files.pythonhosted.org/packages/cf/f5/bdb61553b20e907196a38d864602a9b4a461660c3a111c67a35179b636fa/playwright-1.55.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:8290cf27a5d542e2682ac274da423941f879d07b001f6575a5a3a257b1d4ba1c", size = 38687254, upload-time = "2025-08-28T15:46:23.925Z" },
{ url = "https://files.pythonhosted.org/packages/4a/64/48b2837ef396487807e5ab53c76465747e34c7143fac4a084ef349c293a8/playwright-1.55.0-py3-none-macosx_11_0_universal2.whl", hash = "sha256:25b0d6b3fd991c315cca33c802cf617d52980108ab8431e3e1d37b5de755c10e", size = 40428108, upload-time = "2025-08-28T15:46:27.119Z" },
{ url = "https://files.pythonhosted.org/packages/08/33/858312628aa16a6de97839adc2ca28031ebc5391f96b6fb8fdf1fcb15d6c/playwright-1.55.0-py3-none-manylinux1_x86_64.whl", hash = "sha256:c6d4d8f6f8c66c483b0835569c7f0caa03230820af8e500c181c93509c92d831", size = 45905643, upload-time = "2025-08-28T15:46:30.312Z" },
{ url = "https://files.pythonhosted.org/packages/83/83/b8d06a5b5721931aa6d5916b83168e28bd891f38ff56fe92af7bdee9860f/playwright-1.55.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:29a0777c4ce1273acf90c87e4ae2fe0130182100d99bcd2ae5bf486093044838", size = 45296647, upload-time = "2025-08-28T15:46:33.221Z" },
{ url = "https://files.pythonhosted.org/packages/06/2e/9db64518aebcb3d6ef6cd6d4d01da741aff912c3f0314dadb61226c6a96a/playwright-1.55.0-py3-none-win32.whl", hash = "sha256:29e6d1558ad9d5b5c19cbec0a72f6a2e35e6353cd9f262e22148685b86759f90", size = 35476046, upload-time = "2025-08-28T15:46:36.184Z" },
{ url = "https://files.pythonhosted.org/packages/46/4f/9ba607fa94bb9cee3d4beb1c7b32c16efbfc9d69d5037fa85d10cafc618b/playwright-1.55.0-py3-none-win_amd64.whl", hash = "sha256:7eb5956473ca1951abb51537e6a0da55257bb2e25fc37c2b75af094a5c93736c", size = 35476048, upload-time = "2025-08-28T15:46:38.867Z" },
{ url = "https://files.pythonhosted.org/packages/21/98/5ca173c8ec906abde26c28e1ecb34887343fd71cc4136261b90036841323/playwright-1.55.0-py3-none-win_arm64.whl", hash = "sha256:012dc89ccdcbd774cdde8aeee14c08e0dd52ddb9135bf10e9db040527386bd76", size = 31225543, upload-time = "2025-08-28T15:46:41.613Z" },
{ url = "https://files.pythonhosted.org/packages/f8/c9/9c6061d5703267f1baae6a4647bfd1862e386fbfdb97d889f6f6ae9e3f64/playwright-1.58.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:96e3204aac292ee639edbfdef6298b4be2ea0a55a16b7068df91adac077cc606", size = 42251098, upload-time = "2026-01-30T15:09:24.028Z" },
{ url = "https://files.pythonhosted.org/packages/e0/40/59d34a756e02f8c670f0fee987d46f7ee53d05447d43cd114ca015cb168c/playwright-1.58.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:70c763694739d28df71ed578b9c8202bb83e8fe8fb9268c04dd13afe36301f71", size = 41039625, upload-time = "2026-01-30T15:09:27.558Z" },
{ url = "https://files.pythonhosted.org/packages/e1/ee/3ce6209c9c74a650aac9028c621f357a34ea5cd4d950700f8e2c4b7fe2c4/playwright-1.58.0-py3-none-macosx_11_0_universal2.whl", hash = "sha256:185e0132578733d02802dfddfbbc35f42be23a45ff49ccae5081f25952238117", size = 42251098, upload-time = "2026-01-30T15:09:30.461Z" },
{ url = "https://files.pythonhosted.org/packages/f1/af/009958cbf23fac551a940d34e3206e6c7eed2b8c940d0c3afd1feb0b0589/playwright-1.58.0-py3-none-manylinux1_x86_64.whl", hash = "sha256:c95568ba1eda83812598c1dc9be60b4406dffd60b149bc1536180ad108723d6b", size = 46235268, upload-time = "2026-01-30T15:09:33.787Z" },
{ url = "https://files.pythonhosted.org/packages/d9/a6/0e66ad04b6d3440dae73efb39540c5685c5fc95b17c8b29340b62abbd952/playwright-1.58.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f9999948f1ab541d98812de25e3a8c410776aa516d948807140aff797b4bffa", size = 45964214, upload-time = "2026-01-30T15:09:36.751Z" },
{ url = "https://files.pythonhosted.org/packages/0e/4b/236e60ab9f6d62ed0fd32150d61f1f494cefbf02304c0061e78ed80c1c32/playwright-1.58.0-py3-none-win32.whl", hash = "sha256:1e03be090e75a0fabbdaeab65ce17c308c425d879fa48bb1d7986f96bfad0b99", size = 36815998, upload-time = "2026-01-30T15:09:39.627Z" },
{ url = "https://files.pythonhosted.org/packages/41/f8/5ec599c5e59d2f2f336a05b4f318e733077cd5044f24adb6f86900c3e6a7/playwright-1.58.0-py3-none-win_amd64.whl", hash = "sha256:a2bf639d0ce33b3ba38de777e08697b0d8f3dc07ab6802e4ac53fb65e3907af8", size = 36816005, upload-time = "2026-01-30T15:09:42.449Z" },
{ url = "https://files.pythonhosted.org/packages/c8/c4/cc0229fea55c87d6c9c67fe44a21e2cd28d1d558a5478ed4d617e9fb0c93/playwright-1.58.0-py3-none-win_arm64.whl", hash = "sha256:32ffe5c303901a13a0ecab91d1c3f74baf73b84f4bedbb6b935f5bc11cc98e1b", size = 33085919, upload-time = "2026-01-30T15:09:45.71Z" },
]
[[package]]
@@ -6829,11 +6833,11 @@ wheels = [
[[package]]
name = "sentence-transformers"
version = "4.0.2"
version = "5.4.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "huggingface-hub" },
{ name = "pillow" },
{ name = "numpy" },
{ name = "scikit-learn" },
{ name = "scipy" },
{ name = "torch" },
@@ -6841,9 +6845,9 @@ dependencies = [
{ name = "transformers" },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/3c/74/606783c6b2e80a609af25d2c487bbe32eb8f0970b0f7519fbb1a099f3ab8/sentence_transformers-4.0.2.tar.gz", hash = "sha256:d33d0c5a69ae0d682115c90e74fc1dc24c4786aeea78e26f5889b037e5921880", size = 267724, upload-time = "2025-04-03T11:29:06.046Z" }
sdist = { url = "https://files.pythonhosted.org/packages/4d/68/7f98c221940ce783b492ad6140384daf2e2918cd7175009d6a362c22b9ee/sentence_transformers-5.4.1.tar.gz", hash = "sha256:436bcb1182a0ff42a8fb2b1c43498a70d0a75b688d182f2cd0d1dd115af61ddc", size = 428910, upload-time = "2026-04-14T13:34:59.006Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/32/58/770e1e762893abbfe3cd048f1ed1ea6e00122a195651ea98fb27f55ad17a/sentence_transformers-4.0.2-py3-none-any.whl", hash = "sha256:25f5086d0746c22177f9fb7d431f3eebe6375f3afe1dc7c341c4ca9061e98771", size = 340618, upload-time = "2025-04-03T11:29:04.037Z" },
{ url = "https://files.pythonhosted.org/packages/c5/d9/3a9b6f2ccdedc9dc00fe37b2fc58f58f8efbff44565cf4bf39d8568bb13a/sentence_transformers-5.4.1-py3-none-any.whl", hash = "sha256:a6d640fc363849b63affb8e140e9d328feabab86f83d58ac3e16b1c28140b790", size = 571311, upload-time = "2026-04-14T13:34:57.731Z" },
]
[[package]]
@@ -7192,27 +7196,28 @@ wheels = [
[[package]]
name = "tokenizers"
version = "0.21.4"
version = "0.22.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "huggingface-hub" },
]
sdist = { url = "https://files.pythonhosted.org/packages/c2/2f/402986d0823f8d7ca139d969af2917fefaa9b947d1fb32f6168c509f2492/tokenizers-0.21.4.tar.gz", hash = "sha256:fa23f85fbc9a02ec5c6978da172cdcbac23498c3ca9f3645c5c68740ac007880", size = 351253, upload-time = "2025-07-28T15:48:54.325Z" }
sdist = { url = "https://files.pythonhosted.org/packages/73/6f/f80cfef4a312e1fb34baf7d85c72d4411afde10978d4657f8cdd811d3ccc/tokenizers-0.22.2.tar.gz", hash = "sha256:473b83b915e547aa366d1eee11806deaf419e17be16310ac0a14077f1e28f917", size = 372115, upload-time = "2026-01-05T10:45:15.988Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/98/c6/fdb6f72bf6454f52eb4a2510be7fb0f614e541a2554d6210e370d85efff4/tokenizers-0.21.4-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:2ccc10a7c3bcefe0f242867dc914fc1226ee44321eb618cfe3019b5df3400133", size = 2863987, upload-time = "2025-07-28T15:48:44.877Z" },
{ url = "https://files.pythonhosted.org/packages/8d/a6/28975479e35ddc751dc1ddc97b9b69bf7fcf074db31548aab37f8116674c/tokenizers-0.21.4-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:5e2f601a8e0cd5be5cc7506b20a79112370b9b3e9cb5f13f68ab11acd6ca7d60", size = 2732457, upload-time = "2025-07-28T15:48:43.265Z" },
{ url = "https://files.pythonhosted.org/packages/aa/8f/24f39d7b5c726b7b0be95dca04f344df278a3fe3a4deb15a975d194cbb32/tokenizers-0.21.4-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:39b376f5a1aee67b4d29032ee85511bbd1b99007ec735f7f35c8a2eb104eade5", size = 3012624, upload-time = "2025-07-28T13:22:43.895Z" },
{ url = "https://files.pythonhosted.org/packages/58/47/26358925717687a58cb74d7a508de96649544fad5778f0cd9827398dc499/tokenizers-0.21.4-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2107ad649e2cda4488d41dfd031469e9da3fcbfd6183e74e4958fa729ffbf9c6", size = 2939681, upload-time = "2025-07-28T13:22:47.499Z" },
{ url = "https://files.pythonhosted.org/packages/99/6f/cc300fea5db2ab5ddc2c8aea5757a27b89c84469899710c3aeddc1d39801/tokenizers-0.21.4-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c73012da95afafdf235ba80047699df4384fdc481527448a078ffd00e45a7d9", size = 3247445, upload-time = "2025-07-28T15:48:39.711Z" },
{ url = "https://files.pythonhosted.org/packages/be/bf/98cb4b9c3c4afd8be89cfa6423704337dc20b73eb4180397a6e0d456c334/tokenizers-0.21.4-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f23186c40395fc390d27f519679a58023f368a0aad234af145e0f39ad1212732", size = 3428014, upload-time = "2025-07-28T13:22:49.569Z" },
{ url = "https://files.pythonhosted.org/packages/75/c7/96c1cc780e6ca7f01a57c13235dd05b7bc1c0f3588512ebe9d1331b5f5ae/tokenizers-0.21.4-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cc88bb34e23a54cc42713d6d98af5f1bf79c07653d24fe984d2d695ba2c922a2", size = 3193197, upload-time = "2025-07-28T13:22:51.471Z" },
{ url = "https://files.pythonhosted.org/packages/f2/90/273b6c7ec78af547694eddeea9e05de771278bd20476525ab930cecaf7d8/tokenizers-0.21.4-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51b7eabb104f46c1c50b486520555715457ae833d5aee9ff6ae853d1130506ff", size = 3115426, upload-time = "2025-07-28T15:48:41.439Z" },
{ url = "https://files.pythonhosted.org/packages/91/43/c640d5a07e95f1cf9d2c92501f20a25f179ac53a4f71e1489a3dcfcc67ee/tokenizers-0.21.4-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:714b05b2e1af1288bd1bc56ce496c4cebb64a20d158ee802887757791191e6e2", size = 9089127, upload-time = "2025-07-28T15:48:46.472Z" },
{ url = "https://files.pythonhosted.org/packages/44/a1/dd23edd6271d4dca788e5200a807b49ec3e6987815cd9d0a07ad9c96c7c2/tokenizers-0.21.4-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:1340ff877ceedfa937544b7d79f5b7becf33a4cfb58f89b3b49927004ef66f78", size = 9055243, upload-time = "2025-07-28T15:48:48.539Z" },
{ url = "https://files.pythonhosted.org/packages/21/2b/b410d6e9021c4b7ddb57248304dc817c4d4970b73b6ee343674914701197/tokenizers-0.21.4-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:3c1f4317576e465ac9ef0d165b247825a2a4078bcd01cba6b54b867bdf9fdd8b", size = 9298237, upload-time = "2025-07-28T15:48:50.443Z" },
{ url = "https://files.pythonhosted.org/packages/b7/0a/42348c995c67e2e6e5c89ffb9cfd68507cbaeb84ff39c49ee6e0a6dd0fd2/tokenizers-0.21.4-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:c212aa4e45ec0bb5274b16b6f31dd3f1c41944025c2358faaa5782c754e84c24", size = 9461980, upload-time = "2025-07-28T15:48:52.325Z" },
{ url = "https://files.pythonhosted.org/packages/3d/d3/dacccd834404cd71b5c334882f3ba40331ad2120e69ded32cf5fda9a7436/tokenizers-0.21.4-cp39-abi3-win32.whl", hash = "sha256:6c42a930bc5f4c47f4ea775c91de47d27910881902b0f20e4990ebe045a415d0", size = 2329871, upload-time = "2025-07-28T15:48:56.841Z" },
{ url = "https://files.pythonhosted.org/packages/41/f2/fd673d979185f5dcbac4be7d09461cbb99751554ffb6718d0013af8604cb/tokenizers-0.21.4-cp39-abi3-win_amd64.whl", hash = "sha256:475d807a5c3eb72c59ad9b5fcdb254f6e17f53dfcbb9903233b0dfa9c943b597", size = 2507568, upload-time = "2025-07-28T15:48:55.456Z" },
{ url = "https://files.pythonhosted.org/packages/92/97/5dbfabf04c7e348e655e907ed27913e03db0923abb5dfdd120d7b25630e1/tokenizers-0.22.2-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:544dd704ae7238755d790de45ba8da072e9af3eea688f698b137915ae959281c", size = 3100275, upload-time = "2026-01-05T10:41:02.158Z" },
{ url = "https://files.pythonhosted.org/packages/2e/47/174dca0502ef88b28f1c9e06b73ce33500eedfac7a7692108aec220464e7/tokenizers-0.22.2-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:1e418a55456beedca4621dbab65a318981467a2b188e982a23e117f115ce5001", size = 2981472, upload-time = "2026-01-05T10:41:00.276Z" },
{ url = "https://files.pythonhosted.org/packages/d6/84/7990e799f1309a8b87af6b948f31edaa12a3ed22d11b352eaf4f4b2e5753/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2249487018adec45d6e3554c71d46eb39fa8ea67156c640f7513eb26f318cec7", size = 3290736, upload-time = "2026-01-05T10:40:32.165Z" },
{ url = "https://files.pythonhosted.org/packages/78/59/09d0d9ba94dcd5f4f1368d4858d24546b4bdc0231c2354aa31d6199f0399/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:25b85325d0815e86e0bac263506dd114578953b7b53d7de09a6485e4a160a7dd", size = 3168835, upload-time = "2026-01-05T10:40:38.847Z" },
{ url = "https://files.pythonhosted.org/packages/47/50/b3ebb4243e7160bda8d34b731e54dd8ab8b133e50775872e7a434e524c28/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bfb88f22a209ff7b40a576d5324bf8286b519d7358663db21d6246fb17eea2d5", size = 3521673, upload-time = "2026-01-05T10:40:56.614Z" },
{ url = "https://files.pythonhosted.org/packages/e0/fa/89f4cb9e08df770b57adb96f8cbb7e22695a4cb6c2bd5f0c4f0ebcf33b66/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1c774b1276f71e1ef716e5486f21e76333464f47bece56bbd554485982a9e03e", size = 3724818, upload-time = "2026-01-05T10:40:44.507Z" },
{ url = "https://files.pythonhosted.org/packages/64/04/ca2363f0bfbe3b3d36e95bf67e56a4c88c8e3362b658e616d1ac185d47f2/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:df6c4265b289083bf710dff49bc51ef252f9d5be33a45ee2bed151114a56207b", size = 3379195, upload-time = "2026-01-05T10:40:51.139Z" },
{ url = "https://files.pythonhosted.org/packages/2e/76/932be4b50ef6ccedf9d3c6639b056a967a86258c6d9200643f01269211ca/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:369cc9fc8cc10cb24143873a0d95438bb8ee257bb80c71989e3ee290e8d72c67", size = 3274982, upload-time = "2026-01-05T10:40:58.331Z" },
{ url = "https://files.pythonhosted.org/packages/1d/28/5f9f5a4cc211b69e89420980e483831bcc29dade307955cc9dc858a40f01/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:29c30b83d8dcd061078b05ae0cb94d3c710555fbb44861139f9f83dcca3dc3e4", size = 9478245, upload-time = "2026-01-05T10:41:04.053Z" },
{ url = "https://files.pythonhosted.org/packages/6c/fb/66e2da4704d6aadebf8cb39f1d6d1957df667ab24cff2326b77cda0dcb85/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:37ae80a28c1d3265bb1f22464c856bd23c02a05bb211e56d0c5301a435be6c1a", size = 9560069, upload-time = "2026-01-05T10:45:10.673Z" },
{ url = "https://files.pythonhosted.org/packages/16/04/fed398b05caa87ce9b1a1bb5166645e38196081b225059a6edaff6440fac/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:791135ee325f2336f498590eb2f11dc5c295232f288e75c99a36c5dbce63088a", size = 9899263, upload-time = "2026-01-05T10:45:12.559Z" },
{ url = "https://files.pythonhosted.org/packages/05/a1/d62dfe7376beaaf1394917e0f8e93ee5f67fea8fcf4107501db35996586b/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:38337540fbbddff8e999d59970f3c6f35a82de10053206a7562f1ea02d046fa5", size = 10033429, upload-time = "2026-01-05T10:45:14.333Z" },
{ url = "https://files.pythonhosted.org/packages/fd/18/a545c4ea42af3df6effd7d13d250ba77a0a86fb20393143bbb9a92e434d4/tokenizers-0.22.2-cp39-abi3-win32.whl", hash = "sha256:a6bf3f88c554a2b653af81f3204491c818ae2ac6fbc09e76ef4773351292bc92", size = 2502363, upload-time = "2026-01-05T10:45:20.593Z" },
{ url = "https://files.pythonhosted.org/packages/65/71/0670843133a43d43070abeb1949abfdef12a86d490bea9cd9e18e37c5ff7/tokenizers-0.22.2-cp39-abi3-win_amd64.whl", hash = "sha256:c9ea31edff2968b44a88f97d784c2f16dc0729b8b143ed004699ebca91f05c48", size = 2747786, upload-time = "2026-01-05T10:45:18.411Z" },
{ url = "https://files.pythonhosted.org/packages/72/f4/0de46cfa12cdcbcd464cc59fde36912af405696f687e53a091fb432f694c/tokenizers-0.22.2-cp39-abi3-win_arm64.whl", hash = "sha256:9ce725d22864a1e965217204946f830c37876eee3b2ba6fc6255e8e903d5fcbc", size = 2612133, upload-time = "2026-01-05T10:45:17.232Z" },
]
[[package]]
@@ -7338,23 +7343,22 @@ wheels = [
[[package]]
name = "transformers"
version = "4.53.0"
version = "5.5.4"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "filelock" },
{ name = "huggingface-hub" },
{ name = "numpy" },
{ name = "packaging" },
{ name = "pyyaml" },
{ name = "regex" },
{ name = "requests" },
{ name = "safetensors" },
{ name = "tokenizers" },
{ name = "tqdm" },
{ name = "typer" },
]
sdist = { url = "https://files.pythonhosted.org/packages/e8/40/f2d2c3bcf5c6135027cab0fd7db52f6149a1c23acc4e45f914c43d362386/transformers-4.53.0.tar.gz", hash = "sha256:f89520011b4a73066fdc7aabfa158317c3934a22e3cd652d7ffbc512c4063841", size = 9177265, upload-time = "2025-06-26T16:10:54.729Z" }
sdist = { url = "https://files.pythonhosted.org/packages/a5/1e/1e244ab2ab50a863e6b52cc55761910567fa532b69a6740f6e99c5fdbd98/transformers-5.5.4.tar.gz", hash = "sha256:2e67cadba81fc7608cc07c4dd54f524820bc3d95b1cabd0ef3db7733c4f8b82e", size = 8227649, upload-time = "2026-04-13T16:55:55.181Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/5e/0c/68d03a38f6ab2ba2b2829eb11b334610dd236e7926787f7656001b68e1f2/transformers-4.53.0-py3-none-any.whl", hash = "sha256:7d8039ff032c01a2d7f8a8fe0066620367003275f023815a966e62203f9f5dd7", size = 10821970, upload-time = "2025-06-26T16:10:51.505Z" },
{ url = "https://files.pythonhosted.org/packages/29/fb/162a66789c65e5afa3b051309240c26bf37fbc8fea285b4546ae747995a2/transformers-5.5.4-py3-none-any.whl", hash = "sha256:0bd6281b82966fe5a7a16f553ea517a9db1dee6284d7cb224dfd88fc0dd1c167", size = 10236696, upload-time = "2026-04-13T16:55:51.497Z" },
]
[[package]]

View File

@@ -1,14 +1,11 @@
import "@opal/components/tooltip.css";
import { Interactive, type InteractiveStatelessProps } from "@opal/core";
import type {
ContainerSizeVariants,
ExtremaSizeVariants,
RichStr,
} from "@opal/types";
import { Text } from "@opal/components";
import type { TooltipSide } from "@opal/components";
import { Text, type TooltipSide, Tooltip } from "@opal/components";
import type { IconFunctionComponent } from "@opal/types";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";
import { iconWrapper } from "@opal/components/buttons/icon-wrapper";
// ---------------------------------------------------------------------------
@@ -118,24 +115,11 @@ function Button({
</Interactive.Stateless>
);
if (tooltip) {
return (
<TooltipPrimitive.Root>
<TooltipPrimitive.Trigger asChild>{button}</TooltipPrimitive.Trigger>
<TooltipPrimitive.Portal>
<TooltipPrimitive.Content
className="opal-tooltip"
side={tooltipSide}
sideOffset={4}
>
{tooltip}
</TooltipPrimitive.Content>
</TooltipPrimitive.Portal>
</TooltipPrimitive.Root>
);
}
return button;
return (
<Tooltip tooltip={tooltip} side={tooltipSide}>
{button}
</Tooltip>
);
}
export { Button, type ButtonProps };

View File

@@ -3,11 +3,9 @@ import {
type InteractiveStatefulInteraction,
type InteractiveStatefulProps,
} from "@opal/core";
import type { TooltipSide } from "@opal/components";
import { Text, Tooltip, type TooltipSide } from "@opal/components";
import type { IconFunctionComponent, RichStr } from "@opal/types";
import { Text } from "@opal/components";
import { SvgX } from "@opal/icons";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";
import { iconWrapper } from "@opal/components/buttons/icon-wrapper";
import { ChevronIcon } from "@opal/components/buttons/chevron";
import { Button } from "@opal/components/buttons/button/components";
@@ -100,21 +98,10 @@ function FilterButton({
</div>
);
if (!tooltip) return button;
return (
<TooltipPrimitive.Root>
<TooltipPrimitive.Trigger asChild>{button}</TooltipPrimitive.Trigger>
<TooltipPrimitive.Portal>
<TooltipPrimitive.Content
className="opal-tooltip"
side={tooltipSide}
sideOffset={4}
>
{tooltip}
</TooltipPrimitive.Content>
</TooltipPrimitive.Portal>
</TooltipPrimitive.Root>
<Tooltip tooltip={tooltip} side={tooltipSide}>
{button}
</Tooltip>
);
}

View File

@@ -3,12 +3,10 @@ import {
type InteractiveStatefulProps,
InteractiveContainerRoundingVariant,
} from "@opal/core";
import type { ExtremaSizeVariants } from "@opal/types";
import type { TooltipSide } from "@opal/components";
import type { DistributiveOmit } from "@opal/types";
import type { ExtremaSizeVariants, DistributiveOmit } from "@opal/types";
import { Tooltip, type TooltipSide } from "@opal/components";
import type { ContentActionProps } from "@opal/layouts/content-action/components";
import { ContentAction } from "@opal/layouts";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";
// ---------------------------------------------------------------------------
// Types
@@ -98,21 +96,10 @@ function LineItemButton({
</Interactive.Stateful>
);
if (!tooltip) return item;
return (
<TooltipPrimitive.Root>
<TooltipPrimitive.Trigger asChild>{item}</TooltipPrimitive.Trigger>
<TooltipPrimitive.Portal>
<TooltipPrimitive.Content
className="opal-tooltip"
side={tooltipSide}
sideOffset={4}
>
{tooltip}
</TooltipPrimitive.Content>
</TooltipPrimitive.Portal>
</TooltipPrimitive.Root>
<Tooltip tooltip={tooltip} side={tooltipSide}>
{item}
</Tooltip>
);
}

View File

@@ -6,13 +6,11 @@ import {
import type {
ContainerSizeVariants,
ExtremaSizeVariants,
IconFunctionComponent,
RichStr,
} from "@opal/types";
import { Text } from "@opal/components";
import { Text, Tooltip, type TooltipSide } from "@opal/components";
import type { InteractiveContainerRoundingVariant } from "@opal/core";
import type { TooltipSide } from "@opal/components";
import type { IconFunctionComponent } from "@opal/types";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";
import { cn } from "@opal/utils";
import { iconWrapper } from "@opal/components/buttons/icon-wrapper";
import { ChevronIcon } from "@opal/components/buttons/chevron";
@@ -172,21 +170,10 @@ function OpenButton({
const resolvedTooltip =
tooltip ?? (foldable && disabled && children ? children : undefined);
if (!resolvedTooltip) return button;
return (
<TooltipPrimitive.Root>
<TooltipPrimitive.Trigger asChild>{button}</TooltipPrimitive.Trigger>
<TooltipPrimitive.Portal>
<TooltipPrimitive.Content
className="opal-tooltip"
side={tooltipSide}
sideOffset={4}
>
<Text>{resolvedTooltip}</Text>
</TooltipPrimitive.Content>
</TooltipPrimitive.Portal>
</TooltipPrimitive.Root>
<Tooltip tooltip={resolvedTooltip} side={tooltipSide}>
{button}
</Tooltip>
);
}

View File

@@ -5,12 +5,10 @@ import { Interactive, type InteractiveStatefulProps } from "@opal/core";
import type {
ContainerSizeVariants,
ExtremaSizeVariants,
IconFunctionComponent,
RichStr,
} from "@opal/types";
import { Text } from "@opal/components";
import type { TooltipSide } from "@opal/components";
import type { IconFunctionComponent } from "@opal/types";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";
import { Text, Tooltip, type TooltipSide } from "@opal/components";
import { cn } from "@opal/utils";
import { iconWrapper } from "@opal/components/buttons/icon-wrapper";
@@ -129,21 +127,10 @@ function SelectButton({
const resolvedTooltip =
tooltip ?? (foldable && disabled && children ? children : undefined);
if (!resolvedTooltip) return button;
return (
<TooltipPrimitive.Root>
<TooltipPrimitive.Trigger asChild>{button}</TooltipPrimitive.Trigger>
<TooltipPrimitive.Portal>
<TooltipPrimitive.Content
className="opal-tooltip"
side={tooltipSide}
sideOffset={4}
>
<Text>{resolvedTooltip}</Text>
</TooltipPrimitive.Content>
</TooltipPrimitive.Portal>
</TooltipPrimitive.Root>
<Tooltip tooltip={resolvedTooltip} side={tooltipSide}>
{button}
</Tooltip>
);
}

View File

@@ -5,10 +5,8 @@ import type { ButtonType, IconFunctionComponent } from "@opal/types";
import type { Route } from "next";
import { Interactive, type InteractiveStatefulVariant } from "@opal/core";
import { ContentAction } from "@opal/layouts";
import { Text } from "@opal/components";
import { Text, Tooltip } from "@opal/components";
import Link from "next/link";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";
import "@opal/components/tooltip.css";
// ---------------------------------------------------------------------------
// Types
@@ -145,18 +143,9 @@ function SidebarTab({
if (typeof children !== "string") return content;
if (folded) {
return (
<TooltipPrimitive.Root>
<TooltipPrimitive.Trigger asChild>{content}</TooltipPrimitive.Trigger>
<TooltipPrimitive.Portal>
<TooltipPrimitive.Content
className="opal-tooltip"
side="right"
sideOffset={4}
>
{children}
</TooltipPrimitive.Content>
</TooltipPrimitive.Portal>
</TooltipPrimitive.Root>
<Tooltip tooltip={children} side="right">
{content}
</Tooltip>
);
}
return content;

View File

@@ -0,0 +1,106 @@
import type { Meta, StoryObj } from "@storybook/react";
import { MessageCard } from "@opal/components/cards/message-card/components";
import { Button } from "@opal/components/buttons/button/components";
const meta: Meta<typeof MessageCard> = {
title: "opal/components/MessageCard",
component: MessageCard,
tags: ["autodocs"],
};
export default meta;
type Story = StoryObj<typeof MessageCard>;
export const Default: Story = {
render: () => (
<div className="w-[32rem]">
<MessageCard title="Note" description="This is a default message card." />
</div>
),
};
export const Info: Story = {
render: () => (
<div className="w-[32rem]">
<MessageCard
variant="info"
title="Heads up"
description="Changes apply to newly indexed documents only."
/>
</div>
),
};
export const Success: Story = {
render: () => (
<div className="w-[32rem]">
<MessageCard
variant="success"
title="All set"
description="Your embedding model has been updated successfully."
/>
</div>
),
};
export const Warning: Story = {
render: () => (
<div className="w-[32rem]">
<MessageCard
variant="warning"
title="Re-indexing required"
description="Toggle this setting to re-index all documents."
/>
</div>
),
};
export const Error: Story = {
render: () => (
<div className="w-[32rem]">
<MessageCard
variant="error"
title="Connection failed"
description="Unable to reach the embedding model server."
/>
</div>
),
};
export const WithBottomChildren: Story = {
render: () => (
<div className="w-[32rem]">
<MessageCard
variant="warning"
title="Action required"
description="Your documents need to be re-indexed after this change."
bottomChildren={
<div className="flex justify-end pt-2">
<Button prominence="secondary" size="sm">
Re-index Now
</Button>
</div>
}
/>
</div>
),
};
export const AllVariants: Story = {
render: () => (
<div className="flex flex-col gap-4 w-[32rem]">
{(["default", "info", "success", "warning", "error"] as const).map(
(variant) => (
<MessageCard
key={variant}
variant={variant}
title={`${
variant.charAt(0).toUpperCase() + variant.slice(1)
} variant`}
description={`This is a ${variant} message card.`}
/>
)
)}
</div>
),
};

View File

@@ -0,0 +1,47 @@
# MessageCard
**Import:** `import { MessageCard } from "@opal/components";`
A styled card for displaying messages, alerts, or status notifications. Uses `Content` internally
for consistent title/description/icon layout. Supports 5 variants with corresponding background
and border colors.
## Props
| Prop | Type | Default | Description |
|------|------|---------|-------------|
| `variant` | `"default" \| "info" \| "success" \| "warning" \| "error"` | `"default"` | Visual variant (controls background, border, and icon) |
| `icon` | `IconFunctionComponent` | per variant | Override the default variant icon |
| `title` | `string \| RichStr` | — | Main title text |
| `description` | `string \| RichStr` | — | Description below the title |
| `bottomChildren` | `ReactNode` | — | Content below a divider, under the main content |
| `rightChildren` | `ReactNode` | — | Content on the right side. Mutually exclusive with `onClose`. |
| `onClose` | `() => void` | — | Close button callback. When omitted, no close button is rendered. |
## Usage
```tsx
import { MessageCard } from "@opal/components";
// Simple info message
<MessageCard
variant="info"
title="Heads up"
description="Changes apply to newly indexed documents only."
/>
// Warning with bottom content
<MessageCard
variant="warning"
title="Re-indexing required"
description="Toggle this setting to re-index all documents."
bottomChildren={<Button>Re-index Now</Button>}
/>
// Error state
<MessageCard
variant="error"
title="Connection failed"
description="Unable to reach the embedding model server."
/>
```

View File

@@ -0,0 +1,160 @@
import "@opal/components/cards/message-card/styles.css";
import { cn } from "@opal/utils";
import type { RichStr, IconFunctionComponent } from "@opal/types";
import { ContentAction } from "@opal/layouts";
import { Button, Divider } from "@opal/components";
import {
SvgAlertCircle,
SvgAlertTriangle,
SvgCheckCircle,
SvgX,
SvgXOctagon,
} from "@opal/icons";
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
type MessageCardVariant = "default" | "info" | "success" | "warning" | "error";
interface MessageCardBaseProps {
/** Visual variant controlling background, border, and icon. @default "default" */
variant?: MessageCardVariant;
/** Override the default variant icon. */
icon?: IconFunctionComponent;
/** Main title text. */
title: string | RichStr;
/** Optional description below the title. */
description?: string | RichStr;
/**
* Content rendered below a divider, under the main content area.
* When provided, a `Divider` is inserted between the `ContentAction` and this node.
*/
bottomChildren?: React.ReactNode;
/** Ref forwarded to the root `<div>`. */
ref?: React.Ref<HTMLDivElement>;
}
type MessageCardProps = MessageCardBaseProps &
(
| {
/** Content rendered on the right side of the card. Mutually exclusive with `onClose`. */
rightChildren?: React.ReactNode;
onClose?: never;
}
| {
rightChildren?: never;
/** Close button callback. Mutually exclusive with `rightChildren`. */
onClose?: () => void;
}
);
// ---------------------------------------------------------------------------
// Variant config
// ---------------------------------------------------------------------------
const VARIANT_CONFIG: Record<
MessageCardVariant,
{ icon: IconFunctionComponent; iconClass: string }
> = {
default: { icon: SvgAlertCircle, iconClass: "stroke-text-03" },
info: { icon: SvgAlertCircle, iconClass: "stroke-status-info-05" },
success: { icon: SvgCheckCircle, iconClass: "stroke-status-success-05" },
warning: { icon: SvgAlertTriangle, iconClass: "stroke-status-warning-05" },
error: { icon: SvgXOctagon, iconClass: "stroke-status-error-05" },
};
// ---------------------------------------------------------------------------
// MessageCard
// ---------------------------------------------------------------------------
/**
* A styled card for displaying messages, alerts, or status notifications.
*
* Uses `ContentAction` internally for consistent title/description/icon layout
* with optional right-side actions. Supports 5 variants with corresponding
* background, border, and icon colors.
*
* `onClose` and `rightChildren` are mutually exclusive — specify one or neither.
*
* @example
* ```tsx
* import { MessageCard } from "@opal/components";
*
* // Simple message
* <MessageCard
* variant="info"
* title="Heads up"
* description="Changes apply to newly indexed documents only."
* />
*
* // With close button
* <MessageCard
* variant="warning"
* title="Re-indexing required"
* onClose={() => setDismissed(true)}
* />
*
* // With right children
* <MessageCard
* variant="error"
* title="Connection failed"
* rightChildren={<Button>Retry</Button>}
* />
* ```
*/
function MessageCard({
variant = "default",
icon: iconOverride,
title,
description,
bottomChildren,
rightChildren,
onClose,
ref,
}: MessageCardProps) {
const { icon: DefaultIcon, iconClass } = VARIANT_CONFIG[variant];
const Icon = iconOverride ?? DefaultIcon;
const right = onClose ? (
<Button
icon={SvgX}
prominence="internal"
size="md"
onClick={onClose}
aria-label="Close"
/>
) : (
rightChildren
);
return (
<div className="opal-message-card" data-variant={variant} ref={ref}>
<ContentAction
icon={(props) => (
<Icon {...props} className={cn(props.className, iconClass)} />
)}
title={title}
description={description}
sizePreset="main-ui"
variant="section"
paddingVariant="lg"
rightChildren={right}
/>
{bottomChildren && (
<>
<Divider paddingParallel="sm" paddingPerpendicular="xs" />
{bottomChildren}
</>
)}
</div>
);
}
export { MessageCard, type MessageCardProps, type MessageCardVariant };

View File

@@ -0,0 +1,25 @@
.opal-message-card {
@apply flex flex-col self-stretch rounded-16 border p-2;
}
/* Variant colors */
.opal-message-card[data-variant="default"] {
@apply bg-background-tint-01 border-border-01;
}
.opal-message-card[data-variant="info"] {
@apply bg-status-info-00 border-status-info-02;
}
.opal-message-card[data-variant="success"] {
@apply bg-status-success-00 border-status-success-02;
}
.opal-message-card[data-variant="warning"] {
@apply bg-status-warning-00 border-status-warning-02;
}
.opal-message-card[data-variant="error"] {
@apply bg-status-error-00 border-status-error-02;
}

View File

@@ -1,7 +1,10 @@
import "@opal/components/tooltip.css";
/* Shared types */
export type TooltipSide = "top" | "bottom" | "left" | "right";
/* Tooltip */
export {
Tooltip,
type TooltipProps,
type TooltipSide,
type TooltipAlign,
} from "@opal/components/tooltip/components";
/* Button */
export {
@@ -80,6 +83,13 @@ export {
type EmptyMessageCardProps,
} from "@opal/components/cards/empty-message-card/components";
/* MessageCard */
export {
MessageCard,
type MessageCardProps,
type MessageCardVariant,
} from "@opal/components/cards/message-card/components";
/* Pagination */
export {
Pagination,

View File

@@ -64,7 +64,7 @@ export default function InlineMarkdown({ content }: InlineMarkdownProps) {
// RichStr helpers
// ---------------------------------------------------------------------------
function isRichStr(value: unknown): value is RichStr {
export function isRichStr(value: unknown): value is RichStr {
return (
typeof value === "object" &&
value !== null &&

View File

@@ -0,0 +1,51 @@
# Tooltip
**Import:** `import { Tooltip } from "@opal/components";`
A minimal tooltip wrapper that shows content on hover. When `tooltip` is `undefined`, children
are returned as-is with no wrapping. Uses Radix Tooltip primitives internally.
Supports both uncontrolled (default hover behavior) and controlled (`open` + `onOpenChange`)
modes.
## Props
| Prop | Type | Default | Description |
|------|------|---------|-------------|
| `tooltip` | `ReactNode \| RichStr` | — | Tooltip content. `string`/`RichStr` rendered via `Text`; `ReactNode` rendered as-is. `undefined` = no tooltip. |
| `side` | `"top" \| "bottom" \| "left" \| "right"` | `"right"` | Which side the tooltip appears on |
| `align` | `"start" \| "center" \| "end"` | `"center"` | Alignment along the tooltip's side axis |
| `open` | `boolean` | — | Controlled open state. When omitted, uses default hover behavior. |
| `onOpenChange` | `(open: boolean) => void` | — | Callback when open state changes. Use with `open` for controlled mode. |
| `delayDuration` | `number` | — | Delay in ms before the tooltip appears on hover |
| `sideOffset` | `number` | `4` | Distance in pixels between the trigger and the tooltip |
## Usage
```tsx
import { Tooltip } from "@opal/components";
// Uncontrolled (default hover behavior)
<Tooltip tooltip="Delete this item">
<Button icon={SvgTrash} />
</Tooltip>
// Controlled
const [isOpen, setIsOpen] = useState(false);
<Tooltip tooltip="Details" open={isOpen} onOpenChange={setIsOpen}>
<Button icon={SvgInfo} />
</Tooltip>
// Conditional — no tooltip when undefined
<Tooltip tooltip={isDisabled ? "Not available" : undefined}>
<Button>Action</Button>
</Tooltip>
```
## Notes
- Children must be a single element compatible with Radix `asChild` (DOM element or a component
that forwards refs).
- `string` and `RichStr` content is rendered via `Text font="secondary-body" color="inherit"`.
- `ReactNode` content is rendered as-is for custom tooltip layouts.
- The `opal-tooltip` CSS class provides z-indexing, animations, and a `max-width: 20rem` cap.

View File

@@ -0,0 +1,63 @@
import type { Meta, StoryObj } from "@storybook/react";
import type { Decorator } from "@storybook/react";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";
import { Tooltip } from "@opal/components/tooltip/components";
import { Button } from "@opal/components/buttons/button/components";
import { Card } from "@opal/components";
const withTooltipProvider: Decorator = (Story) => (
<TooltipPrimitive.Provider>
<Story />
</TooltipPrimitive.Provider>
);
const meta: Meta<typeof Tooltip> = {
title: "opal/components/Tooltip",
component: Tooltip,
tags: ["autodocs"],
decorators: [withTooltipProvider],
};
export default meta;
type Story = StoryObj<typeof Tooltip>;
export const Default: Story = {
render: () => (
<Tooltip tooltip="This is a tooltip">
<Button prominence="secondary">Hover me</Button>
</Tooltip>
),
};
export const Sides: Story = {
render: () => (
<div className="flex gap-8 items-center py-16 px-32">
{(["top", "right", "bottom", "left"] as const).map((side) => (
<Tooltip key={side} tooltip={`Tooltip on ${side}`} side={side}>
<Button prominence="secondary" size="sm">
{side}
</Button>
</Tooltip>
))}
</div>
),
};
export const OnCard: Story = {
render: () => (
<Tooltip tooltip="Card tooltip appears on hover">
<Card border="solid" padding="md">
<p className="text-sm">Hover this card</p>
</Card>
</Tooltip>
),
};
export const NoTooltip: Story = {
name: "No tooltip (passthrough)",
render: () => (
<Tooltip tooltip={undefined}>
<Button prominence="secondary">No tooltip</Button>
</Tooltip>
),
};

View File

@@ -0,0 +1,131 @@
"use client";
import "@opal/components/tooltip/styles.css";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";
import type { RichStr } from "@opal/types";
import { Text } from "@opal/components";
import { isRichStr } from "@opal/components/text/InlineMarkdown";
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
type TooltipSide = "top" | "bottom" | "left" | "right";
type TooltipAlign = "start" | "center" | "end";
interface TooltipProps {
/**
* Tooltip content shown on hover. When `undefined`, the tooltip is not
* rendered and children are returned as-is.
*
* - `string` or `RichStr` — rendered via `Text` with consistent styling.
* - `ReactNode` — rendered as-is for custom tooltip content.
*/
tooltip?: React.ReactNode | RichStr;
/** Which side the tooltip appears on. @default "right" */
side?: TooltipSide;
/** Alignment along the tooltip's side axis. @default "center" */
align?: TooltipAlign;
/**
* Controlled open state. When provided, the tooltip's visibility is
* externally managed. When omitted, the tooltip uses Radix's default
* hover-based open handling.
*/
open?: boolean;
/**
* Callback fired when the tooltip's open state changes. Use with `open`
* for controlled behavior.
*/
onOpenChange?: (open: boolean) => void;
/**
* Delay in milliseconds before the tooltip appears on hover.
* Passed to `TooltipPrimitive.Root`.
*/
delayDuration?: number;
/** Distance in pixels between the trigger and the tooltip. @default 4 */
sideOffset?: number;
/**
* Children to wrap. Must be a single element compatible with Radix
* `asChild` (i.e. a DOM element or a component that forwards refs).
*/
children: React.ReactElement;
}
// ---------------------------------------------------------------------------
// Tooltip
// ---------------------------------------------------------------------------
/**
* A minimal tooltip wrapper that shows content on hover.
*
* Renders nothing extra when `tooltip` is `undefined` — just passes children
* through. When `tooltip` is provided, wraps children with a Radix tooltip.
*
* Supports both uncontrolled (default hover behavior) and controlled
* (`open` + `onOpenChange`) modes.
*
* @example
* ```tsx
* import { Tooltip } from "@opal/components";
*
* // Uncontrolled (default)
* <Tooltip tooltip="Delete this item">
* <Button icon={SvgTrash} />
* </Tooltip>
*
* // Controlled
* <Tooltip tooltip="Details" open={isOpen} onOpenChange={setIsOpen}>
* <Button icon={SvgInfo} />
* </Tooltip>
* ```
*/
function Tooltip({
tooltip,
side = "right",
align = "center",
open,
onOpenChange,
delayDuration,
sideOffset = 4,
children,
}: TooltipProps) {
if (tooltip == null) return children;
const content =
typeof tooltip === "string" || isRichStr(tooltip) ? (
<Text font="secondary-body" color="inherit">
{tooltip}
</Text>
) : (
tooltip
);
return (
<TooltipPrimitive.Root
open={open}
onOpenChange={onOpenChange}
delayDuration={delayDuration}
>
<TooltipPrimitive.Trigger asChild>{children}</TooltipPrimitive.Trigger>
<TooltipPrimitive.Portal>
<TooltipPrimitive.Content
className="opal-tooltip"
side={side}
align={align}
sideOffset={sideOffset}
>
{content}
</TooltipPrimitive.Content>
</TooltipPrimitive.Portal>
</TooltipPrimitive.Root>
);
}
export { Tooltip, type TooltipProps, type TooltipSide, type TooltipAlign };

View File

@@ -1,5 +1,3 @@
/* Shared tooltip content styling */
.opal-tooltip {
z-index: var(--z-tooltip, 1300);
max-width: 20rem;

View File

@@ -1,11 +1,8 @@
import "@opal/core/disabled/styles.css";
import "@opal/components/tooltip.css";
import React from "react";
import * as TooltipPrimitive from "@radix-ui/react-tooltip";
import { Slot } from "@radix-ui/react-slot";
import type { TooltipSide } from "@opal/components";
import { Tooltip, type TooltipSide } from "@opal/components";
import type { RichStr } from "@opal/types";
import { Text } from "@opal/components";
// ---------------------------------------------------------------------------
// Types
@@ -88,21 +85,10 @@ function Disabled({
if (!showTooltip) return wrapper;
// TODO(@raunakab): Replace this raw Radix tooltip with the opalified
// Tooltip component once it lands.
return (
<TooltipPrimitive.Root>
<TooltipPrimitive.Trigger asChild>{wrapper}</TooltipPrimitive.Trigger>
<TooltipPrimitive.Portal>
<TooltipPrimitive.Content
className="opal-tooltip"
side={tooltipSide}
sideOffset={4}
>
<Text font="secondary-body">{tooltip}</Text>
</TooltipPrimitive.Content>
</TooltipPrimitive.Portal>
</TooltipPrimitive.Root>
<Tooltip tooltip={tooltip} side={tooltipSide}>
{wrapper}
</Tooltip>
);
}

View File

@@ -15,6 +15,7 @@ type InteractiveStatefulVariant =
| "select-heavy"
| "select-card"
| "select-tinted"
| "select-input"
| "select-filter"
| "sidebar-heavy"
| "sidebar-light";
@@ -35,6 +36,7 @@ interface InteractiveStatefulProps
* - `"select-heavy"` — tinted selected background (for list rows, model pickers)
* - `"select-card"` — like select-heavy but filled state has a visible background (for cards/larger surfaces)
* - `"select-tinted"` — like select-heavy but with a tinted rest background
* - `"select-input"` — rests at neutral-00 (matches input bar), hover/open shows neutral-03 + border-01
* - `"select-filter"` — like select-tinted for empty/filled; selected state uses inverted tint backgrounds and inverted text (for filter buttons)
* - `"sidebar-heavy"` — sidebar navigation items: muted when unselected (text-03/text-02), bold when selected (text-04/text-03)
* - `"sidebar-light"` — sidebar navigation items: uniformly muted across all states (text-02/text-02)

View File

@@ -350,6 +350,41 @@
--interactive-foreground-icon: var(--text-01);
}
/* ---------------------------------------------------------------------------
Select-Input — Empty
Matches input bar background at rest, tints on hover/open.
--------------------------------------------------------------------------- */
.interactive[data-interactive-variant="select-input"][data-interactive-state="empty"] {
@apply bg-background-neutral-00;
--interactive-foreground: var(--text-04);
--interactive-foreground-icon: var(--text-03);
}
.interactive[data-interactive-variant="select-input"][data-interactive-state="empty"]:hover:not(
[data-disabled]
),
.interactive[data-interactive-variant="select-input"][data-interactive-state="empty"][data-interaction="hover"]:not(
[data-disabled]
) {
@apply bg-background-neutral-03;
--interactive-foreground: var(--text-04);
--interactive-foreground-icon: var(--text-03);
}
.interactive[data-interactive-variant="select-input"][data-interactive-state="empty"]:active:not(
[data-disabled]
),
.interactive[data-interactive-variant="select-input"][data-interactive-state="empty"][data-interaction="active"]:not(
[data-disabled]
) {
@apply bg-background-neutral-03;
--interactive-foreground: var(--text-05);
--interactive-foreground-icon: var(--text-05);
}
.interactive[data-interactive-variant="select-input"][data-interactive-state="empty"][data-disabled] {
@apply bg-transparent;
--interactive-foreground: var(--text-01);
--interactive-foreground-icon: var(--text-01);
}
/* ---------------------------------------------------------------------------
Select-Tinted — Filled
--------------------------------------------------------------------------- */

View File

@@ -54,6 +54,8 @@ interface InputLayoutProps {
withLabel?: boolean | string;
disabled?: boolean;
/** Ref forwarded to the inner content `Section`. */
ref?: React.Ref<HTMLDivElement>;
children?: React.ReactNode;
title: string | RichStr;
/** Tag rendered inline beside the title (passed through to Content). */
@@ -73,6 +75,7 @@ export interface VerticalProps extends InputLayoutProps {
function Vertical({
withLabel: withLabelProp = false,
disabled,
ref,
children,
subDescription,
title,
@@ -84,7 +87,7 @@ function Vertical({
typeof withLabelProp === "string" ? withLabelProp : undefined;
const content = (
<Section gap={0.25} alignItems="start">
<Section ref={ref} gap={0.25} alignItems="start">
<Content
title={title}
description={description}
@@ -123,6 +126,7 @@ export interface HorizontalProps extends InputLayoutProps {
function Horizontal({
withLabel: withLabelProp = false,
disabled,
ref,
children,
center,
title,
@@ -134,7 +138,7 @@ function Horizontal({
typeof withLabelProp === "string" ? withLabelProp : undefined;
const content = (
<Section gap={0.25} alignItems="start">
<Section ref={ref} gap={0.25} alignItems="start">
<Section
flexDirection="row"
justifyContent="between"
@@ -210,9 +214,14 @@ export type InputErrorType = "error" | "warning";
interface InputErrorTextProps {
children?: React.ReactNode;
type?: InputErrorType;
ref?: React.Ref<HTMLDivElement>;
}
function InputErrorText({ children, type = "error" }: InputErrorTextProps) {
function InputErrorText({
children,
type = "error",
ref,
}: InputErrorTextProps) {
const Icon = type === "error" ? SvgXOctagon : SvgAlertCircle;
const colorClass =
type === "error" ? "text-status-error-05" : "text-status-warning-05";
@@ -220,7 +229,7 @@ function InputErrorText({ children, type = "error" }: InputErrorTextProps) {
type === "error" ? "stroke-status-error-05" : "stroke-status-warning-05";
return (
<div className="px-1">
<div ref={ref} className="px-1">
{/* TODO(@raunakab): update this with `Content` when it supports custom colours */}
<Section flexDirection="row" justifyContent="start" gap={0.25}>
<Icon size={12} className={strokeClass} />
@@ -250,10 +259,12 @@ function InputDivider() {
// InputPadder
// ---------------------------------------------------------------------------
type InputPadderProps = WithoutStyles<React.HTMLAttributes<HTMLDivElement>>;
type InputPadderProps = WithoutStyles<React.HTMLAttributes<HTMLDivElement>> & {
ref?: React.Ref<HTMLDivElement>;
};
function InputPadder(props: InputPadderProps) {
return <div {...props} className="p-2 w-full" />;
function InputPadder({ ref, ...props }: InputPadderProps) {
return <div ref={ref} {...props} className="p-2 w-full" />;
}
// ---------------------------------------------------------------------------

388
web/package-lock.json generated
View File

@@ -62,7 +62,7 @@
"mdast-util-find-and-replace": "^3.0.1",
"mime": "^4.1.0",
"motion": "^12.29.0",
"next": "16.1.7",
"next": "16.2.3",
"next-themes": "^0.4.4",
"postcss": "^8.5.6",
"posthog-js": "^1.176.0",
@@ -1676,7 +1676,9 @@
}
},
"node_modules/@img/colour": {
"version": "1.0.0",
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@img/colour/-/colour-1.1.0.tgz",
"integrity": "sha512-Td76q7j57o/tLVdgS746cYARfSyxk8iEfRxewL9h4OMzYhbW4TAcppl0mT4eyqXddh6L/jwoM75mo7ixa/pCeQ==",
"license": "MIT",
"optional": true,
"engines": {
@@ -1788,9 +1790,9 @@
}
},
"node_modules/@img/sharp-libvips-linux-ppc64": {
"version": "1.2.3",
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-ppc64/-/sharp-libvips-linux-ppc64-1.2.3.tgz",
"integrity": "sha512-Y2T7IsQvJLMCBM+pmPbM3bKT/yYJvVtLJGfCs4Sp95SjvnFIjynbjzsa7dY1fRJX45FTSfDksbTp6AGWudiyCg==",
"version": "1.2.4",
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-ppc64/-/sharp-libvips-linux-ppc64-1.2.4.tgz",
"integrity": "sha512-FMuvGijLDYG6lW+b/UvyilUWu5Ayu+3r2d1S8notiGCIyYU/76eig1UfMmkZ7vwgOrzKzlQbFSuQfgm7GYUPpA==",
"cpu": [
"ppc64"
],
@@ -1803,6 +1805,22 @@
"url": "https://opencollective.com/libvips"
}
},
"node_modules/@img/sharp-libvips-linux-riscv64": {
"version": "1.2.4",
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-riscv64/-/sharp-libvips-linux-riscv64-1.2.4.tgz",
"integrity": "sha512-oVDbcR4zUC0ce82teubSm+x6ETixtKZBh/qbREIOcI3cULzDyb18Sr/Wcyx7NRQeQzOiHTNbZFF1UwPS2scyGA==",
"cpu": [
"riscv64"
],
"license": "LGPL-3.0-or-later",
"optional": true,
"os": [
"linux"
],
"funding": {
"url": "https://opencollective.com/libvips"
}
},
"node_modules/@img/sharp-libvips-linux-s390x": {
"version": "1.0.4",
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-s390x/-/sharp-libvips-linux-s390x-1.0.4.tgz",
@@ -1912,9 +1930,9 @@
}
},
"node_modules/@img/sharp-linux-ppc64": {
"version": "0.34.4",
"resolved": "https://registry.npmjs.org/@img/sharp-linux-ppc64/-/sharp-linux-ppc64-0.34.4.tgz",
"integrity": "sha512-F4PDtF4Cy8L8hXA2p3TO6s4aDt93v+LKmpcYFLAVdkkD3hSxZzee0rh6/+94FpAynsuMpLX5h+LRsSG3rIciUQ==",
"version": "0.34.5",
"resolved": "https://registry.npmjs.org/@img/sharp-linux-ppc64/-/sharp-linux-ppc64-0.34.5.tgz",
"integrity": "sha512-7zznwNaqW6YtsfrGGDA6BRkISKAAE1Jo0QdpNYXNMHu2+0dTrPflTLNkpc8l7MUP5M16ZJcUvysVWWrMefZquA==",
"cpu": [
"ppc64"
],
@@ -1930,7 +1948,29 @@
"url": "https://opencollective.com/libvips"
},
"optionalDependencies": {
"@img/sharp-libvips-linux-ppc64": "1.2.3"
"@img/sharp-libvips-linux-ppc64": "1.2.4"
}
},
"node_modules/@img/sharp-linux-riscv64": {
"version": "0.34.5",
"resolved": "https://registry.npmjs.org/@img/sharp-linux-riscv64/-/sharp-linux-riscv64-0.34.5.tgz",
"integrity": "sha512-51gJuLPTKa7piYPaVs8GmByo7/U7/7TZOq+cnXJIHZKavIRHAP77e3N2HEl3dgiqdD/w0yUfiJnII77PuDDFdw==",
"cpu": [
"riscv64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": "^18.17.0 || ^20.3.0 || >=21.0.0"
},
"funding": {
"url": "https://opencollective.com/libvips"
},
"optionalDependencies": {
"@img/sharp-libvips-linux-riscv64": "1.2.4"
}
},
"node_modules/@img/sharp-linux-s390x": {
@@ -2041,9 +2081,9 @@
}
},
"node_modules/@img/sharp-win32-arm64": {
"version": "0.34.4",
"resolved": "https://registry.npmjs.org/@img/sharp-win32-arm64/-/sharp-win32-arm64-0.34.4.tgz",
"integrity": "sha512-2Q250do/5WXTwxW3zjsEuMSv5sUU4Tq9VThWKlU2EYLm4MB7ZeMwF+SFJutldYODXF6jzc6YEOC+VfX0SZQPqA==",
"version": "0.34.5",
"resolved": "https://registry.npmjs.org/@img/sharp-win32-arm64/-/sharp-win32-arm64-0.34.5.tgz",
"integrity": "sha512-WQ3AgWCWYSb2yt+IG8mnC6Jdk9Whs7O0gxphblsLvdhSpSTtmu69ZG1Gkb6NuvxsNACwiPV6cNSZNzt0KPsw7g==",
"cpu": [
"arm64"
],
@@ -2897,9 +2937,9 @@
}
},
"node_modules/@next/env": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/env/-/env-16.1.7.tgz",
"integrity": "sha512-rJJbIdJB/RQr2F1nylZr/PJzamvNNhfr3brdKP6s/GW850jbtR70QlSfFselvIBbcPUOlQwBakexjFzqLzF6pg==",
"version": "16.2.3",
"resolved": "https://registry.npmjs.org/@next/env/-/env-16.2.3.tgz",
"integrity": "sha512-ZWXyj4uNu4GCWQw9cjRxWlbD+33mcDszIo9iQxFnBX3Wmgq9ulaSJcl6VhuWx5pCWqqD+9W6Wfz7N0lM5lYPMA==",
"license": "MIT"
},
"node_modules/@next/eslint-plugin-next": {
@@ -2943,9 +2983,9 @@
}
},
"node_modules/@next/swc-darwin-arm64": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-16.1.7.tgz",
"integrity": "sha512-b2wWIE8sABdyafc4IM8r5Y/dS6kD80JRtOGrUiKTsACFQfWWgUQ2NwoUX1yjFMXVsAwcQeNpnucF2ZrujsBBPg==",
"version": "16.2.3",
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-16.2.3.tgz",
"integrity": "sha512-u37KDKTKQ+OQLvY+z7SNXixwo4Q2/IAJFDzU1fYe66IbCE51aDSAzkNDkWmLN0yjTUh4BKBd+hb69jYn6qqqSg==",
"cpu": [
"arm64"
],
@@ -2959,9 +2999,9 @@
}
},
"node_modules/@next/swc-darwin-x64": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-16.1.7.tgz",
"integrity": "sha512-zcnVaaZulS1WL0Ss38R5Q6D2gz7MtBu8GZLPfK+73D/hp4GFMrC2sudLky1QibfV7h6RJBJs/gOFvYP0X7UVlQ==",
"version": "16.2.3",
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-16.2.3.tgz",
"integrity": "sha512-gHjL/qy6Q6CG3176FWbAKyKh9IfntKZTB3RY/YOJdDFpHGsUDXVH38U4mMNpHVGXmeYW4wj22dMp1lTfmu/bTQ==",
"cpu": [
"x64"
],
@@ -2975,9 +3015,9 @@
}
},
"node_modules/@next/swc-linux-arm64-gnu": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-16.1.7.tgz",
"integrity": "sha512-2ant89Lux/Q3VyC8vNVg7uBaFVP9SwoK2jJOOR0L8TQnX8CAYnh4uctAScy2Hwj2dgjVHqHLORQZJ2wH6VxhSQ==",
"version": "16.2.3",
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-16.2.3.tgz",
"integrity": "sha512-U6vtblPtU/P14Y/b/n9ZY0GOxbbIhTFuaFR7F4/uMBidCi2nSdaOFhA0Go81L61Zd6527+yvuX44T4ksnf8T+Q==",
"cpu": [
"arm64"
],
@@ -2991,9 +3031,9 @@
}
},
"node_modules/@next/swc-linux-arm64-musl": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-16.1.7.tgz",
"integrity": "sha512-uufcze7LYv0FQg9GnNeZ3/whYfo+1Q3HnQpm16o6Uyi0OVzLlk2ZWoY7j07KADZFY8qwDbsmFnMQP3p3+Ftprw==",
"version": "16.2.3",
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-16.2.3.tgz",
"integrity": "sha512-/YV0LgjHUmfhQpn9bVoGc4x4nan64pkhWR5wyEV8yCOfwwrH630KpvRg86olQHTwHIn1z59uh6JwKvHq1h4QEw==",
"cpu": [
"arm64"
],
@@ -3007,9 +3047,9 @@
}
},
"node_modules/@next/swc-linux-x64-gnu": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-16.1.7.tgz",
"integrity": "sha512-KWVf2gxYvHtvuT+c4MBOGxuse5TD7DsMFYSxVxRBnOzok/xryNeQSjXgxSv9QpIVlaGzEn/pIuI6Koosx8CGWA==",
"version": "16.2.3",
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-16.2.3.tgz",
"integrity": "sha512-/HiWEcp+WMZ7VajuiMEFGZ6cg0+aYZPqCJD3YJEfpVWQsKYSjXQG06vJP6F1rdA03COD9Fef4aODs3YxKx+RDQ==",
"cpu": [
"x64"
],
@@ -3023,9 +3063,9 @@
}
},
"node_modules/@next/swc-linux-x64-musl": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-16.1.7.tgz",
"integrity": "sha512-HguhaGwsGr1YAGs68uRKc4aGWxLET+NevJskOcCAwXbwj0fYX0RgZW2gsOCzr9S11CSQPIkxmoSbuVaBp4Z3dA==",
"version": "16.2.3",
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-16.2.3.tgz",
"integrity": "sha512-Kt44hGJfZSefebhk/7nIdivoDr3Ugp5+oNz9VvF3GUtfxutucUIHfIO0ZYO8QlOPDQloUVQn4NVC/9JvHRk9hw==",
"cpu": [
"x64"
],
@@ -3039,9 +3079,9 @@
}
},
"node_modules/@next/swc-win32-arm64-msvc": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-16.1.7.tgz",
"integrity": "sha512-S0n3KrDJokKTeFyM/vGGGR8+pCmXYrjNTk2ZozOL1C/JFdfUIL9O1ATaJOl5r2POe56iRChbsszrjMAdWSv7kQ==",
"version": "16.2.3",
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-16.2.3.tgz",
"integrity": "sha512-O2NZ9ie3Tq6xj5Z5CSwBT3+aWAMW2PIZ4egUi9MaWLkwaehgtB7YZjPm+UpcNpKOme0IQuqDcor7BsW6QBiQBw==",
"cpu": [
"arm64"
],
@@ -3055,9 +3095,9 @@
}
},
"node_modules/@next/swc-win32-x64-msvc": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-16.1.7.tgz",
"integrity": "sha512-mwgtg8CNZGYm06LeEd+bNnOUfwOyNem/rOiP14Lsz+AnUY92Zq/LXwtebtUiaeVkhbroRCQ0c8GlR4UT1U+0yg==",
"version": "16.2.3",
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-16.2.3.tgz",
"integrity": "sha512-Ibm29/GgB/ab5n7XKqlStkm54qqZE8v2FnijUPBgrd67FWrac45o/RsNlaOWjme/B5UqeWt/8KM4aWBwA1D2Kw==",
"cpu": [
"x64"
],
@@ -14088,12 +14128,12 @@
"license": "MIT"
},
"node_modules/next": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/next/-/next-16.1.7.tgz",
"integrity": "sha512-WM0L7WrSvKwoLegLYr6V+mz+RIofqQgVAfHhMp9a88ms0cFX8iX9ew+snpWlSBwpkURJOUdvCEt3uLl3NNzvWg==",
"version": "16.2.3",
"resolved": "https://registry.npmjs.org/next/-/next-16.2.3.tgz",
"integrity": "sha512-9V3zV4oZFza3PVev5/poB9g0dEafVcgNyQ8eTRop8GvxZjV2G15FC5ARuG1eFD42QgeYkzJBJzHghNP8Ad9xtA==",
"license": "MIT",
"dependencies": {
"@next/env": "16.1.7",
"@next/env": "16.2.3",
"@swc/helpers": "0.5.15",
"baseline-browser-mapping": "^2.9.19",
"caniuse-lite": "^1.0.30001579",
@@ -14107,15 +14147,15 @@
"node": ">=20.9.0"
},
"optionalDependencies": {
"@next/swc-darwin-arm64": "16.1.7",
"@next/swc-darwin-x64": "16.1.7",
"@next/swc-linux-arm64-gnu": "16.1.7",
"@next/swc-linux-arm64-musl": "16.1.7",
"@next/swc-linux-x64-gnu": "16.1.7",
"@next/swc-linux-x64-musl": "16.1.7",
"@next/swc-win32-arm64-msvc": "16.1.7",
"@next/swc-win32-x64-msvc": "16.1.7",
"sharp": "^0.34.4"
"@next/swc-darwin-arm64": "16.2.3",
"@next/swc-darwin-x64": "16.2.3",
"@next/swc-linux-arm64-gnu": "16.2.3",
"@next/swc-linux-arm64-musl": "16.2.3",
"@next/swc-linux-x64-gnu": "16.2.3",
"@next/swc-linux-x64-musl": "16.2.3",
"@next/swc-win32-arm64-msvc": "16.2.3",
"@next/swc-win32-x64-msvc": "16.2.3",
"sharp": "^0.34.5"
},
"peerDependencies": {
"@opentelemetry/api": "^1.1.0",
@@ -14148,10 +14188,32 @@
"react-dom": "^16.8 || ^17 || ^18 || ^19 || ^19.0.0-rc"
}
},
"node_modules/next/node_modules/@img/sharp-darwin-arm64": {
"version": "0.34.5",
"resolved": "https://registry.npmjs.org/@img/sharp-darwin-arm64/-/sharp-darwin-arm64-0.34.5.tgz",
"integrity": "sha512-imtQ3WMJXbMY4fxb/Ndp6HBTNVtWCUI0WdobyheGf5+ad6xX8VIDO8u2xE4qc/fr08CKG/7dDseFtn6M6g/r3w==",
"cpu": [
"arm64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": "^18.17.0 || ^20.3.0 || >=21.0.0"
},
"funding": {
"url": "https://opencollective.com/libvips"
},
"optionalDependencies": {
"@img/sharp-libvips-darwin-arm64": "1.2.4"
}
},
"node_modules/next/node_modules/@img/sharp-darwin-x64": {
"version": "0.34.4",
"resolved": "https://registry.npmjs.org/@img/sharp-darwin-x64/-/sharp-darwin-x64-0.34.4.tgz",
"integrity": "sha512-rZheupWIoa3+SOdF/IcUe1ah4ZDpKBGWcsPX6MT0lYniH9micvIU7HQkYTfrx5Xi8u+YqwLtxC/3vl8TQN6rMg==",
"version": "0.34.5",
"resolved": "https://registry.npmjs.org/@img/sharp-darwin-x64/-/sharp-darwin-x64-0.34.5.tgz",
"integrity": "sha512-YNEFAF/4KQ/PeW0N+r+aVVsoIY0/qxxikF2SWdp+NRkmMB7y9LBZAVqQ4yhGCm/H3H270OSykqmQMKLBhBJDEw==",
"cpu": [
"x64"
],
@@ -14167,13 +14229,29 @@
"url": "https://opencollective.com/libvips"
},
"optionalDependencies": {
"@img/sharp-libvips-darwin-x64": "1.2.3"
"@img/sharp-libvips-darwin-x64": "1.2.4"
}
},
"node_modules/next/node_modules/@img/sharp-libvips-darwin-arm64": {
"version": "1.2.4",
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-arm64/-/sharp-libvips-darwin-arm64-1.2.4.tgz",
"integrity": "sha512-zqjjo7RatFfFoP0MkQ51jfuFZBnVE2pRiaydKJ1G/rHZvnsrHAOcQALIi9sA5co5xenQdTugCvtb1cuf78Vf4g==",
"cpu": [
"arm64"
],
"license": "LGPL-3.0-or-later",
"optional": true,
"os": [
"darwin"
],
"funding": {
"url": "https://opencollective.com/libvips"
}
},
"node_modules/next/node_modules/@img/sharp-libvips-darwin-x64": {
"version": "1.2.3",
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-x64/-/sharp-libvips-darwin-x64-1.2.3.tgz",
"integrity": "sha512-Ju+g2xn1E2AKO6YBhxjj+ACcsPQRHT0bhpglxcEf+3uyPY+/gL8veniKoo96335ZaPo03bdDXMv0t+BBFAbmRA==",
"version": "1.2.4",
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-x64/-/sharp-libvips-darwin-x64-1.2.4.tgz",
"integrity": "sha512-1IOd5xfVhlGwX+zXv2N93k0yMONvUlANylbJw1eTah8K/Jtpi15KC+WSiaX/nBmbm2HxRM1gZ0nSdjSsrZbGKg==",
"cpu": [
"x64"
],
@@ -14187,9 +14265,9 @@
}
},
"node_modules/next/node_modules/@img/sharp-libvips-linux-arm": {
"version": "1.2.3",
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm/-/sharp-libvips-linux-arm-1.2.3.tgz",
"integrity": "sha512-x1uE93lyP6wEwGvgAIV0gP6zmaL/a0tGzJs/BIDDG0zeBhMnuUPm7ptxGhUbcGs4okDJrk4nxgrmxpib9g6HpA==",
"version": "1.2.4",
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm/-/sharp-libvips-linux-arm-1.2.4.tgz",
"integrity": "sha512-bFI7xcKFELdiNCVov8e44Ia4u2byA+l3XtsAj+Q8tfCwO6BQ8iDojYdvoPMqsKDkuoOo+X6HZA0s0q11ANMQ8A==",
"cpu": [
"arm"
],
@@ -14203,9 +14281,9 @@
}
},
"node_modules/next/node_modules/@img/sharp-libvips-linux-arm64": {
"version": "1.2.3",
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm64/-/sharp-libvips-linux-arm64-1.2.3.tgz",
"integrity": "sha512-I4RxkXU90cpufazhGPyVujYwfIm9Nk1QDEmiIsaPwdnm013F7RIceaCc87kAH+oUB1ezqEvC6ga4m7MSlqsJvQ==",
"version": "1.2.4",
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm64/-/sharp-libvips-linux-arm64-1.2.4.tgz",
"integrity": "sha512-excjX8DfsIcJ10x1Kzr4RcWe1edC9PquDRRPx3YVCvQv+U5p7Yin2s32ftzikXojb1PIFc/9Mt28/y+iRklkrw==",
"cpu": [
"arm64"
],
@@ -14219,9 +14297,9 @@
}
},
"node_modules/next/node_modules/@img/sharp-libvips-linux-s390x": {
"version": "1.2.3",
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-s390x/-/sharp-libvips-linux-s390x-1.2.3.tgz",
"integrity": "sha512-RgWrs/gVU7f+K7P+KeHFaBAJlNkD1nIZuVXdQv6S+fNA6syCcoboNjsV2Pou7zNlVdNQoQUpQTk8SWDHUA3y/w==",
"version": "1.2.4",
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-s390x/-/sharp-libvips-linux-s390x-1.2.4.tgz",
"integrity": "sha512-qmp9VrzgPgMoGZyPvrQHqk02uyjA0/QrTO26Tqk6l4ZV0MPWIW6LTkqOIov+J1yEu7MbFQaDpwdwJKhbJvuRxQ==",
"cpu": [
"s390x"
],
@@ -14235,9 +14313,9 @@
}
},
"node_modules/next/node_modules/@img/sharp-libvips-linux-x64": {
"version": "1.2.3",
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-x64/-/sharp-libvips-linux-x64-1.2.3.tgz",
"integrity": "sha512-3JU7LmR85K6bBiRzSUc/Ff9JBVIFVvq6bomKE0e63UXGeRw2HPVEjoJke1Yx+iU4rL7/7kUjES4dZ/81Qjhyxg==",
"version": "1.2.4",
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-x64/-/sharp-libvips-linux-x64-1.2.4.tgz",
"integrity": "sha512-tJxiiLsmHc9Ax1bz3oaOYBURTXGIRDODBqhveVHonrHJ9/+k89qbLl0bcJns+e4t4rvaNBxaEZsFtSfAdquPrw==",
"cpu": [
"x64"
],
@@ -14251,9 +14329,9 @@
}
},
"node_modules/next/node_modules/@img/sharp-libvips-linuxmusl-arm64": {
"version": "1.2.3",
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-arm64/-/sharp-libvips-linuxmusl-arm64-1.2.3.tgz",
"integrity": "sha512-F9q83RZ8yaCwENw1GieztSfj5msz7GGykG/BA+MOUefvER69K/ubgFHNeSyUu64amHIYKGDs4sRCMzXVj8sEyw==",
"version": "1.2.4",
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-arm64/-/sharp-libvips-linuxmusl-arm64-1.2.4.tgz",
"integrity": "sha512-FVQHuwx1IIuNow9QAbYUzJ+En8KcVm9Lk5+uGUQJHaZmMECZmOlix9HnH7n1TRkXMS0pGxIJokIVB9SuqZGGXw==",
"cpu": [
"arm64"
],
@@ -14267,9 +14345,9 @@
}
},
"node_modules/next/node_modules/@img/sharp-libvips-linuxmusl-x64": {
"version": "1.2.3",
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-x64/-/sharp-libvips-linuxmusl-x64-1.2.3.tgz",
"integrity": "sha512-U5PUY5jbc45ANM6tSJpsgqmBF/VsL6LnxJmIf11kB7J5DctHgqm0SkuXzVWtIY90GnJxKnC/JT251TDnk1fu/g==",
"version": "1.2.4",
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-x64/-/sharp-libvips-linuxmusl-x64-1.2.4.tgz",
"integrity": "sha512-+LpyBk7L44ZIXwz/VYfglaX/okxezESc6UxDSoyo2Ks6Jxc4Y7sGjpgU9s4PMgqgjj1gZCylTieNamqA1MF7Dg==",
"cpu": [
"x64"
],
@@ -14283,9 +14361,9 @@
}
},
"node_modules/next/node_modules/@img/sharp-linux-arm": {
"version": "0.34.4",
"resolved": "https://registry.npmjs.org/@img/sharp-linux-arm/-/sharp-linux-arm-0.34.4.tgz",
"integrity": "sha512-Xyam4mlqM0KkTHYVSuc6wXRmM7LGN0P12li03jAnZ3EJWZqj83+hi8Y9UxZUbxsgsK1qOEwg7O0Bc0LjqQVtxA==",
"version": "0.34.5",
"resolved": "https://registry.npmjs.org/@img/sharp-linux-arm/-/sharp-linux-arm-0.34.5.tgz",
"integrity": "sha512-9dLqsvwtg1uuXBGZKsxem9595+ujv0sJ6Vi8wcTANSFpwV/GONat5eCkzQo/1O6zRIkh0m/8+5BjrRr7jDUSZw==",
"cpu": [
"arm"
],
@@ -14301,13 +14379,13 @@
"url": "https://opencollective.com/libvips"
},
"optionalDependencies": {
"@img/sharp-libvips-linux-arm": "1.2.3"
"@img/sharp-libvips-linux-arm": "1.2.4"
}
},
"node_modules/next/node_modules/@img/sharp-linux-arm64": {
"version": "0.34.4",
"resolved": "https://registry.npmjs.org/@img/sharp-linux-arm64/-/sharp-linux-arm64-0.34.4.tgz",
"integrity": "sha512-YXU1F/mN/Wu786tl72CyJjP/Ngl8mGHN1hST4BGl+hiW5jhCnV2uRVTNOcaYPs73NeT/H8Upm3y9582JVuZHrQ==",
"version": "0.34.5",
"resolved": "https://registry.npmjs.org/@img/sharp-linux-arm64/-/sharp-linux-arm64-0.34.5.tgz",
"integrity": "sha512-bKQzaJRY/bkPOXyKx5EVup7qkaojECG6NLYswgktOZjaXecSAeCWiZwwiFf3/Y+O1HrauiE3FVsGxFg8c24rZg==",
"cpu": [
"arm64"
],
@@ -14323,13 +14401,13 @@
"url": "https://opencollective.com/libvips"
},
"optionalDependencies": {
"@img/sharp-libvips-linux-arm64": "1.2.3"
"@img/sharp-libvips-linux-arm64": "1.2.4"
}
},
"node_modules/next/node_modules/@img/sharp-linux-s390x": {
"version": "0.34.4",
"resolved": "https://registry.npmjs.org/@img/sharp-linux-s390x/-/sharp-linux-s390x-0.34.4.tgz",
"integrity": "sha512-qVrZKE9Bsnzy+myf7lFKvng6bQzhNUAYcVORq2P7bDlvmF6u2sCmK2KyEQEBdYk+u3T01pVsPrkj943T1aJAsw==",
"version": "0.34.5",
"resolved": "https://registry.npmjs.org/@img/sharp-linux-s390x/-/sharp-linux-s390x-0.34.5.tgz",
"integrity": "sha512-nQtCk0PdKfho3eC5MrbQoigJ2gd1CgddUMkabUj+rBevs8tZ2cULOx46E7oyX+04WGfABgIwmMC0VqieTiR4jg==",
"cpu": [
"s390x"
],
@@ -14345,13 +14423,13 @@
"url": "https://opencollective.com/libvips"
},
"optionalDependencies": {
"@img/sharp-libvips-linux-s390x": "1.2.3"
"@img/sharp-libvips-linux-s390x": "1.2.4"
}
},
"node_modules/next/node_modules/@img/sharp-linux-x64": {
"version": "0.34.4",
"resolved": "https://registry.npmjs.org/@img/sharp-linux-x64/-/sharp-linux-x64-0.34.4.tgz",
"integrity": "sha512-ZfGtcp2xS51iG79c6Vhw9CWqQC8l2Ot8dygxoDoIQPTat/Ov3qAa8qpxSrtAEAJW+UjTXc4yxCjNfxm4h6Xm2A==",
"version": "0.34.5",
"resolved": "https://registry.npmjs.org/@img/sharp-linux-x64/-/sharp-linux-x64-0.34.5.tgz",
"integrity": "sha512-MEzd8HPKxVxVenwAa+JRPwEC7QFjoPWuS5NZnBt6B3pu7EG2Ge0id1oLHZpPJdn3OQK+BQDiw9zStiHBTJQQQQ==",
"cpu": [
"x64"
],
@@ -14367,13 +14445,13 @@
"url": "https://opencollective.com/libvips"
},
"optionalDependencies": {
"@img/sharp-libvips-linux-x64": "1.2.3"
"@img/sharp-libvips-linux-x64": "1.2.4"
}
},
"node_modules/next/node_modules/@img/sharp-linuxmusl-arm64": {
"version": "0.34.4",
"resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-arm64/-/sharp-linuxmusl-arm64-0.34.4.tgz",
"integrity": "sha512-8hDVvW9eu4yHWnjaOOR8kHVrew1iIX+MUgwxSuH2XyYeNRtLUe4VNioSqbNkB7ZYQJj9rUTT4PyRscyk2PXFKA==",
"version": "0.34.5",
"resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-arm64/-/sharp-linuxmusl-arm64-0.34.5.tgz",
"integrity": "sha512-fprJR6GtRsMt6Kyfq44IsChVZeGN97gTD331weR1ex1c1rypDEABN6Tm2xa1wE6lYb5DdEnk03NZPqA7Id21yg==",
"cpu": [
"arm64"
],
@@ -14389,13 +14467,13 @@
"url": "https://opencollective.com/libvips"
},
"optionalDependencies": {
"@img/sharp-libvips-linuxmusl-arm64": "1.2.3"
"@img/sharp-libvips-linuxmusl-arm64": "1.2.4"
}
},
"node_modules/next/node_modules/@img/sharp-linuxmusl-x64": {
"version": "0.34.4",
"resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-x64/-/sharp-linuxmusl-x64-0.34.4.tgz",
"integrity": "sha512-lU0aA5L8QTlfKjpDCEFOZsTYGn3AEiO6db8W5aQDxj0nQkVrZWmN3ZP9sYKWJdtq3PWPhUNlqehWyXpYDcI9Sg==",
"version": "0.34.5",
"resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-x64/-/sharp-linuxmusl-x64-0.34.5.tgz",
"integrity": "sha512-Jg8wNT1MUzIvhBFxViqrEhWDGzqymo3sV7z7ZsaWbZNDLXRJZoRGrjulp60YYtV4wfY8VIKcWidjojlLcWrd8Q==",
"cpu": [
"x64"
],
@@ -14411,20 +14489,20 @@
"url": "https://opencollective.com/libvips"
},
"optionalDependencies": {
"@img/sharp-libvips-linuxmusl-x64": "1.2.3"
"@img/sharp-libvips-linuxmusl-x64": "1.2.4"
}
},
"node_modules/next/node_modules/@img/sharp-wasm32": {
"version": "0.34.4",
"resolved": "https://registry.npmjs.org/@img/sharp-wasm32/-/sharp-wasm32-0.34.4.tgz",
"integrity": "sha512-33QL6ZO/qpRyG7woB/HUALz28WnTMI2W1jgX3Nu2bypqLIKx/QKMILLJzJjI+SIbvXdG9fUnmrxR7vbi1sTBeA==",
"version": "0.34.5",
"resolved": "https://registry.npmjs.org/@img/sharp-wasm32/-/sharp-wasm32-0.34.5.tgz",
"integrity": "sha512-OdWTEiVkY2PHwqkbBI8frFxQQFekHaSSkUIJkwzclWZe64O1X4UlUjqqqLaPbUpMOQk6FBu/HtlGXNblIs0huw==",
"cpu": [
"wasm32"
],
"license": "Apache-2.0 AND LGPL-3.0-or-later AND MIT",
"optional": true,
"dependencies": {
"@emnapi/runtime": "^1.5.0"
"@emnapi/runtime": "^1.7.0"
},
"engines": {
"node": "^18.17.0 || ^20.3.0 || >=21.0.0"
@@ -14434,9 +14512,9 @@
}
},
"node_modules/next/node_modules/@img/sharp-win32-ia32": {
"version": "0.34.4",
"resolved": "https://registry.npmjs.org/@img/sharp-win32-ia32/-/sharp-win32-ia32-0.34.4.tgz",
"integrity": "sha512-3ZeLue5V82dT92CNL6rsal6I2weKw1cYu+rGKm8fOCCtJTR2gYeUfY3FqUnIJsMUPIH68oS5jmZ0NiJ508YpEw==",
"version": "0.34.5",
"resolved": "https://registry.npmjs.org/@img/sharp-win32-ia32/-/sharp-win32-ia32-0.34.5.tgz",
"integrity": "sha512-FV9m/7NmeCmSHDD5j4+4pNI8Cp3aW+JvLoXcTUo0IqyjSfAZJ8dIUmijx1qaJsIiU+Hosw6xM5KijAWRJCSgNg==",
"cpu": [
"ia32"
],
@@ -14453,9 +14531,9 @@
}
},
"node_modules/next/node_modules/@img/sharp-win32-x64": {
"version": "0.34.4",
"resolved": "https://registry.npmjs.org/@img/sharp-win32-x64/-/sharp-win32-x64-0.34.4.tgz",
"integrity": "sha512-xIyj4wpYs8J18sVN3mSQjwrw7fKUqRw+Z5rnHNCy5fYTxigBz81u5mOMPmFumwjcn8+ld1ppptMBCLic1nz6ig==",
"version": "0.34.5",
"resolved": "https://registry.npmjs.org/@img/sharp-win32-x64/-/sharp-win32-x64-0.34.5.tgz",
"integrity": "sha512-+29YMsqY2/9eFEiW93eqWnuLcWcufowXewwSNIT6UwZdUUCrM3oFjMWH/Z6/TMmb4hlFenmfAVbpWeup2jryCw==",
"cpu": [
"x64"
],
@@ -14498,14 +14576,16 @@
}
},
"node_modules/next/node_modules/sharp": {
"version": "0.34.4",
"version": "0.34.5",
"resolved": "https://registry.npmjs.org/sharp/-/sharp-0.34.5.tgz",
"integrity": "sha512-Ou9I5Ft9WNcCbXrU9cMgPBcCK8LiwLqcbywW3t4oDV37n1pzpuNLsYiAV8eODnjbtQlSDwZ2cUEeQz4E54Hltg==",
"hasInstallScript": true,
"license": "Apache-2.0",
"optional": true,
"dependencies": {
"@img/colour": "^1.0.0",
"detect-libc": "^2.1.0",
"semver": "^7.7.2"
"detect-libc": "^2.1.2",
"semver": "^7.7.3"
},
"engines": {
"node": "^18.17.0 || ^20.3.0 || >=21.0.0"
@@ -14514,62 +14594,30 @@
"url": "https://opencollective.com/libvips"
},
"optionalDependencies": {
"@img/sharp-darwin-arm64": "0.34.4",
"@img/sharp-darwin-x64": "0.34.4",
"@img/sharp-libvips-darwin-arm64": "1.2.3",
"@img/sharp-libvips-darwin-x64": "1.2.3",
"@img/sharp-libvips-linux-arm": "1.2.3",
"@img/sharp-libvips-linux-arm64": "1.2.3",
"@img/sharp-libvips-linux-ppc64": "1.2.3",
"@img/sharp-libvips-linux-s390x": "1.2.3",
"@img/sharp-libvips-linux-x64": "1.2.3",
"@img/sharp-libvips-linuxmusl-arm64": "1.2.3",
"@img/sharp-libvips-linuxmusl-x64": "1.2.3",
"@img/sharp-linux-arm": "0.34.4",
"@img/sharp-linux-arm64": "0.34.4",
"@img/sharp-linux-ppc64": "0.34.4",
"@img/sharp-linux-s390x": "0.34.4",
"@img/sharp-linux-x64": "0.34.4",
"@img/sharp-linuxmusl-arm64": "0.34.4",
"@img/sharp-linuxmusl-x64": "0.34.4",
"@img/sharp-wasm32": "0.34.4",
"@img/sharp-win32-arm64": "0.34.4",
"@img/sharp-win32-ia32": "0.34.4",
"@img/sharp-win32-x64": "0.34.4"
}
},
"node_modules/next/node_modules/sharp/node_modules/@img/sharp-darwin-arm64": {
"version": "0.34.4",
"cpu": [
"arm64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": "^18.17.0 || ^20.3.0 || >=21.0.0"
},
"funding": {
"url": "https://opencollective.com/libvips"
},
"optionalDependencies": {
"@img/sharp-libvips-darwin-arm64": "1.2.3"
}
},
"node_modules/next/node_modules/sharp/node_modules/@img/sharp-libvips-darwin-arm64": {
"version": "1.2.3",
"cpu": [
"arm64"
],
"license": "LGPL-3.0-or-later",
"optional": true,
"os": [
"darwin"
],
"funding": {
"url": "https://opencollective.com/libvips"
"@img/sharp-darwin-arm64": "0.34.5",
"@img/sharp-darwin-x64": "0.34.5",
"@img/sharp-libvips-darwin-arm64": "1.2.4",
"@img/sharp-libvips-darwin-x64": "1.2.4",
"@img/sharp-libvips-linux-arm": "1.2.4",
"@img/sharp-libvips-linux-arm64": "1.2.4",
"@img/sharp-libvips-linux-ppc64": "1.2.4",
"@img/sharp-libvips-linux-riscv64": "1.2.4",
"@img/sharp-libvips-linux-s390x": "1.2.4",
"@img/sharp-libvips-linux-x64": "1.2.4",
"@img/sharp-libvips-linuxmusl-arm64": "1.2.4",
"@img/sharp-libvips-linuxmusl-x64": "1.2.4",
"@img/sharp-linux-arm": "0.34.5",
"@img/sharp-linux-arm64": "0.34.5",
"@img/sharp-linux-ppc64": "0.34.5",
"@img/sharp-linux-riscv64": "0.34.5",
"@img/sharp-linux-s390x": "0.34.5",
"@img/sharp-linux-x64": "0.34.5",
"@img/sharp-linuxmusl-arm64": "0.34.5",
"@img/sharp-linuxmusl-x64": "0.34.5",
"@img/sharp-wasm32": "0.34.5",
"@img/sharp-win32-arm64": "0.34.5",
"@img/sharp-win32-ia32": "0.34.5",
"@img/sharp-win32-x64": "0.34.5"
}
},
"node_modules/node-fetch": {

View File

@@ -80,7 +80,7 @@
"mdast-util-find-and-replace": "^3.0.1",
"mime": "^4.1.0",
"motion": "^12.29.0",
"next": "16.1.7",
"next": "16.2.3",
"next-themes": "^0.4.4",
"postcss": "^8.5.6",
"posthog-js": "^1.176.0",

View File

@@ -12,12 +12,7 @@ import {
useRef,
useState,
} from "react";
import {
Tooltip,
TooltipContent,
TooltipProvider,
TooltipTrigger,
} from "@/components/ui/tooltip";
import { Tooltip } from "@opal/components";
import { useFederatedConnectors } from "@/lib/hooks";
import {
FederatedConnectorDetail,
@@ -96,33 +91,31 @@ function SourceTileTooltipWrapper({
}
return (
<TooltipProvider>
<Tooltip>
<TooltipTrigger asChild>
<div>
<SourceTile
sourceMetadata={sourceMetadata}
preSelect={preSelect}
navigationUrl={navigationUrl}
hasExistingSlackCredentials={!!hasExistingSlackCredentials}
/>
</div>
</TooltipTrigger>
<TooltipContent side="top" className="max-w-sm">
{existingFederatedConnector ? (
<Text as="p" textLight05 secondaryBody>
<strong>Federated connector already configured.</strong> Click to
edit the existing connector.
</Text>
) : hasExistingSlackCredentials ? (
<Text as="p" textLight05 secondaryBody>
<strong>Existing Slack credentials found.</strong> Click to manage
your Slack connector.
</Text>
) : null}
</TooltipContent>
</Tooltip>
</TooltipProvider>
<Tooltip
side="top"
tooltip={
existingFederatedConnector ? (
<Text as="p" textLight05 secondaryBody>
<strong>Federated connector already configured.</strong> Click to
edit the existing connector.
</Text>
) : hasExistingSlackCredentials ? (
<Text as="p" textLight05 secondaryBody>
<strong>Existing Slack credentials found.</strong> Click to manage
your Slack connector.
</Text>
) : undefined
}
>
<div>
<SourceTile
sourceMetadata={sourceMetadata}
preSelect={preSelect}
navigationUrl={navigationUrl}
hasExistingSlackCredentials={!!hasExistingSlackCredentials}
/>
</div>
</Tooltip>
);
}

View File

@@ -6,9 +6,8 @@ import { Section } from "@/layouts/general-layouts";
import { Content, InputErrorText, InputVertical } from "@opal/layouts";
import Card from "@/refresh-components/cards/Card";
import Button from "@/refresh-components/buttons/Button";
import { Button as OpalButton } from "@opal/components";
import { Button as OpalButton, MessageCard } from "@opal/components";
import Text from "@/refresh-components/texts/Text";
import Message from "@/refresh-components/messages/Message";
import InfoBlock from "@/refresh-components/messages/InfoBlock";
import InputNumber from "@/refresh-components/inputs/InputNumber";
import {
@@ -637,35 +636,27 @@ export default function BillingDetailsView({
<Section gap={1} height="auto" width="full">
{/* Stripe connection error banner */}
{hasStripeError && (
<Message
static
warning
text="Unable to connect to Stripe payment portal."
<MessageCard
variant="warning"
title="Unable to connect to Stripe payment portal."
description="Check your internet connection or manually provide a license."
close={false}
className="w-full"
/>
)}
{/* Air-gapped mode info banner */}
{isAirGapped && !hasStripeError && !isManualLicenseOnly && (
<Message
static
info
text="Air-gapped deployment"
<MessageCard
variant="info"
title="Air-gapped deployment"
description="Online billing management is disabled. Contact support to update your subscription."
close={false}
className="w-full"
/>
)}
{/* Expiration banner */}
{expirationState && (
<Message
static
warning={expirationState.variant === "warning"}
error={expirationState.variant === "error"}
text={
<MessageCard
variant={expirationState.variant}
title={
expirationState.variant === "error"
? expirationState.daysUntilDeletion
? `Your subscription has expired. Data will be deleted in ${expirationState.daysUntilDeletion} days.`
@@ -679,8 +670,6 @@ export default function BillingDetailsView({
: "Renew your subscription to restore access to paid features."
: `Renew your subscription by ${expirationState.expirationDate} to avoid disruption.`
}
close={false}
className="w-full"
/>
)}

View File

@@ -69,30 +69,33 @@ jest.mock("./LicenseActivationCard", () => ({
default: () => <div data-testid="license-activation-card" />,
}));
jest.mock("@/refresh-components/messages/Message", () => ({
__esModule: true,
default: ({
text,
description,
onClose,
}: {
text: string;
description?: string;
onClose?: () => void;
}) => (
<div data-testid="activating-banner">
<span data-testid="activating-banner-text">{text}</span>
{description && (
<span data-testid="activating-banner-description">{description}</span>
)}
{onClose && (
<button data-testid="activating-banner-close" onClick={onClose}>
Close
</button>
)}
</div>
),
}));
jest.mock("@opal/components", () => {
const actual = jest.requireActual("@opal/components");
return {
...actual,
MessageCard: ({
title,
description,
onClose,
}: {
title: string;
description?: string;
onClose?: () => void;
}) => (
<div data-testid="activating-banner">
<span data-testid="activating-banner-text">{title}</span>
{description && (
<span data-testid="activating-banner-description">{description}</span>
)}
{onClose && (
<button data-testid="activating-banner-close" onClick={onClose}>
Close
</button>
)}
</div>
),
};
});
jest.mock("@/lib/billing", () => ({
useBillingInformation: jest.fn(),

View File

@@ -19,7 +19,7 @@ import {
import { NEXT_PUBLIC_CLOUD_ENABLED } from "@/lib/constants";
import { SWR_KEYS } from "@/lib/swr-keys";
import { useUser } from "@/providers/UserProvider";
import Message from "@/refresh-components/messages/Message";
import { MessageCard } from "@opal/components";
import PlansView from "./PlansView";
import CheckoutView from "./CheckoutView";
@@ -484,19 +484,14 @@ export default function BillingPage() {
<SettingsLayouts.Body>
<div className="flex flex-col items-center gap-6">
{isActivating && (
<Message
static
warning
large
text="Your license is still activating"
<MessageCard
variant="warning"
title="Your license is still activating"
description="Your license is being processed. You'll be taken to billing details automatically once confirmed."
icon
close
onClose={() => {
sessionStorage.removeItem(BILLING_ACTIVATING_KEY);
setIsActivating(false);
}}
className="w-full"
/>
)}
{renderContent()}

View File

@@ -23,12 +23,7 @@ import { RadioGroupItemField } from "@/components/ui/RadioGroupItemField";
import { AlertCircle } from "lucide-react";
import { useRouter } from "next/navigation";
import type { Route } from "next";
import {
Tooltip,
TooltipContent,
TooltipTrigger,
} from "@/components/ui/tooltip";
import { TooltipProvider } from "@radix-ui/react-tooltip";
import { Tooltip } from "@opal/components";
import { SourceIcon } from "@/components/SourceIcon";
import Link from "next/link";
import AgentAvatar from "@/refresh-components/avatars/AgentAvatar";
@@ -570,14 +565,10 @@ export function SlackChannelConfigFormFields({
<div className="flex mt-8 gap-x-2 w-full justify-end">
{shouldShowPrivacyAlert && (
<TooltipProvider>
<Tooltip>
<TooltipTrigger asChild>
<div className="flex hover:bg-background-150 cursor-pointer p-2 rounded-lg items-center">
<AlertCircle className="h-5 w-5 text-alert" />
</div>
</TooltipTrigger>
<TooltipContent side="top" className="bg-background p-4 w-80">
<Tooltip
side="top"
tooltip={
<div className="space-y-2">
<Label className="text-text mb-2 font-semibold">
Privacy Alert
</Label>
@@ -615,9 +606,13 @@ export function SlackChannelConfigFormFields({
))}
</div>
</div>
</TooltipContent>
</Tooltip>
</TooltipProvider>
</div>
}
>
<div className="flex hover:bg-background-150 cursor-pointer p-2 rounded-lg items-center">
<AlertCircle className="h-5 w-5 text-alert" />
</div>
</Tooltip>
)}
<Button type="submit">{isUpdate ? "Update" : "Create"}</Button>
<Button prominence="secondary" onClick={() => router.back()}>

Some files were not shown because too many files have changed in this diff Show More