feat(hook) frontend ee

fix(perf): optimize chat sessions query to prevent DB cascading failures (#9802)
fix: install early exit (#9818)
2026-04-01 13:02:42 +00:00 · 2026-03-31 18:54:14 -07:00 · 2026-04-01 01:28:37 +00:00 · 2026-04-01 01:09:05 +00:00 · 2026-04-01 01:08:38 +00:00 · 2026-04-01 01:07:55 +00:00
245 changed files with 12176 additions and 4015 deletions
--- a/.github/workflows/deployment.yml
+++ b/.github/workflows/deployment.yml
@@ -704,6 +704,9 @@ jobs:
            NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=true
            NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=true
            NODE_OPTIONS=--max-old-space-size=8192
+            SENTRY_RELEASE=${{ github.sha }}
+          secrets: |
+            sentry_auth_token=${{ secrets.SENTRY_AUTH_TOKEN }}
          cache-from: |
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:cloudweb-cache-amd64
            type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
@@ -786,6 +789,9 @@ jobs:
            NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=true
            NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=true
            NODE_OPTIONS=--max-old-space-size=8192
+            SENTRY_RELEASE=${{ github.sha }}
+          secrets: |
+            sentry_auth_token=${{ secrets.SENTRY_AUTH_TOKEN }}
          cache-from: |
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:cloudweb-cache-arm64
            type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
--- a/.github/workflows/nightly-llm-provider-chat.yml
+++ b/.github/workflows/nightly-llm-provider-chat.yml
@@ -35,6 +35,7 @@ jobs:
    needs: [provider-chat-test]
    if: failure() && github.event_name == 'schedule'
    runs-on: ubuntu-slim
+    environment: ci-protected
    timeout-minutes: 5
    steps:
      - name: Checkout
--- a/.github/workflows/post-merge-beta-cherry-pick.yml
+++ b/.github/workflows/post-merge-beta-cherry-pick.yml
@@ -183,6 +183,7 @@ jobs:
      - cherry-pick-to-latest-release
    if: needs.resolve-cherry-pick-request.outputs.should_cherrypick == 'true' && needs.resolve-cherry-pick-request.result == 'success' && needs.cherry-pick-to-latest-release.result == 'success'
    runs-on: ubuntu-slim
+    environment: ci-protected
    timeout-minutes: 10
    steps:
      - name: Checkout
@@ -232,6 +233,7 @@ jobs:
      - cherry-pick-to-latest-release
    if: always() && needs.resolve-cherry-pick-request.outputs.should_cherrypick == 'true' && (needs.resolve-cherry-pick-request.result == 'failure' || needs.cherry-pick-to-latest-release.result == 'failure')
    runs-on: ubuntu-slim
+    environment: ci-protected
    timeout-minutes: 10
    steps:
      - name: Checkout
--- a/.github/workflows/pr-desktop-build.yml
+++ b/.github/workflows/pr-desktop-build.yml
@@ -63,7 +63,7 @@ jobs:
          targets: ${{ matrix.target }}

      - name: Cache Cargo registry and build
-        uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # zizmor: ignore[cache-poisoning]
+        uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # zizmor: ignore[cache-poisoning]
        with:
          path: |
            ~/.cargo/bin/
--- a/.github/workflows/pr-helm-chart-testing.yml
+++ b/.github/workflows/pr-helm-chart-testing.yml
@@ -41,7 +41,7 @@ jobs:
          version: v3.19.0

      - name: Set up chart-testing
-        uses: helm/chart-testing-action@b5eebdd9998021f29756c53432f48dab66394810
+        uses: helm/chart-testing-action@2e2940618cb426dce2999631d543b53cdcfc8527
        with:
          uv_version: "0.9.9"

--- a/.github/workflows/pr-playwright-tests.yml
+++ b/.github/workflows/pr-playwright-tests.yml
@@ -284,7 +284,7 @@ jobs:

      - name: Cache playwright cache
        # zizmor: ignore[cache-poisoning] ephemeral runners; no release artifacts
-        uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
+        uses: runs-on/cache@a5f51d6f3fece787d03b7b4e981c82538a0654ed # ratchet:runs-on/cache@v4
        with:
          path: ~/.cache/ms-playwright
          key: ${{ runner.os }}-playwright-npm-${{ hashFiles('web/package-lock.json') }}
@@ -626,7 +626,7 @@ jobs:

      - name: Cache playwright cache
        # zizmor: ignore[cache-poisoning] ephemeral runners; no release artifacts
-        uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
+        uses: runs-on/cache@a5f51d6f3fece787d03b7b4e981c82538a0654ed # ratchet:runs-on/cache@v4
        with:
          path: ~/.cache/ms-playwright
          key: ${{ runner.os }}-playwright-npm-${{ hashFiles('web/package-lock.json') }}
--- a/.github/workflows/pr-python-checks.yml
+++ b/.github/workflows/pr-python-checks.yml
@@ -56,7 +56,7 @@ jobs:

      - name: Cache mypy cache
        if: ${{ vars.DISABLE_MYPY_CACHE != 'true' }}
-        uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
+        uses: runs-on/cache@a5f51d6f3fece787d03b7b4e981c82538a0654ed # ratchet:runs-on/cache@v4
        with:
          path: .mypy_cache
          key: mypy-${{ runner.os }}-${{ github.base_ref || github.event.merge_group.base_ref || 'main' }}-${{ hashFiles('**/*.py', '**/*.pyi', 'pyproject.toml') }}
--- a/.github/workflows/pr-python-model-tests.yml
+++ b/.github/workflows/pr-python-model-tests.yml
@@ -31,6 +31,7 @@ jobs:
      - runner=4cpu-linux-arm64
      - "run-id=${{ github.run_id }}-model-check"
      - "extras=ecr-cache"
+    environment: ci-protected
    timeout-minutes: 45

    env:
--- a/.github/workflows/preview.yml
+++ b/.github/workflows/preview.yml
@@ -15,6 +15,7 @@ permissions:
 jobs:
  Deploy-Preview:
    runs-on: ubuntu-latest
+    environment: ci-protected
    timeout-minutes: 30
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
--- a/.github/workflows/storybook-deploy.yml
+++ b/.github/workflows/storybook-deploy.yml
@@ -25,6 +25,7 @@ permissions:
 jobs:
  Deploy-Storybook:
    runs-on: ubuntu-latest
+    environment: ci-protected
    timeout-minutes: 30
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v4
@@ -54,6 +55,7 @@ jobs:
    needs: Deploy-Storybook
    if: always() && needs.Deploy-Storybook.result == 'failure'
    runs-on: ubuntu-latest
+    environment: ci-protected
    timeout-minutes: 10
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v4
--- a/.github/workflows/sync_foss.yml
+++ b/.github/workflows/sync_foss.yml
@@ -9,6 +9,7 @@ on:
 jobs:
  sync-foss:
    runs-on: ubuntu-latest
+    environment: ci-protected
    timeout-minutes: 45
    permissions:
      contents: read
--- a/.github/workflows/tag-nightly.yml
+++ b/.github/workflows/tag-nightly.yml
@@ -11,6 +11,7 @@ permissions:
 jobs:
  create-and-push-tag:
    runs-on: ubuntu-slim
+    environment: ci-protected
    timeout-minutes: 45

    steps:
--- a/backend/ee/onyx/background/celery/apps/primary.py
+++ b/backend/ee/onyx/background/celery/apps/primary.py
@@ -5,6 +5,7 @@ from onyx.background.celery.apps.primary import celery_app
 celery_app.autodiscover_tasks(
    app_base.filter_task_modules(
        [
+            "ee.onyx.background.celery.tasks.hooks",
            "ee.onyx.background.celery.tasks.doc_permission_syncing",
            "ee.onyx.background.celery.tasks.external_group_syncing",
            "ee.onyx.background.celery.tasks.cloud",
--- a/backend/ee/onyx/background/celery/tasks/beat_schedule.py
+++ b/backend/ee/onyx/background/celery/tasks/beat_schedule.py
@@ -55,6 +55,15 @@ ee_tasks_to_schedule: list[dict] = []

 if not MULTI_TENANT:
    ee_tasks_to_schedule = [
+        {
+            "name": "hook-execution-log-cleanup",
+            "task": OnyxCeleryTask.HOOK_EXECUTION_LOG_CLEANUP_TASK,
+            "schedule": timedelta(days=1),
+            "options": {
+                "priority": OnyxCeleryPriority.LOW,
+                "expires": BEAT_EXPIRES_DEFAULT,
+            },
+        },
        {
            "name": "autogenerate-usage-report",
            "task": OnyxCeleryTask.GENERATE_USAGE_REPORT_TASK,
--- a/backend/ee/onyx/background/celery/tasks/hooks/init.py
+++ b/backend/ee/onyx/background/celery/tasks/hooks/init.py
--- a/backend/ee/onyx/background/celery/tasks/hooks/tasks.py
+++ b/backend/ee/onyx/background/celery/tasks/hooks/tasks.py
--- a/backend/ee/onyx/background/celery/tasks/tenant_provisioning/tasks.py
+++ b/backend/ee/onyx/background/celery/tasks/tenant_provisioning/tasks.py
@@ -13,6 +13,7 @@ from redis.lock import Lock as RedisLock
 from ee.onyx.server.tenants.provisioning import setup_tenant
 from ee.onyx.server.tenants.schema_management import create_schema_if_not_exists
 from ee.onyx.server.tenants.schema_management import get_current_alembic_version
+from ee.onyx.server.tenants.schema_management import run_alembic_migrations
 from onyx.background.celery.apps.app_base import task_logger
 from onyx.configs.app_configs import TARGET_AVAILABLE_TENANTS
 from onyx.configs.constants import ONYX_CLOUD_TENANT_ID
@@ -29,9 +30,10 @@ from shared_configs.configs import TENANT_ID_PREFIX
 # Each tenant takes ~80s (alembic migrations), so 5 tenants ≈ 7 minutes.
 _MAX_TENANTS_PER_RUN = 5

-# Time limits sized for worst-case batch: _MAX_TENANTS_PER_RUN × ~90s + buffer.
-_TENANT_PROVISIONING_SOFT_TIME_LIMIT = 60 * 10  # 10 minutes
-_TENANT_PROVISIONING_TIME_LIMIT = 60 * 15  # 15 minutes
+# Time limits sized for worst-case: provisioning up to _MAX_TENANTS_PER_RUN new tenants
+# (~90s each) plus migrating up to TARGET_AVAILABLE_TENANTS pool tenants (~90s each).
+_TENANT_PROVISIONING_SOFT_TIME_LIMIT = 60 * 20  # 20 minutes
+_TENANT_PROVISIONING_TIME_LIMIT = 60 * 25  # 25 minutes


@shared_task(
@@ -91,8 +93,7 @@ def check_available_tenants(self: Task) -> None:  # noqa: ARG001
        batch_size = min(tenants_to_provision, _MAX_TENANTS_PER_RUN)
        if batch_size < tenants_to_provision:
            task_logger.info(
-                f"Capping batch to {batch_size} "
-                f"(need {tenants_to_provision}, will catch up next cycle)"
+                f"Capping batch to {batch_size} (need {tenants_to_provision}, will catch up next cycle)"
            )

        provisioned = 0
@@ -103,12 +104,14 @@ def check_available_tenants(self: Task) -> None:  # noqa: ARG001
                    provisioned += 1
            except Exception:
                task_logger.exception(
-                    f"Failed to provision tenant {i + 1}/{batch_size}, "
-                    "continuing with remaining tenants"
+                    f"Failed to provision tenant {i + 1}/{batch_size}, continuing with remaining tenants"
                )

        task_logger.info(f"Provisioning complete: {provisioned}/{batch_size} succeeded")

+        # Migrate any pool tenants that were provisioned before a new migration was deployed
+        _migrate_stale_pool_tenants()
+
    except Exception:
        task_logger.exception("Error in check_available_tenants task")

@@ -121,6 +124,46 @@ def check_available_tenants(self: Task) -> None:  # noqa: ARG001
            )


+def _migrate_stale_pool_tenants() -> None:
+    """
+    Bring every tenant in the pre-provisioned pool up to the latest alembic
+    revision and record the resulting version on its AvailableTenant row.
+    Migrations are attempted for each pool tenant on every call. A failure for
+    one tenant is logged and does not stop the remaining tenants from being
+    processed.
+    """
+    # Snapshot the ids first so no session stays open while migrations run.
+    with get_session_with_shared_schema() as session:
+        tenant_ids = [row.tenant_id for row in session.query(AvailableTenant).all()]
+
+    if not tenant_ids:
+        return
+
+    task_logger.info(
+        f"Checking {len(tenant_ids)} pool tenant(s) for pending migrations"
+    )
+
+    for tenant_id in tenant_ids:
+        try:
+            run_alembic_migrations(tenant_id)
+            new_version = get_current_alembic_version(tenant_id)
+            with get_session_with_shared_schema() as session:
+                lookup = session.query(AvailableTenant).filter_by(tenant_id=tenant_id)
+                tenant = lookup.first()
+                # Nothing to record when the row is gone or already at this version.
+                if not tenant or tenant.alembic_version == new_version:
+                    continue
+                task_logger.info(
+                    f"Migrated pool tenant {tenant_id}: {tenant.alembic_version} -> {new_version}"
+                )
+                tenant.alembic_version = new_version
+                session.commit()
+        except Exception:
+            task_logger.exception(
+                f"Failed to migrate pool tenant {tenant_id}, skipping"
+            )
+
+
 def pre_provision_tenant() -> bool:
    """
    Pre-provision a new tenant and store it in the NewAvailableTenant table.
--- a/backend/ee/onyx/configs/license_enforcement_config.py
+++ b/backend/ee/onyx/configs/license_enforcement_config.py
@@ -69,5 +69,7 @@ EE_ONLY_PATH_PREFIXES: frozenset[str] = frozenset(
        "/admin/token-rate-limits",
        # Evals
        "/evals",
+        # Hook extensions
+        "/admin/hooks",
    }
 )
--- a/backend/ee/onyx/hooks/init.py
+++ b/backend/ee/onyx/hooks/init.py
--- a/backend/ee/onyx/hooks/executor.py
+++ b/backend/ee/onyx/hooks/executor.py
@@ -0,0 +1,385 @@
+"""Hook executor — calls a customer's external HTTP endpoint for a given hook point.
+
+Usage (Celery tasks and FastAPI handlers):
+    result = execute_hook(
+        db_session=db_session,
+        hook_point=HookPoint.QUERY_PROCESSING,
+        payload={"query": "...", "user_email": "...", "chat_session_id": "..."},
+        response_type=QueryProcessingResponse,
+    )
+
+    if isinstance(result, HookSkipped):
+        # no active hook configured — continue with original behavior
+        ...
+    elif isinstance(result, HookSoftFailed):
+        # hook failed but fail strategy is SOFT — continue with original behavior
+        ...
+    else:
+        # result is a validated Pydantic model instance (response_type)
+        ...
+
+is_reachable update policy
+--------------------------
+``is_reachable`` on the Hook row is updated selectively — only when the outcome
+carries meaningful signal about reachability (network path or api_key validity):
+
+  NetworkError (DNS, connection refused)  → False  (cannot reach the server)
+  HTTP 401 / 403                          → False  (api_key revoked or invalid)
+  TimeoutException                        → None   (server may be slow, skip write)
+  Other HTTP errors (4xx / 5xx)           → None   (server responded, skip write)
+  Unknown exception                       → None   (no signal, skip write)
+  Non-JSON / non-dict response            → None   (server responded, skip write)
+  Success (2xx, valid dict)               → True   (confirmed reachable)
+
+None means "leave the current value unchanged" — no DB round-trip is made.
+
+DB session design
+-----------------
+The executor uses three sessions:
+
+  1. Caller's session (db_session) — used only for the hook lookup read. All
+     needed fields are extracted from the Hook object before the HTTP call, so
+     the caller's session is not held open during the external HTTP request.
+
+  2. Log session — a separate short-lived session opened after the HTTP call
+     completes to write the HookExecutionLog row on failure. Success runs are
+     not recorded. Committed independently of everything else.
+
+  3. Reachable session — a second short-lived session to update is_reachable on
+     the Hook. Kept separate from the log session so a concurrent hook deletion
+     (which causes update_hook__no_commit to raise OnyxError(NOT_FOUND)) cannot
+     prevent the execution log from being written. This update is best-effort.
+"""
+
+import json
+import time
+from typing import Any
+from typing import TypeVar
+
+import httpx
+from pydantic import BaseModel
+from pydantic import ValidationError
+from sqlalchemy.orm import Session
+
+from onyx.db.engine.sql_engine import get_session_with_current_tenant
+from onyx.db.enums import HookFailStrategy
+from onyx.db.enums import HookPoint
+from onyx.db.hook import create_hook_execution_log__no_commit
+from onyx.db.hook import get_non_deleted_hook_by_hook_point
+from onyx.db.hook import update_hook__no_commit
+from onyx.db.models import Hook
+from onyx.error_handling.error_codes import OnyxErrorCode
+from onyx.error_handling.exceptions import OnyxError
+from onyx.hooks.executor import HookSkipped
+from onyx.hooks.executor import HookSoftFailed
+from onyx.utils.logger import setup_logger
+from shared_configs.configs import MULTI_TENANT
+
+logger = setup_logger()
+
+
+T = TypeVar("T", bound=BaseModel)
+
+
+# ---------------------------------------------------------------------------
+# Private helpers
+# ---------------------------------------------------------------------------
+
+
+class _HttpOutcome(BaseModel):
+    """Outcome of one HTTP hook call, in the form the executor persists and acts on."""
+
+    is_success: bool
+    # True/False = value to write to the Hook row; None = leave unchanged (no write).
+    updated_is_reachable: bool | None
+    # HTTP status of the response; None when the call raised before a response.
+    status_code: int | None
+    error_message: str | None
+    response_payload: dict[str, Any] | None
+
+
+def _lookup_hook(
+    db_session: Session,
+    hook_point: HookPoint,
+) -> Hook | HookSkipped:
+    """Resolve the hook to run for ``hook_point``, or HookSkipped when there is none.
+
+    HookSkipped is returned when MULTI_TENANT is set, and when the looked-up
+    hook is missing, inactive, or has no endpoint_url. No HTTP call is made
+    here, so there is nothing to log and no reachability signal to record.
+    """
+    if MULTI_TENANT:
+        return HookSkipped()
+    candidate = get_non_deleted_hook_by_hook_point(
+        db_session=db_session, hook_point=hook_point
+    )
+    # Positive guard: only an existing, active hook with an endpoint is usable.
+    if candidate is not None and candidate.is_active and candidate.endpoint_url:
+        return candidate
+    return HookSkipped()
+
+
+def _process_response(
+    *,
+    response: httpx.Response | None,
+    exc: Exception | None,
+    timeout: float,
+) -> _HttpOutcome:
+    """Classify the result of an HTTP hook call into an _HttpOutcome.
+
+    Pass ``exc`` when client.post() raised and ``response`` otherwise; ``exc`` is
+    checked first, and ValueError is raised when both are None. Handles
+    raise_for_status(), JSON decoding, and the dict shape check.
+    """
+    if exc is not None:
+        if isinstance(exc, httpx.NetworkError):
+            msg = f"Hook network error (endpoint unreachable): {exc}"
+            logger.warning(msg, exc_info=exc)
+            return _HttpOutcome(
+                is_success=False,
+                updated_is_reachable=False,
+                status_code=None,
+                error_message=msg,
+                response_payload=None,
+            )
+        if isinstance(exc, httpx.TimeoutException):
+            msg = f"Hook timed out after {timeout}s: {exc}"
+            logger.warning(msg, exc_info=exc)
+            return _HttpOutcome(
+                is_success=False,
+                updated_is_reachable=None,  # timeout doesn't indicate unreachability
+                status_code=None,
+                error_message=msg,
+                response_payload=None,
+            )
+        msg = f"Hook call failed: {exc}"
+        logger.exception(msg, exc_info=exc)
+        return _HttpOutcome(
+            is_success=False,
+            updated_is_reachable=None,  # unknown error — don't make assumptions
+            status_code=None,
+            error_message=msg,
+            response_payload=None,
+        )
+
+    if response is None:
+        raise ValueError(
+            "exactly one of response or exc must be non-None; both are None"
+        )
+    status_code = response.status_code
+
+    try:
+        response.raise_for_status()
+    except httpx.HTTPStatusError as e:
+        msg = f"Hook returned HTTP {e.response.status_code}: {e.response.text}"
+        logger.warning(msg, exc_info=e)
+        # 401/403 means the api_key has been revoked or is invalid — mark unreachable
+        # so the operator knows to update it. All other HTTP errors keep is_reachable
+        # as-is (server is up, the request just failed for application reasons).
+        auth_failed = e.response.status_code in (401, 403)
+        return _HttpOutcome(
+            is_success=False,
+            updated_is_reachable=False if auth_failed else None,
+            status_code=status_code,
+            error_message=msg,
+            response_payload=None,
+        )
+
+    try:
+        response_payload = response.json()
+    except (json.JSONDecodeError, httpx.DecodingError) as e:
+        msg = f"Hook returned non-JSON response: {e}"
+        logger.warning(msg, exc_info=e)
+        return _HttpOutcome(
+            is_success=False,
+            updated_is_reachable=None,  # server responded — reachability unchanged
+            status_code=status_code,
+            error_message=msg,
+            response_payload=None,
+        )
+
+    if not isinstance(response_payload, dict):
+        msg = f"Hook returned non-dict JSON (got {type(response_payload).__name__})"
+        logger.warning(msg)
+        return _HttpOutcome(
+            is_success=False,
+            updated_is_reachable=None,  # server responded — reachability unchanged
+            status_code=status_code,
+            error_message=msg,
+            response_payload=None,
+        )
+
+    return _HttpOutcome(
+        is_success=True,
+        updated_is_reachable=True,
+        status_code=status_code,
+        error_message=None,
+        response_payload=response_payload,
+    )
+
+
+def _persist_result(
+    *,
+    hook_id: int,
+    outcome: _HttpOutcome,
+    duration_ms: int,
+) -> None:
+    """Write the execution log on failure and optionally update is_reachable, each
+    in its own session so a failure in one does not affect the other.
+
+    Args:
+        hook_id: id of the Hook row the call was made for.
+        outcome: classified result of the HTTP call; ``updated_is_reachable`` of
+            None means no is_reachable write is attempted.
+        duration_ms: duration recorded on the execution log row.
+
+    Both writes are best-effort: any exception is logged and swallowed, so this
+    function does not raise for DB errors.
+    """
+    # Only write the execution log on failure — success runs are not recorded.
+    # Must not be skipped if the is_reachable update fails (e.g. hook concurrently
+    # deleted between the initial lookup and here).
+    if not outcome.is_success:
+        try:
+            with get_session_with_current_tenant() as log_session:
+                create_hook_execution_log__no_commit(
+                    db_session=log_session,
+                    hook_id=hook_id,
+                    is_success=False,
+                    error_message=outcome.error_message,
+                    status_code=outcome.status_code,
+                    duration_ms=duration_ms,
+                )
+                log_session.commit()
+        except Exception:
+            logger.exception(
+                f"Failed to persist hook execution log for hook_id={hook_id}"
+            )
+
+    # Update is_reachable separately — best-effort, non-critical.
+    # None means the value is unchanged (set by the caller to skip the no-op write).
+    # update_hook__no_commit can raise OnyxError(NOT_FOUND) if the hook was
+    # concurrently deleted, so keep this isolated from the log write above.
+    if outcome.updated_is_reachable is not None:
+        try:
+            with get_session_with_current_tenant() as reachable_session:
+                update_hook__no_commit(
+                    db_session=reachable_session,
+                    hook_id=hook_id,
+                    is_reachable=outcome.updated_is_reachable,
+                )
+                reachable_session.commit()
+        except Exception:
+            # Stay at warning level (non-critical), but attach the caught
+            # exception so the cause of the failed update is not lost.
+            logger.warning(
+                f"Failed to update is_reachable for hook_id={hook_id}",
+                exc_info=True,
+            )
+
+
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+
+
+def _execute_hook_inner(
+    hook: Hook,
+    payload: dict[str, Any],
+    response_type: type[T],
+) -> T | HookSoftFailed:
+    """POST ``payload`` to the hook endpoint, persist the outcome, return a typed model.
+
+    Raises OnyxError on HARD failure. Returns HookSoftFailed on SOFT failure.
+    """
+    timeout = hook.timeout_seconds
+    hook_id = hook.id
+    fail_strategy = hook.fail_strategy
+    endpoint_url = hook.endpoint_url
+    current_is_reachable: bool | None = hook.is_reachable
+
+    if not endpoint_url:
+        raise ValueError(
+            f"hook_id={hook_id} is active but has no endpoint_url — "
+            "active hooks without an endpoint_url must be rejected by _lookup_hook"
+        )
+
+    start = time.monotonic()
+    response: httpx.Response | None = None
+    exc: Exception | None = None
+    try:
+        api_key: str | None = (
+            hook.api_key.get_value(apply_mask=False) if hook.api_key else None
+        )
+        headers: dict[str, str] = {"Content-Type": "application/json"}
+        if api_key:
+            headers["Authorization"] = f"Bearer {api_key}"
+        with httpx.Client(
+            timeout=timeout, follow_redirects=False
+        ) as client:  # SSRF guard: never follow redirects
+            response = client.post(endpoint_url, json=payload, headers=headers)
+    except Exception as e:
+        exc = e
+    duration_ms = int((time.monotonic() - start) * 1000)
+
+    outcome = _process_response(response=response, exc=exc, timeout=timeout)
+
+    # Validate the response payload against response_type.
+    # A validation failure downgrades the outcome to a failure so it is logged,
+    # is_reachable is left unchanged (server responded — just a bad payload),
+    # and fail_strategy is respected below.
+    validated_model: T | None = None
+    if outcome.is_success and outcome.response_payload is not None:
+        try:
+            validated_model = response_type.model_validate(outcome.response_payload)
+        except ValidationError as e:
+            msg = (
+                f"Hook response failed validation against {response_type.__name__}: {e}"
+            )
+            outcome = _HttpOutcome(
+                is_success=False,
+                updated_is_reachable=None,  # server responded — reachability unchanged
+                status_code=outcome.status_code,
+                error_message=msg,
+                response_payload=None,
+            )
+
+    # Skip the is_reachable write when the value would not change — avoids a
+    # no-op DB round-trip on every call when the hook is already in the expected state.
+    if outcome.updated_is_reachable == current_is_reachable:
+        outcome = outcome.model_copy(update={"updated_is_reachable": None})
+    _persist_result(hook_id=hook_id, outcome=outcome, duration_ms=duration_ms)
+
+    if not outcome.is_success:
+        if fail_strategy == HookFailStrategy.HARD:
+            raise OnyxError(
+                OnyxErrorCode.HOOK_EXECUTION_FAILED,
+                outcome.error_message or "Hook execution failed.",
+            )
+        logger.warning(
+            f"Hook execution failed (soft fail) for hook_id={hook_id}: {outcome.error_message}"
+        )
+        return HookSoftFailed()
+
+    if validated_model is None:
+        raise OnyxError(
+            OnyxErrorCode.INTERNAL_ERROR,
+            f"validated_model is None for successful hook call (hook_id={hook_id})",
+        )
+    return validated_model
+
+
+def _execute_hook_impl(
+    *,
+    db_session: Session,
+    hook_point: HookPoint,
+    payload: dict[str, Any],
+    response_type: type[T],
+) -> T | HookSkipped | HookSoftFailed:
+    """EE implementation — loaded by CE's execute_hook via fetch_versioned_implementation.
+
+    Returns HookSkipped when the lookup finds nothing to run, HookSoftFailed when
+    execution fails under the SOFT fail strategy, and otherwise the validated
+    ``response_type`` instance. Any other failure propagates to the caller.
+    """
+    looked_up = _lookup_hook(db_session, hook_point)
+    if isinstance(looked_up, HookSkipped):
+        return looked_up
+
+    # Read these up front so the except branch never touches the Hook object.
+    hook_id = looked_up.id
+    fail_strategy = looked_up.fail_strategy
+    try:
+        return _execute_hook_inner(looked_up, payload, response_type)
+    except Exception:
+        if fail_strategy != HookFailStrategy.SOFT:
+            raise
+        logger.exception(
+            f"Unexpected error in hook execution (soft fail) for hook_id={hook_id}"
+        )
+        return HookSoftFailed()
--- a/backend/ee/onyx/main.py
+++ b/backend/ee/onyx/main.py
@@ -15,6 +15,7 @@ from ee.onyx.server.enterprise_settings.api import (
    basic_router as enterprise_settings_router,
 )
 from ee.onyx.server.evals.api import router as evals_router
+from ee.onyx.server.features.hooks.api import router as hook_router
 from ee.onyx.server.license.api import router as license_router
 from ee.onyx.server.manage.standard_answer import router as standard_answer_router
 from ee.onyx.server.middleware.license_enforcement import (
@@ -138,6 +139,7 @@ def get_application() -> FastAPI:
    include_router_with_global_prefix_prepended(application, ee_oauth_router)
    include_router_with_global_prefix_prepended(application, ee_document_cc_pair_router)
    include_router_with_global_prefix_prepended(application, evals_router)
+    include_router_with_global_prefix_prepended(application, hook_router)

    # Enterprise-only global settings
    include_router_with_global_prefix_prepended(
--- a/backend/ee/onyx/server/features/init.py
+++ b/backend/ee/onyx/server/features/init.py
--- a/backend/ee/onyx/server/features/hooks/init.py
+++ b/backend/ee/onyx/server/features/hooks/init.py
--- a/backend/ee/onyx/server/features/hooks/api.py
+++ b/backend/ee/onyx/server/features/hooks/api.py
--- a/backend/ee/onyx/server/tenants/provisioning.py
+++ b/backend/ee/onyx/server/tenants/provisioning.py
@@ -99,6 +99,26 @@ async def get_or_provision_tenant(
        tenant_id = await get_available_tenant()

        if tenant_id:
+            # Run migrations to ensure the pre-provisioned tenant schema is current.
+            # Pool tenants may have been created before a new migration was deployed.
+            # Capture as a non-optional local so mypy can type the lambda correctly.
+            _tenant_id: str = tenant_id
+            loop = asyncio.get_running_loop()
+            try:
+                await loop.run_in_executor(
+                    None, lambda: run_alembic_migrations(_tenant_id)
+                )
+            except Exception:
+                # The tenant was already dequeued from the pool — roll it back so
+                # it doesn't end up orphaned (schema exists, but not assigned to anyone).
+                logger.exception(
+                    f"Migration failed for pre-provisioned tenant {_tenant_id}; rolling back"
+                )
+                try:
+                    await rollback_tenant_provisioning(_tenant_id)
+                except Exception:
+                    logger.exception(f"Failed to rollback orphaned tenant {_tenant_id}")
+                raise
            # If we have a pre-provisioned tenant, assign it to the user
            await assign_tenant_to_user(tenant_id, email, referral_source)
            logger.info(f"Assigned pre-provisioned tenant {tenant_id} to user {email}")
--- a/backend/model_server/main.py
+++ b/backend/model_server/main.py
@@ -100,6 +100,7 @@ def get_model_app() -> FastAPI:
            dsn=SENTRY_DSN,
            integrations=[StarletteIntegration(), FastApiIntegration()],
            traces_sample_rate=0.1,
+            release=__version__,
        )
        logger.info("Sentry initialized")
    else:
--- a/backend/onyx/background/celery/apps/app_base.py
+++ b/backend/onyx/background/celery/apps/app_base.py
@@ -20,6 +20,7 @@ from sentry_sdk.integrations.celery import CeleryIntegration
 from sqlalchemy import text
 from sqlalchemy.orm import Session

+from onyx import __version__
 from onyx.background.celery.apps.task_formatters import CeleryTaskColoredFormatter
 from onyx.background.celery.apps.task_formatters import CeleryTaskPlainFormatter
 from onyx.background.celery.celery_utils import celery_is_worker_primary
@@ -65,6 +66,7 @@ if SENTRY_DSN:
        dsn=SENTRY_DSN,
        integrations=[CeleryIntegration()],
        traces_sample_rate=0.1,
+        release=__version__,
    )
    logger.info("Sentry initialized")
 else:
@@ -515,7 +517,8 @@ def reset_tenant_id(


 def wait_for_vespa_or_shutdown(
-    sender: Any, **kwargs: Any  # noqa: ARG001
+    sender: Any,  # noqa: ARG001
+    **kwargs: Any,  # noqa: ARG001
 ) -> None:  # noqa: ARG001
    """Waits for Vespa to become ready subject to a timeout.
    Raises WorkerShutdown if the timeout is reached."""
--- a/backend/onyx/background/celery/apps/primary.py
+++ b/backend/onyx/background/celery/apps/primary.py
@@ -317,7 +317,6 @@ celery_app.autodiscover_tasks(
            "onyx.background.celery.tasks.docprocessing",
            "onyx.background.celery.tasks.evals",
            "onyx.background.celery.tasks.hierarchyfetching",
-            "onyx.background.celery.tasks.hooks",
            "onyx.background.celery.tasks.periodic",
            "onyx.background.celery.tasks.pruning",
            "onyx.background.celery.tasks.shared",
--- a/backend/onyx/background/celery/tasks/beat_schedule.py
+++ b/backend/onyx/background/celery/tasks/beat_schedule.py
@@ -14,7 +14,6 @@ from onyx.configs.constants import ONYX_CLOUD_CELERY_TASK_PREFIX
 from onyx.configs.constants import OnyxCeleryPriority
 from onyx.configs.constants import OnyxCeleryQueues
 from onyx.configs.constants import OnyxCeleryTask
-from onyx.hooks.utils import HOOKS_AVAILABLE
 from shared_configs.configs import MULTI_TENANT

 # choosing 15 minutes because it roughly gives us enough time to process many tasks
@@ -362,19 +361,6 @@ if not MULTI_TENANT:

    tasks_to_schedule.extend(beat_task_templates)

-if HOOKS_AVAILABLE:
-    tasks_to_schedule.append(
-        {
-            "name": "hook-execution-log-cleanup",
-            "task": OnyxCeleryTask.HOOK_EXECUTION_LOG_CLEANUP_TASK,
-            "schedule": timedelta(days=1),
-            "options": {
-                "priority": OnyxCeleryPriority.LOW,
-                "expires": BEAT_EXPIRES_DEFAULT,
-            },
-        }
-    )
-

 def generate_cloud_tasks(
    beat_tasks: list[dict], beat_templates: list[dict], beat_multiplier: float
--- a/backend/onyx/background/celery/tasks/docfetching/tasks.py
+++ b/backend/onyx/background/celery/tasks/docfetching/tasks.py
@@ -9,6 +9,7 @@ from celery import Celery
 from celery import shared_task
 from celery import Task

+from onyx import __version__
 from onyx.background.celery.apps.app_base import task_logger
 from onyx.background.celery.memory_monitoring import emit_process_memory
 from onyx.background.celery.tasks.docprocessing.heartbeat import start_heartbeat
@@ -137,6 +138,7 @@ def _docfetching_task(
        sentry_sdk.init(
            dsn=SENTRY_DSN,
            traces_sample_rate=0.1,
+            release=__version__,
        )
        logger.info("Sentry initialized")
    else:
--- a/backend/onyx/background/celery/tasks/docprocessing/tasks.py
+++ b/backend/onyx/background/celery/tasks/docprocessing/tasks.py
@@ -319,6 +319,11 @@ def monitor_indexing_attempt_progress(
    )

    current_db_time = get_db_current_time(db_session)
+    total_batches: int | str = (
+        coordination_status.total_batches
+        if coordination_status.total_batches is not None
+        else "?"
+    )
    if coordination_status.found:
        task_logger.info(
            f"Indexing attempt progress: "
@@ -326,7 +331,7 @@ def monitor_indexing_attempt_progress(
            f"cc_pair={attempt.connector_credential_pair_id} "
            f"search_settings={attempt.search_settings_id} "
            f"completed_batches={coordination_status.completed_batches} "
-            f"total_batches={coordination_status.total_batches or '?'} "
+            f"total_batches={total_batches} "
            f"total_docs={coordination_status.total_docs} "
            f"total_failures={coordination_status.total_failures}"
            f"elapsed={(current_db_time - attempt.time_created).seconds}"
@@ -410,7 +415,7 @@ def check_indexing_completion(
    logger.info(
        f"Indexing status: "
        f"indexing_completed={indexing_completed} "
-        f"batches_processed={batches_processed}/{batches_total or '?'} "
+        f"batches_processed={batches_processed}/{batches_total if batches_total is not None else '?'} "
        f"total_docs={coordination_status.total_docs} "
        f"total_chunks={coordination_status.total_chunks} "
        f"total_failures={coordination_status.total_failures}"
--- a/backend/onyx/configs/app_configs.py
+++ b/backend/onyx/configs/app_configs.py
@@ -805,6 +805,10 @@ MINI_CHUNK_SIZE = 150
 # This is the number of regular chunks per large chunk
 LARGE_CHUNK_RATIO = 4

+# The maximum number of chunks that can be held for 1 document processing batch
+# The purpose of this is to set an upper bound on memory usage
+MAX_CHUNKS_PER_DOC_BATCH = int(os.environ.get("MAX_CHUNKS_PER_DOC_BATCH") or 1000)
+
 # Include the document level metadata in each chunk. If the metadata is too long, then it is thrown out
 # We don't want the metadata to overwhelm the actual contents of the chunk
 SKIP_METADATA_IN_CHUNK = os.environ.get("SKIP_METADATA_IN_CHUNK", "").lower() == "true"
@@ -1075,7 +1079,6 @@ POD_NAMESPACE = os.environ.get("POD_NAMESPACE")

 DEV_MODE = os.environ.get("DEV_MODE", "").lower() == "true"

-HOOK_ENABLED = os.environ.get("HOOK_ENABLED", "").lower() == "true"

 INTEGRATION_TESTS_MODE = os.environ.get("INTEGRATION_TESTS_MODE", "").lower() == "true"

--- a/backend/onyx/configs/constants.py
+++ b/backend/onyx/configs/constants.py
@@ -212,6 +212,7 @@ class DocumentSource(str, Enum):
    PRODUCTBOARD = "productboard"
    FILE = "file"
    CODA = "coda"
+    CANVAS = "canvas"
    NOTION = "notion"
    ZULIP = "zulip"
    LINEAR = "linear"
@@ -672,6 +673,7 @@ DocumentSourceDescription: dict[DocumentSource, str] = {
    DocumentSource.SLAB: "slab data",
    DocumentSource.PRODUCTBOARD: "productboard data (boards, etc.)",
    DocumentSource.FILE: "files",
+    DocumentSource.CANVAS: "canvas lms - courses, pages, assignments, and announcements",
    DocumentSource.CODA: "coda - team workspace with docs, tables, and pages",
    DocumentSource.NOTION: "notion data - a workspace that combines note-taking, \
 project management, and collaboration tools into a single, customizable platform",
--- a/backend/onyx/connectors/canvas/access.py
+++ b/backend/onyx/connectors/canvas/access.py
@@ -0,0 +1,32 @@
+"""
+Permissioning / AccessControl logic for Canvas courses.
+
+CE stub — returns None (no permissions). The EE implementation is loaded
+at runtime via ``fetch_versioned_implementation``.
+"""
+
+from collections.abc import Callable
+from typing import cast
+
+from onyx.access.models import ExternalAccess
+from onyx.connectors.canvas.client import CanvasApiClient
+from onyx.utils.variable_functionality import fetch_versioned_implementation
+from onyx.utils.variable_functionality import global_version
+
+
+def get_course_permissions(
+    canvas_client: CanvasApiClient,
+    course_id: int,
+) -> ExternalAccess | None:
+    if not global_version.is_ee_version():  # non-EE build: report no permission data
+        return None
+
+    ee_get_course_permissions = cast(  # give the dynamically fetched callable a static type
+        Callable[[CanvasApiClient, int], ExternalAccess | None],
+        fetch_versioned_implementation(
+            "onyx.external_permissions.canvas.access",  # presumably the module path - confirm
+            "get_course_permissions",  # presumably the attribute to fetch from it - confirm
+        ),
+    )
+
+    return ee_get_course_permissions(canvas_client, course_id)
--- a/backend/onyx/connectors/canvas/client.py
+++ b/backend/onyx/connectors/canvas/client.py
@@ -2,6 +2,7 @@ from __future__ import annotations

 import logging
 import re
+from collections.abc import Iterator
 from typing import Any
 from urllib.parse import urlparse

@@ -190,3 +191,22 @@ class CanvasApiClient:
        if clean_endpoint:
            final_url += "/" + clean_endpoint
        return final_url
+
+    def paginate(
+        self,
+        endpoint: str,
+        params: dict[str, Any] | None = None,
+    ) -> Iterator[list[Any]]:
+        """Yield each page of results, following Link-header pagination.
+
+        Makes the first request with endpoint + params, then follows
+        next_url from Link headers for subsequent pages.
+        """
+        response, next_url = self.get(endpoint, params=params)
+        while True:
+            if not response:  # falsy (e.g. empty) page: stop without yielding it
+                break
+            yield response
+            if not next_url:  # no next-page URL came back: this was the last page
+                break
+            response, next_url = self.get(full_url=next_url)  # params are not re-sent here
--- a/backend/onyx/connectors/canvas/connector.py
+++ b/backend/onyx/connectors/canvas/connector.py
@@ -1,17 +1,82 @@
+from datetime import datetime
+from datetime import timezone
+from typing import Any
+from typing import cast
 from typing import Literal
+from typing import NoReturn
 from typing import TypeAlias

 from pydantic import BaseModel
+from retry import retry
+from typing_extensions import override

+from onyx.access.models import ExternalAccess
+from onyx.configs.app_configs import INDEX_BATCH_SIZE
+from onyx.configs.constants import DocumentSource
+from onyx.connectors.canvas.access import get_course_permissions
+from onyx.connectors.canvas.client import CanvasApiClient
+from onyx.connectors.exceptions import ConnectorValidationError
+from onyx.connectors.exceptions import CredentialExpiredError
+from onyx.connectors.exceptions import InsufficientPermissionsError
+from onyx.connectors.exceptions import UnexpectedValidationError
+from onyx.connectors.interfaces import CheckpointedConnectorWithPermSync
+from onyx.connectors.interfaces import CheckpointOutput
+from onyx.connectors.interfaces import GenerateSlimDocumentOutput
+from onyx.connectors.interfaces import SecondsSinceUnixEpoch
+from onyx.connectors.interfaces import SlimConnectorWithPermSync
 from onyx.connectors.models import ConnectorCheckpoint
+from onyx.connectors.models import ConnectorMissingCredentialError
+from onyx.connectors.models import Document
+from onyx.connectors.models import ImageSection
+from onyx.connectors.models import TextSection
+from onyx.error_handling.exceptions import OnyxError
+from onyx.file_processing.html_utils import parse_html_page_basic
+from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
+from onyx.utils.logger import setup_logger
+
+logger = setup_logger()
+
+
+def _handle_canvas_api_error(e: OnyxError) -> NoReturn:
+    """Map a Canvas API error to a connector exception chained to ``e``; always raises."""
+    if e.status_code == 401:
+        raise CredentialExpiredError(
+            "Canvas API token is invalid or expired (HTTP 401)."
+        ) from e
+    elif e.status_code == 403:
+        raise InsufficientPermissionsError(
+            "Canvas API token does not have sufficient permissions (HTTP 403)."
+        ) from e
+    elif e.status_code == 429:
+        raise ConnectorValidationError(
+            "Canvas rate-limit exceeded (HTTP 429). Please try again later."
+        ) from e
+    elif e.status_code >= 500:  # assumes status_code is always an int - TODO confirm
+        raise UnexpectedValidationError(
+            f"Unexpected Canvas HTTP error (status={e.status_code}): {e}"
+        ) from e
+    else:  # every other status below 500 is reported as a validation error
+        raise ConnectorValidationError(
+            f"Canvas API error (status={e.status_code}): {e}"
+        ) from e


 class CanvasCourse(BaseModel):
    id: int
-    name: str
-    course_code: str
-    created_at: str
-    workflow_state: str
+    name: str | None = None
+    course_code: str | None = None
+    created_at: str | None = None
+    workflow_state: str | None = None
+
+    @classmethod
+    def from_api(cls, payload: dict[str, Any]) -> "CanvasCourse":
+        return cls(
+            id=payload["id"],  # the only required key: KeyError if it is missing
+            name=payload.get("name"),  # .get(): a missing key becomes None
+            course_code=payload.get("course_code"),
+            created_at=payload.get("created_at"),
+            workflow_state=payload.get("workflow_state"),
+        )


 class CanvasPage(BaseModel):
@@ -19,10 +84,22 @@ class CanvasPage(BaseModel):
    url: str
    title: str
    body: str | None = None
-    created_at: str
-    updated_at: str
+    created_at: str | None = None
+    updated_at: str | None = None
    course_id: int

+    @classmethod
+    def from_api(cls, payload: dict[str, Any], course_id: int) -> "CanvasPage":
+        return cls(
+            page_id=payload["page_id"],  # required: KeyError if missing
+            url=payload["url"],  # required: KeyError if missing
+            title=payload["title"],  # required: KeyError if missing
+            body=payload.get("body"),  # .get(): a missing key becomes None
+            created_at=payload.get("created_at"),
+            updated_at=payload.get("updated_at"),
+            course_id=course_id,  # supplied by the caller, not read from the payload
+        )
+

 class CanvasAssignment(BaseModel):
    id: int
@@ -30,10 +107,23 @@ class CanvasAssignment(BaseModel):
    description: str | None = None
    html_url: str
    course_id: int
-    created_at: str
-    updated_at: str
+    created_at: str | None = None
+    updated_at: str | None = None
    due_at: str | None = None

+    @classmethod
+    def from_api(cls, payload: dict[str, Any], course_id: int) -> "CanvasAssignment":
+        return cls(
+            id=payload["id"],  # required: KeyError if missing
+            name=payload["name"],  # required: KeyError if missing
+            description=payload.get("description"),  # .get(): a missing key becomes None
+            html_url=payload["html_url"],  # required: KeyError if missing
+            course_id=course_id,  # supplied by the caller, not read from the payload
+            created_at=payload.get("created_at"),
+            updated_at=payload.get("updated_at"),
+            due_at=payload.get("due_at"),
+        )
+

 class CanvasAnnouncement(BaseModel):
    id: int
@@ -43,6 +133,17 @@ class CanvasAnnouncement(BaseModel):
    posted_at: str | None = None
    course_id: int

+    @classmethod
+    def from_api(cls, payload: dict[str, Any], course_id: int) -> "CanvasAnnouncement":
+        return cls(
+            id=payload["id"],  # required: KeyError if missing
+            title=payload["title"],  # required: KeyError if missing
+            message=payload.get("message"),  # .get(): a missing key becomes None
+            html_url=payload["html_url"],  # required: KeyError if missing
+            posted_at=payload.get("posted_at"),
+            course_id=course_id,  # supplied by the caller, not read from the payload
+        )
+

 CanvasStage: TypeAlias = Literal["pages", "assignments", "announcements"]

@@ -72,3 +173,286 @@ class CanvasConnectorCheckpoint(ConnectorCheckpoint):
        self.current_course_index += 1
        self.stage = "pages"
        self.next_url = None
+
+
+class CanvasConnector(  # Canvas connector; checkpoint and perm-sync entry points are still stubs
+    CheckpointedConnectorWithPermSync[CanvasConnectorCheckpoint],
+    SlimConnectorWithPermSync,
+):
+    def __init__(
+        self,
+        canvas_base_url: str,
+        batch_size: int = INDEX_BATCH_SIZE,
+    ) -> None:  # strips trailing "/" characters, then one "/api/v1" suffix, from the URL
+        self.canvas_base_url = canvas_base_url.rstrip("/").removesuffix("/api/v1")
+        self.batch_size = batch_size
+        self._canvas_client: CanvasApiClient | None = None  # assigned by load_credentials
+        self._course_permissions_cache: dict[int, ExternalAccess | None] = {}  # by course id
+
+    @property
+    def canvas_client(self) -> CanvasApiClient:
+        if self._canvas_client is None:  # load_credentials has not stored a client yet
+            raise ConnectorMissingCredentialError("Canvas")
+        return self._canvas_client
+
+    def _get_course_permissions(self, course_id: int) -> ExternalAccess | None:
+        """Get course permissions with caching."""
+        if course_id not in self._course_permissions_cache:  # None results are cached too
+            self._course_permissions_cache[course_id] = get_course_permissions(
+                canvas_client=self.canvas_client,
+                course_id=course_id,
+            )
+        return self._course_permissions_cache[course_id]
+
+    @retry(tries=3, delay=1, backoff=2)  # a failure on any page re-runs the whole listing
+    def _list_courses(self) -> list[CanvasCourse]:
+        """Fetch all courses accessible to the authenticated user."""
+        logger.debug("Fetching Canvas courses")
+
+        courses: list[CanvasCourse] = []
+        for page in self.canvas_client.paginate(
+            "courses", params={"per_page": "100", "state[]": "available"}
+        ):
+            courses.extend(CanvasCourse.from_api(c) for c in page)
+        return courses
+
+    @retry(tries=3, delay=1, backoff=2)  # a failure on any page re-runs the whole listing
+    def _list_pages(self, course_id: int) -> list[CanvasPage]:
+        """Fetch all pages for a given course."""
+        logger.debug(f"Fetching pages for course {course_id}")
+
+        pages: list[CanvasPage] = []
+        for page in self.canvas_client.paginate(
+            f"courses/{course_id}/pages",
+            params={"per_page": "100", "include[]": "body", "published": "true"},
+        ):
+            pages.extend(CanvasPage.from_api(p, course_id=course_id) for p in page)
+        return pages
+
+    @retry(tries=3, delay=1, backoff=2)  # a failure on any page re-runs the whole listing
+    def _list_assignments(self, course_id: int) -> list[CanvasAssignment]:
+        """Fetch all assignments for a given course."""
+        logger.debug(f"Fetching assignments for course {course_id}")
+
+        assignments: list[CanvasAssignment] = []
+        for page in self.canvas_client.paginate(
+            f"courses/{course_id}/assignments",
+            params={"per_page": "100", "published": "true"},
+        ):
+            assignments.extend(
+                CanvasAssignment.from_api(a, course_id=course_id) for a in page
+            )
+        return assignments
+
+    @retry(tries=3, delay=1, backoff=2)  # a failure on any page re-runs the whole listing
+    def _list_announcements(self, course_id: int) -> list[CanvasAnnouncement]:
+        """Fetch all announcements for a given course."""
+        logger.debug(f"Fetching announcements for course {course_id}")
+
+        announcements: list[CanvasAnnouncement] = []
+        for page in self.canvas_client.paginate(
+            "announcements",  # not nested under courses/{id}; scoped by context_codes[] below
+            params={
+                "per_page": "100",
+                "context_codes[]": f"course_{course_id}",
+                "active_only": "true",
+            },
+        ):
+            announcements.extend(
+                CanvasAnnouncement.from_api(a, course_id=course_id) for a in page
+            )
+        return announcements
+
+    def _build_document(
+        self,
+        doc_id: str,
+        link: str,
+        text: str,
+        semantic_identifier: str,
+        doc_updated_at: datetime | None,
+        course_id: int,
+        doc_type: str,
+    ) -> Document:
+        """Build a Document with standard Canvas fields."""
+        return Document(
+            id=doc_id,
+            sections=cast(  # typing-only cast; the list holds a single TextSection
+                list[TextSection | ImageSection],
+                [TextSection(link=link, text=text)],
+            ),
+            source=DocumentSource.CANVAS,
+            semantic_identifier=semantic_identifier,
+            doc_updated_at=doc_updated_at,
+            metadata={"course_id": str(course_id), "type": doc_type},
+        )
+
+    def _convert_page_to_document(self, page: CanvasPage) -> Document:
+        """Convert a Canvas page to a Document."""
+        link = f"{self.canvas_base_url}/courses/{page.course_id}/pages/{page.url}"
+
+        text_parts = [page.title]  # title first, then the parsed body if it is non-empty
+        body_text = parse_html_page_basic(page.body) if page.body else ""
+        if body_text:
+            text_parts.append(body_text)
+
+        doc_updated_at = (  # "Z" is rewritten to "+00:00" before parsing; result is in UTC
+            datetime.fromisoformat(page.updated_at.replace("Z", "+00:00")).astimezone(
+                timezone.utc
+            )
+            if page.updated_at
+            else None
+        )
+
+        document = self._build_document(
+            doc_id=f"canvas-page-{page.course_id}-{page.page_id}",
+            link=link,
+            text="\n\n".join(text_parts),
+            semantic_identifier=page.title or f"Page {page.page_id}",
+            doc_updated_at=doc_updated_at,
+            course_id=page.course_id,
+            doc_type="page",
+        )
+        return document
+
+    def _convert_assignment_to_document(self, assignment: CanvasAssignment) -> Document:
+        """Convert a Canvas assignment to a Document."""
+        text_parts = [assignment.name]  # name, then description, then a "Due: ..." line
+        desc_text = (
+            parse_html_page_basic(assignment.description)
+            if assignment.description
+            else ""
+        )
+        if desc_text:
+            text_parts.append(desc_text)
+        if assignment.due_at:
+            due_dt = datetime.fromisoformat(
+                assignment.due_at.replace("Z", "+00:00")
+            ).astimezone(timezone.utc)
+            text_parts.append(f"Due: {due_dt.strftime('%B %d, %Y %H:%M UTC')}")
+
+        doc_updated_at = (  # same "Z" -> "+00:00" handling as for pages; result is in UTC
+            datetime.fromisoformat(
+                assignment.updated_at.replace("Z", "+00:00")
+            ).astimezone(timezone.utc)
+            if assignment.updated_at
+            else None
+        )
+
+        document = self._build_document(
+            doc_id=f"canvas-assignment-{assignment.course_id}-{assignment.id}",
+            link=assignment.html_url,
+            text="\n\n".join(text_parts),
+            semantic_identifier=assignment.name or f"Assignment {assignment.id}",
+            doc_updated_at=doc_updated_at,
+            course_id=assignment.course_id,
+            doc_type="assignment",
+        )
+        return document
+
+    def _convert_announcement_to_document(
+        self, announcement: CanvasAnnouncement
+    ) -> Document:
+        """Convert a Canvas announcement to a Document."""
+        text_parts = [announcement.title]  # title first, then the parsed message if non-empty
+        msg_text = (
+            parse_html_page_basic(announcement.message) if announcement.message else ""
+        )
+        if msg_text:
+            text_parts.append(msg_text)
+
+        doc_updated_at = (  # posted_at stands in for the update time on announcements
+            datetime.fromisoformat(
+                announcement.posted_at.replace("Z", "+00:00")
+            ).astimezone(timezone.utc)
+            if announcement.posted_at
+            else None
+        )
+
+        document = self._build_document(
+            doc_id=f"canvas-announcement-{announcement.course_id}-{announcement.id}",
+            link=announcement.html_url,
+            text="\n\n".join(text_parts),
+            semantic_identifier=announcement.title or f"Announcement {announcement.id}",
+            doc_updated_at=doc_updated_at,
+            course_id=announcement.course_id,
+            doc_type="announcement",
+        )
+        return document
+
+    @override
+    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
+        """Load and validate Canvas credentials."""
+        access_token = credentials.get("canvas_access_token")
+        if not access_token:
+            raise ConnectorMissingCredentialError("Canvas")
+
+        try:
+            client = CanvasApiClient(
+                bearer_token=access_token,
+                canvas_base_url=self.canvas_base_url,
+            )
+            client.get("courses", params={"per_page": "1"})  # one-item probe of the token
+        except ValueError as e:
+            raise ConnectorValidationError(f"Invalid Canvas base URL: {e}") from e
+        except OnyxError as e:
+            _handle_canvas_api_error(e)  # NoReturn: always raises
+
+        self._canvas_client = client  # stored only after the probe request succeeded
+        return None
+
+    @override
+    def validate_connector_settings(self) -> None:
+        """Validate Canvas connector settings by testing API access."""
+        try:
+            self.canvas_client.get("courses", params={"per_page": "1"})
+            logger.info("Canvas connector settings validated successfully")
+        except OnyxError as e:
+            _handle_canvas_api_error(e)  # NoReturn: always raises
+        except ConnectorMissingCredentialError:  # keep it from being wrapped by the handler below
+            raise
+        except Exception as exc:
+            raise UnexpectedValidationError(
+                f"Unexpected error during Canvas settings validation: {exc}"
+            ) from exc
+
+    @override
+    def load_from_checkpoint(
+        self,
+        start: SecondsSinceUnixEpoch,
+        end: SecondsSinceUnixEpoch,
+        checkpoint: CanvasConnectorCheckpoint,
+    ) -> CheckpointOutput[CanvasConnectorCheckpoint]:
+        # TODO(benwu408): implemented in PR3 (checkpoint)
+        raise NotImplementedError
+
+    @override
+    def load_from_checkpoint_with_perm_sync(
+        self,
+        start: SecondsSinceUnixEpoch,
+        end: SecondsSinceUnixEpoch,
+        checkpoint: CanvasConnectorCheckpoint,
+    ) -> CheckpointOutput[CanvasConnectorCheckpoint]:
+        # TODO(benwu408): implemented in PR3 (checkpoint)
+        raise NotImplementedError
+
+    @override
+    def build_dummy_checkpoint(self) -> CanvasConnectorCheckpoint:
+        # TODO(benwu408): implemented in PR3 (checkpoint)
+        raise NotImplementedError
+
+    @override
+    def validate_checkpoint_json(
+        self, checkpoint_json: str
+    ) -> CanvasConnectorCheckpoint:
+        # TODO(benwu408): implemented in PR3 (checkpoint)
+        raise NotImplementedError
+
+    @override
+    def retrieve_all_slim_docs_perm_sync(
+        self,
+        start: SecondsSinceUnixEpoch | None = None,
+        end: SecondsSinceUnixEpoch | None = None,
+        callback: IndexingHeartbeatInterface | None = None,
+    ) -> GenerateSlimDocumentOutput:
+        # TODO(benwu408): implemented in PR4 (perm sync)
+        raise NotImplementedError
--- a/backend/onyx/connectors/discord/connector.py
+++ b/backend/onyx/connectors/discord/connector.py
@@ -11,11 +11,13 @@ from discord import Client
 from discord.channel import TextChannel
 from discord.channel import Thread
 from discord.enums import MessageType
+from discord.errors import LoginFailure
 from discord.flags import Intents
 from discord.message import Message as DiscordMessage

 from onyx.configs.app_configs import INDEX_BATCH_SIZE
 from onyx.configs.constants import DocumentSource
+from onyx.connectors.exceptions import CredentialInvalidError
 from onyx.connectors.interfaces import GenerateDocumentsOutput
 from onyx.connectors.interfaces import LoadConnector
 from onyx.connectors.interfaces import PollConnector
@@ -209,8 +211,19 @@ def _manage_async_retrieval(
        intents = Intents.default()
        intents.message_content = True
        async with Client(intents=intents) as discord_client:
-            asyncio.create_task(discord_client.start(token))
-            await discord_client.wait_until_ready()
+            start_task = asyncio.create_task(discord_client.start(token))
+            ready_task = asyncio.create_task(discord_client.wait_until_ready())
+
+            done, _ = await asyncio.wait(
+                {start_task, ready_task},
+                return_when=asyncio.FIRST_COMPLETED,
+            )
+
+            # start() runs indefinitely once connected, so it only lands
+            # in `done` when login/connection failed — propagate the error.
+            if start_task in done:
+                ready_task.cancel()
+                start_task.result()

            filtered_channels: list[TextChannel] = await _fetch_filtered_channels(
                discord_client=discord_client,
@@ -276,6 +289,19 @@ class DiscordConnector(PollConnector, LoadConnector):
        self._discord_bot_token = credentials["discord_bot_token"]
        return None

+    def validate_connector_settings(self) -> None:  # checks the bot token by logging in
+        loop = asyncio.new_event_loop()  # dedicated loop, closed in the outer finally
+        try:
+            client = Client(intents=Intents.default())
+            try:
+                loop.run_until_complete(client.login(self.discord_bot_token))
+            except LoginFailure as e:
+                raise CredentialInvalidError(f"Invalid Discord bot token: {e}") from e
+            finally:
+                loop.run_until_complete(client.close())  # runs even when login failed
+        finally:
+            loop.close()
+
    def _manage_doc_batching(
        self,
        start: datetime | None = None,
--- a/backend/onyx/connectors/registry.py
+++ b/backend/onyx/connectors/registry.py
@@ -72,6 +72,10 @@ CONNECTOR_CLASS_MAP = {
        module_path="onyx.connectors.coda.connector",
        class_name="CodaConnector",
    ),
+    DocumentSource.CANVAS: ConnectorMapping(
+        module_path="onyx.connectors.canvas.connector",
+        class_name="CanvasConnector",
+    ),
    DocumentSource.NOTION: ConnectorMapping(
        module_path="onyx.connectors.notion.connector",
        class_name="NotionConnector",
--- a/backend/onyx/db/chat.py
+++ b/backend/onyx/db/chat.py
@@ -8,7 +8,6 @@ from uuid import UUID
 from fastapi import HTTPException
 from sqlalchemy import delete
 from sqlalchemy import desc
-from sqlalchemy import exists
 from sqlalchemy import func
 from sqlalchemy import nullsfirst
 from sqlalchemy import or_
@@ -132,32 +131,47 @@ def get_chat_sessions_by_user(
    if before is not None:
        stmt = stmt.where(ChatSession.time_updated < before)

-    if limit:
-        stmt = stmt.limit(limit)
-
    if project_id is not None:
        stmt = stmt.where(ChatSession.project_id == project_id)
    elif only_non_project_chats:
        stmt = stmt.where(ChatSession.project_id.is_(None))

-    if not include_failed_chats:
-        non_system_message_exists_subq = (
-            exists()
-            .where(ChatMessage.chat_session_id == ChatSession.id)
-            .where(ChatMessage.message_type != MessageType.SYSTEM)
-            .correlate(ChatSession)
-        )
-
-        # Leeway for newly created chats that don't have messages yet
-        time = datetime.now(timezone.utc) - timedelta(minutes=5)
-        recently_created = ChatSession.time_created >= time
-
-        stmt = stmt.where(or_(non_system_message_exists_subq, recently_created))
+    # When filtering out failed chats, we apply the limit in Python after
+    # filtering rather than in SQL, since the post-filter may remove rows.
+    if limit and include_failed_chats:
+        stmt = stmt.limit(limit)

    result = db_session.execute(stmt)
-    chat_sessions = result.scalars().all()
+    chat_sessions = list(result.scalars().all())

-    return list(chat_sessions)
+    if not include_failed_chats and chat_sessions:
+        # Filter out "failed" sessions (those with only SYSTEM messages)
+        # using a separate efficient query instead of a correlated EXISTS
+        # subquery, which causes full sequential scans of chat_message.
+        leeway = datetime.now(timezone.utc) - timedelta(minutes=5)
+        session_ids = [cs.id for cs in chat_sessions if cs.time_created < leeway]
+
+        if session_ids:
+            valid_session_ids_stmt = (
+                select(ChatMessage.chat_session_id)
+                .where(ChatMessage.chat_session_id.in_(session_ids))
+                .where(ChatMessage.message_type != MessageType.SYSTEM)
+                .distinct()
+            )
+            valid_session_ids = set(
+                db_session.execute(valid_session_ids_stmt).scalars().all()
+            )
+
+            chat_sessions = [
+                cs
+                for cs in chat_sessions
+                if cs.time_created >= leeway or cs.id in valid_session_ids
+            ]
+
+        if limit:
+            chat_sessions = chat_sessions[:limit]
+
+    return chat_sessions


 def delete_orphaned_search_docs(db_session: Session) -> None:
--- a/backend/onyx/document_index/opensearch/client.py
+++ b/backend/onyx/document_index/opensearch/client.py
@@ -932,7 +932,7 @@ class OpenSearchIndexClient(OpenSearchClient):
    def search_for_document_ids(
        self,
        body: dict[str, Any],
-        search_type: OpenSearchSearchType = OpenSearchSearchType.DOCUMENT_IDS,
+        search_type: OpenSearchSearchType = OpenSearchSearchType.UNKNOWN,
    ) -> list[str]:
        """Searches the index and returns only document chunk IDs.

--- a/backend/onyx/document_index/opensearch/constants.py
+++ b/backend/onyx/document_index/opensearch/constants.py
@@ -60,8 +60,7 @@ class OpenSearchSearchType(str, Enum):
    KEYWORD = "keyword"
    SEMANTIC = "semantic"
    RANDOM = "random"
-    ID_RETRIEVAL = "id_retrieval"
-    DOCUMENT_IDS = "document_ids"
+    DOC_ID_RETRIEVAL = "doc_id_retrieval"
    UNKNOWN = "unknown"


--- a/backend/onyx/document_index/opensearch/opensearch_document_index.py
+++ b/backend/onyx/document_index/opensearch/opensearch_document_index.py
@@ -6,6 +6,7 @@ import httpx
 from opensearchpy import NotFoundError

 from onyx.access.models import DocumentAccess
+from onyx.configs.app_configs import MAX_CHUNKS_PER_DOC_BATCH
 from onyx.configs.app_configs import VERIFY_CREATE_OPENSEARCH_INDEX_ON_INIT_MT
 from onyx.configs.chat_configs import NUM_RETURNED_HITS
 from onyx.configs.chat_configs import TITLE_CONTENT_RATIO
@@ -738,6 +739,9 @@ class OpenSearchDocumentIndex(DocumentIndex):
                    _flush_chunks(current_chunks)
                current_doc_id = doc_id
                current_chunks = [chunk]
+            elif len(current_chunks) >= MAX_CHUNKS_PER_DOC_BATCH:
+                _flush_chunks(current_chunks)
+                current_chunks = [chunk]
            else:
                current_chunks.append(chunk)

@@ -924,7 +928,7 @@ class OpenSearchDocumentIndex(DocumentIndex):
            search_hits = self._client.search(
                body=query_body,
                search_pipeline_id=None,
-                search_type=OpenSearchSearchType.ID_RETRIEVAL,
+                search_type=OpenSearchSearchType.DOC_ID_RETRIEVAL,
            )
            inference_chunks_uncleaned: list[InferenceChunkUncleaned] = [
                _convert_retrieved_opensearch_chunk_to_inference_chunk_uncleaned(
--- a/backend/onyx/document_index/vespa/vespa_document_index.py
+++ b/backend/onyx/document_index/vespa/vespa_document_index.py
@@ -10,6 +10,7 @@ import httpx
 from pydantic import BaseModel
 from retry import retry

+from onyx.configs.app_configs import MAX_CHUNKS_PER_DOC_BATCH
 from onyx.configs.app_configs import RECENCY_BIAS_MULTIPLIER
 from onyx.configs.app_configs import RERANK_COUNT
 from onyx.configs.chat_configs import DOC_TIME_DECAY
@@ -427,7 +428,9 @@ class VespaDocumentIndex(DocumentIndex):
                new_document_id_to_original_document_id,
                all_cleaned_doc_ids,
            )
-            for chunk_batch in batch_generator(cleaned_chunks, BATCH_SIZE):
+            for chunk_batch in batch_generator(
+                cleaned_chunks, min(BATCH_SIZE, MAX_CHUNKS_PER_DOC_BATCH)
+            ):
                batch_index_vespa_chunks(
                    chunks=chunk_batch,
                    index_name=self._index_name,
--- a/backend/onyx/file_processing/extract_file_text.py
+++ b/backend/onyx/file_processing/extract_file_text.py
@@ -44,6 +44,7 @@ KNOWN_OPENPYXL_BUGS = [
    "Value must be either numerical or a string containing a wildcard",
    "File contains no valid workbook part",
    "Unable to read workbook: could not read stylesheet from None",
+    "Colors must be aRGB hex values",
 ]


--- a/backend/onyx/hooks/api_dependencies.py
+++ b/backend/onyx/hooks/api_dependencies.py
@@ -1,4 +1,3 @@
-from onyx.configs.app_configs import HOOK_ENABLED
 from onyx.error_handling.error_codes import OnyxErrorCode
 from onyx.error_handling.exceptions import OnyxError
 from shared_configs.configs import MULTI_TENANT
@@ -7,10 +6,7 @@ from shared_configs.configs import MULTI_TENANT
 def require_hook_enabled() -> None:
    """FastAPI dependency that gates all hook management endpoints.

-    Hooks are only available in single-tenant / self-hosted deployments with
-    HOOK_ENABLED=true explicitly set. Two layers of protection:
-      1. MULTI_TENANT check — rejects even if HOOK_ENABLED is accidentally set true
-      2. HOOK_ENABLED flag — explicit opt-in by the operator
+    Hooks are only available in single-tenant / self-hosted EE deployments.

    Use as: Depends(require_hook_enabled)
    """
@@ -19,8 +15,3 @@ def require_hook_enabled() -> None:
            OnyxErrorCode.SINGLE_TENANT_ONLY,
            "Hooks are not available in multi-tenant deployments",
        )
-    if not HOOK_ENABLED:
-        raise OnyxError(
-            OnyxErrorCode.ENV_VAR_GATED,
-            "Hooks are not enabled. Set HOOK_ENABLED=true to enable.",
-        )
--- a/backend/onyx/hooks/executor.py
+++ b/backend/onyx/hooks/executor.py
@@ -1,79 +1,22 @@
-"""Hook executor — calls a customer's external HTTP endpoint for a given hook point.
+"""CE hook executor.

-Usage (Celery tasks and FastAPI handlers):
-    result = execute_hook(
-        db_session=db_session,
-        hook_point=HookPoint.QUERY_PROCESSING,
-        payload={"query": "...", "user_email": "...", "chat_session_id": "..."},
-        response_type=QueryProcessingResponse,
-    )
+HookSkipped and HookSoftFailed are real classes kept here because
+process_message.py (CE code) uses isinstance checks against them.

-    if isinstance(result, HookSkipped):
-        # no active hook configured — continue with original behavior
-        ...
-    elif isinstance(result, HookSoftFailed):
-        # hook failed but fail strategy is SOFT — continue with original behavior
-        ...
-    else:
-        # result is a validated Pydantic model instance (response_type)
-        ...
-
-is_reachable update policy
--------------------------
-``is_reachable`` on the Hook row is updated selectively — only when the outcome
-carries meaningful signal about physical reachability:
-
-  NetworkError (DNS, connection refused)  → False  (cannot reach the server)
-  HTTP 401 / 403                          → False  (api_key revoked or invalid)
-  TimeoutException                        → None   (server may be slow, skip write)
-  Other HTTP errors (4xx / 5xx)           → None   (server responded, skip write)
-  Unknown exception                       → None   (no signal, skip write)
-  Non-JSON / non-dict response            → None   (server responded, skip write)
-  Success (2xx, valid dict)               → True   (confirmed reachable)
-
-None means "leave the current value unchanged" — no DB round-trip is made.
-
-DB session design
-----------------
-The executor uses three sessions:
-
-  1. Caller's session (db_session) — used only for the hook lookup read. All
-     needed fields are extracted from the Hook object before the HTTP call, so
-     the caller's session is not held open during the external HTTP request.
-
-  2. Log session — a separate short-lived session opened after the HTTP call
-     completes to write the HookExecutionLog row on failure. Success runs are
-     not recorded. Committed independently of everything else.
-
-  3. Reachable session — a second short-lived session to update is_reachable on
-     the Hook. Kept separate from the log session so a concurrent hook deletion
-     (which causes update_hook__no_commit to raise OnyxError(NOT_FOUND)) cannot
-     prevent the execution log from being written. This update is best-effort.
+execute_hook is the public entry point. It dispatches to _execute_hook_impl
+via fetch_versioned_implementation so that:
+  - CE: onyx.hooks.executor._execute_hook_impl → no-op, returns HookSkipped()
+  - EE: ee.onyx.hooks.executor._execute_hook_impl → real HTTP call
 """

-import json
-import time
 from typing import Any
 from typing import TypeVar

-import httpx
 from pydantic import BaseModel
-from pydantic import ValidationError
 from sqlalchemy.orm import Session

-from onyx.db.engine.sql_engine import get_session_with_current_tenant
-from onyx.db.enums import HookFailStrategy
 from onyx.db.enums import HookPoint
-from onyx.db.hook import create_hook_execution_log__no_commit
-from onyx.db.hook import get_non_deleted_hook_by_hook_point
-from onyx.db.hook import update_hook__no_commit
-from onyx.db.models import Hook
-from onyx.error_handling.error_codes import OnyxErrorCode
-from onyx.error_handling.exceptions import OnyxError
-from onyx.hooks.utils import HOOKS_AVAILABLE
-from onyx.utils.logger import setup_logger
-
-logger = setup_logger()
+from onyx.utils.variable_functionality import fetch_versioned_implementation


 class HookSkipped:
@@ -87,277 +30,15 @@ class HookSoftFailed:
 T = TypeVar("T", bound=BaseModel)


-# ---------------------------------------------------------------------------
-# Private helpers
-# ---------------------------------------------------------------------------
-
-
-class _HttpOutcome(BaseModel):
-    """Structured result of an HTTP hook call, returned by _process_response."""
-
-    is_success: bool
-    updated_is_reachable: (
-        bool | None
-    )  # True/False = write to DB, None = unchanged (skip write)
-    status_code: int | None
-    error_message: str | None
-    response_payload: dict[str, Any] | None
-
-
-def _lookup_hook(
-    db_session: Session,
-    hook_point: HookPoint,
-) -> Hook | HookSkipped:
-    """Return the active Hook or HookSkipped if hooks are unavailable/unconfigured.
-
-    No HTTP call is made and no DB writes are performed for any HookSkipped path.
-    There is nothing to log and no reachability information to update.
-    """
-    if not HOOKS_AVAILABLE:
-        return HookSkipped()
-    hook = get_non_deleted_hook_by_hook_point(
-        db_session=db_session, hook_point=hook_point
-    )
-    if hook is None or not hook.is_active:
-        return HookSkipped()
-    if not hook.endpoint_url:
-        return HookSkipped()
-    return hook
-
-
-def _process_response(
+def _execute_hook_impl(
    *,
-    response: httpx.Response | None,
-    exc: Exception | None,
-    timeout: float,
-) -> _HttpOutcome:
-    """Process the result of an HTTP call and return a structured outcome.
-
-    Called after the client.post() try/except. If post() raised, exc is set and
-    response is None. Otherwise response is set and exc is None. Handles
-    raise_for_status(), JSON decoding, and the dict shape check.
-    """
-    if exc is not None:
-        if isinstance(exc, httpx.NetworkError):
-            msg = f"Hook network error (endpoint unreachable): {exc}"
-            logger.warning(msg, exc_info=exc)
-            return _HttpOutcome(
-                is_success=False,
-                updated_is_reachable=False,
-                status_code=None,
-                error_message=msg,
-                response_payload=None,
-            )
-        if isinstance(exc, httpx.TimeoutException):
-            msg = f"Hook timed out after {timeout}s: {exc}"
-            logger.warning(msg, exc_info=exc)
-            return _HttpOutcome(
-                is_success=False,
-                updated_is_reachable=None,  # timeout doesn't indicate unreachability
-                status_code=None,
-                error_message=msg,
-                response_payload=None,
-            )
-        msg = f"Hook call failed: {exc}"
-        logger.exception(msg, exc_info=exc)
-        return _HttpOutcome(
-            is_success=False,
-            updated_is_reachable=None,  # unknown error — don't make assumptions
-            status_code=None,
-            error_message=msg,
-            response_payload=None,
-        )
-
-    if response is None:
-        raise ValueError(
-            "exactly one of response or exc must be non-None; both are None"
-        )
-    status_code = response.status_code
-
-    try:
-        response.raise_for_status()
-    except httpx.HTTPStatusError as e:
-        msg = f"Hook returned HTTP {e.response.status_code}: {e.response.text}"
-        logger.warning(msg, exc_info=e)
-        # 401/403 means the api_key has been revoked or is invalid — mark unreachable
-        # so the operator knows to update it. All other HTTP errors keep is_reachable
-        # as-is (server is up, the request just failed for application reasons).
-        auth_failed = e.response.status_code in (401, 403)
-        return _HttpOutcome(
-            is_success=False,
-            updated_is_reachable=False if auth_failed else None,
-            status_code=status_code,
-            error_message=msg,
-            response_payload=None,
-        )
-
-    try:
-        response_payload = response.json()
-    except (json.JSONDecodeError, httpx.DecodingError) as e:
-        msg = f"Hook returned non-JSON response: {e}"
-        logger.warning(msg, exc_info=e)
-        return _HttpOutcome(
-            is_success=False,
-            updated_is_reachable=None,  # server responded — reachability unchanged
-            status_code=status_code,
-            error_message=msg,
-            response_payload=None,
-        )
-
-    if not isinstance(response_payload, dict):
-        msg = f"Hook returned non-dict JSON (got {type(response_payload).__name__})"
-        logger.warning(msg)
-        return _HttpOutcome(
-            is_success=False,
-            updated_is_reachable=None,  # server responded — reachability unchanged
-            status_code=status_code,
-            error_message=msg,
-            response_payload=None,
-        )
-
-    return _HttpOutcome(
-        is_success=True,
-        updated_is_reachable=True,
-        status_code=status_code,
-        error_message=None,
-        response_payload=response_payload,
-    )
-
-
-def _persist_result(
-    *,
-    hook_id: int,
-    outcome: _HttpOutcome,
-    duration_ms: int,
-) -> None:
-    """Write the execution log on failure and optionally update is_reachable, each
-    in its own session so a failure in one does not affect the other."""
-    # Only write the execution log on failure — success runs are not recorded.
-    # Must not be skipped if the is_reachable update fails (e.g. hook concurrently
-    # deleted between the initial lookup and here).
-    if not outcome.is_success:
-        try:
-            with get_session_with_current_tenant() as log_session:
-                create_hook_execution_log__no_commit(
-                    db_session=log_session,
-                    hook_id=hook_id,
-                    is_success=False,
-                    error_message=outcome.error_message,
-                    status_code=outcome.status_code,
-                    duration_ms=duration_ms,
-                )
-                log_session.commit()
-        except Exception:
-            logger.exception(
-                f"Failed to persist hook execution log for hook_id={hook_id}"
-            )
-
-    # Update is_reachable separately — best-effort, non-critical.
-    # None means the value is unchanged (set by the caller to skip the no-op write).
-    # update_hook__no_commit can raise OnyxError(NOT_FOUND) if the hook was
-    # concurrently deleted, so keep this isolated from the log write above.
-    if outcome.updated_is_reachable is not None:
-        try:
-            with get_session_with_current_tenant() as reachable_session:
-                update_hook__no_commit(
-                    db_session=reachable_session,
-                    hook_id=hook_id,
-                    is_reachable=outcome.updated_is_reachable,
-                )
-                reachable_session.commit()
-        except Exception:
-            logger.warning(f"Failed to update is_reachable for hook_id={hook_id}")
-
-
-# ---------------------------------------------------------------------------
-# Public API
-# ---------------------------------------------------------------------------
-
-
-def _execute_hook_inner(
-    hook: Hook,
-    payload: dict[str, Any],
-    response_type: type[T],
-) -> T | HookSoftFailed:
-    """Make the HTTP call, validate the response, and return a typed model.
-
-    Raises OnyxError on HARD failure. Returns HookSoftFailed on SOFT failure.
-    """
-    timeout = hook.timeout_seconds
-    hook_id = hook.id
-    fail_strategy = hook.fail_strategy
-    endpoint_url = hook.endpoint_url
-    current_is_reachable: bool | None = hook.is_reachable
-
-    if not endpoint_url:
-        raise ValueError(
-            f"hook_id={hook_id} is active but has no endpoint_url — "
-            "active hooks without an endpoint_url must be rejected by _lookup_hook"
-        )
-
-    start = time.monotonic()
-    response: httpx.Response | None = None
-    exc: Exception | None = None
-    try:
-        api_key: str | None = (
-            hook.api_key.get_value(apply_mask=False) if hook.api_key else None
-        )
-        headers: dict[str, str] = {"Content-Type": "application/json"}
-        if api_key:
-            headers["Authorization"] = f"Bearer {api_key}"
-        with httpx.Client(
-            timeout=timeout, follow_redirects=False
-        ) as client:  # SSRF guard: never follow redirects
-            response = client.post(endpoint_url, json=payload, headers=headers)
-    except Exception as e:
-        exc = e
-    duration_ms = int((time.monotonic() - start) * 1000)
-
-    outcome = _process_response(response=response, exc=exc, timeout=timeout)
-
-    # Validate the response payload against response_type.
-    # A validation failure downgrades the outcome to a failure so it is logged,
-    # is_reachable is left unchanged (server responded — just a bad payload),
-    # and fail_strategy is respected below.
-    validated_model: T | None = None
-    if outcome.is_success and outcome.response_payload is not None:
-        try:
-            validated_model = response_type.model_validate(outcome.response_payload)
-        except ValidationError as e:
-            msg = (
-                f"Hook response failed validation against {response_type.__name__}: {e}"
-            )
-            outcome = _HttpOutcome(
-                is_success=False,
-                updated_is_reachable=None,  # server responded — reachability unchanged
-                status_code=outcome.status_code,
-                error_message=msg,
-                response_payload=None,
-            )
-
-    # Skip the is_reachable write when the value would not change — avoids a
-    # no-op DB round-trip on every call when the hook is already in the expected state.
-    if outcome.updated_is_reachable == current_is_reachable:
-        outcome = outcome.model_copy(update={"updated_is_reachable": None})
-    _persist_result(hook_id=hook_id, outcome=outcome, duration_ms=duration_ms)
-
-    if not outcome.is_success:
-        if fail_strategy == HookFailStrategy.HARD:
-            raise OnyxError(
-                OnyxErrorCode.HOOK_EXECUTION_FAILED,
-                outcome.error_message or "Hook execution failed.",
-            )
-        logger.warning(
-            f"Hook execution failed (soft fail) for hook_id={hook_id}: {outcome.error_message}"
-        )
-        return HookSoftFailed()
-
-    if validated_model is None:
-        raise OnyxError(
-            OnyxErrorCode.INTERNAL_ERROR,
-            f"validated_model is None for successful hook call (hook_id={hook_id})",
-        )
-    return validated_model
+    db_session: Session,  # noqa: ARG001
+    hook_point: HookPoint,  # noqa: ARG001
+    payload: dict[str, Any],  # noqa: ARG001
+    response_type: type[T],  # noqa: ARG001
+) -> T | HookSkipped | HookSoftFailed:
+    """CE no-op — hooks are not available without EE."""
+    return HookSkipped()


 def execute_hook(
@@ -367,25 +48,15 @@ def execute_hook(
    payload: dict[str, Any],
    response_type: type[T],
 ) -> T | HookSkipped | HookSoftFailed:
-    """Execute the hook for the given hook point synchronously.
+    """Execute the hook for the given hook point.

-    Returns HookSkipped if no active hook is configured, HookSoftFailed if the
-    hook failed with SOFT fail strategy, or a validated response model on success.
-    Raises OnyxError on HARD failure or if the hook is misconfigured.
+    Dispatches to the versioned implementation so EE gets the real executor
+    and CE gets the no-op stub, without any changes at the call site.
    """
-    hook = _lookup_hook(db_session, hook_point)
-    if isinstance(hook, HookSkipped):
-        return hook
-
-    fail_strategy = hook.fail_strategy
-    hook_id = hook.id
-
-    try:
-        return _execute_hook_inner(hook, payload, response_type)
-    except Exception:
-        if fail_strategy == HookFailStrategy.SOFT:
-            logger.exception(
-                f"Unexpected error in hook execution (soft fail) for hook_id={hook_id}"
-            )
-            return HookSoftFailed()
-        raise
+    impl = fetch_versioned_implementation("onyx.hooks.executor", "_execute_hook_impl")
+    return impl(
+        db_session=db_session,
+        hook_point=hook_point,
+        payload=payload,
+        response_type=response_type,
+    )
--- a/backend/onyx/hooks/utils.py
+++ b/backend/onyx/hooks/utils.py
@@ -1,5 +0,0 @@
-from onyx.configs.app_configs import HOOK_ENABLED
-from shared_configs.configs import MULTI_TENANT
-
-# True only when hooks are available: single-tenant deployment with HOOK_ENABLED=true.
-HOOKS_AVAILABLE: bool = HOOK_ENABLED and not MULTI_TENANT
--- a/backend/onyx/indexing/adapters/document_indexing_adapter.py
+++ b/backend/onyx/indexing/adapters/document_indexing_adapter.py
@@ -19,7 +19,8 @@ from onyx.db.document import update_docs_updated_at__no_commit
 from onyx.db.document_set import fetch_document_sets_for_documents
 from onyx.indexing.indexing_pipeline import DocumentBatchPrepareContext
 from onyx.indexing.indexing_pipeline import index_doc_batch_prepare
-from onyx.indexing.models import BuildMetadataAwareChunksResult
+from onyx.indexing.models import ChunkEnrichmentContext
+from onyx.indexing.models import DocAwareChunk
 from onyx.indexing.models import DocMetadataAwareIndexChunk
 from onyx.indexing.models import IndexChunk
 from onyx.indexing.models import UpdatableChunkData
@@ -85,14 +86,21 @@ class DocumentIndexingBatchAdapter:
        ) as transaction:
            yield transaction

-    def build_metadata_aware_chunks(
+    def prepare_enrichment(
        self,
-        chunks_with_embeddings: list[IndexChunk],
-        chunk_content_scores: list[float],
-        tenant_id: str,
        context: DocumentBatchPrepareContext,
-    ) -> BuildMetadataAwareChunksResult:
-        """Enrich chunks with access, document sets, boosts, token counts, and hierarchy."""
+        tenant_id: str,
+        chunks: list[DocAwareChunk],
+    ) -> "DocumentChunkEnricher":
+        """Do all DB lookups once and return a per-chunk enricher."""
+        updatable_ids = [doc.id for doc in context.updatable_docs]
+
+        doc_id_to_new_chunk_cnt: dict[str, int] = {
+            doc_id: 0 for doc_id in updatable_ids
+        }
+        for chunk in chunks:
+            if chunk.source_document.id in doc_id_to_new_chunk_cnt:
+                doc_id_to_new_chunk_cnt[chunk.source_document.id] += 1

        no_access = DocumentAccess.build(
            user_emails=[],
@@ -102,67 +110,30 @@ class DocumentIndexingBatchAdapter:
            is_public=False,
        )

-        updatable_ids = [doc.id for doc in context.updatable_docs]
-
-        doc_id_to_access_info = get_access_for_documents(
-            document_ids=updatable_ids, db_session=self.db_session
-        )
-        doc_id_to_document_set = {
-            document_id: document_sets
-            for document_id, document_sets in fetch_document_sets_for_documents(
+        return DocumentChunkEnricher(
+            doc_id_to_access_info=get_access_for_documents(
                document_ids=updatable_ids, db_session=self.db_session
-            )
-        }
-
-        doc_id_to_previous_chunk_cnt: dict[str, int] = {
-            document_id: chunk_count
-            for document_id, chunk_count in fetch_chunk_counts_for_documents(
-                document_ids=updatable_ids,
-                db_session=self.db_session,
-            )
-        }
-
-        doc_id_to_new_chunk_cnt: dict[str, int] = {
-            doc_id: 0 for doc_id in updatable_ids
-        }
-        for chunk in chunks_with_embeddings:
-            if chunk.source_document.id in doc_id_to_new_chunk_cnt:
-                doc_id_to_new_chunk_cnt[chunk.source_document.id] += 1
-
-        # Get ancestor hierarchy node IDs for each document
-        doc_id_to_ancestor_ids = self._get_ancestor_ids_for_documents(
-            context.updatable_docs, tenant_id
-        )
-
-        access_aware_chunks = [
-            DocMetadataAwareIndexChunk.from_index_chunk(
-                index_chunk=chunk,
-                access=doc_id_to_access_info.get(chunk.source_document.id, no_access),
-                document_sets=set(
-                    doc_id_to_document_set.get(chunk.source_document.id, [])
-                ),
-                user_project=[],
-                personas=[],
-                boost=(
-                    context.id_to_boost_map[chunk.source_document.id]
-                    if chunk.source_document.id in context.id_to_boost_map
-                    else DEFAULT_BOOST
-                ),
-                tenant_id=tenant_id,
-                aggregated_chunk_boost_factor=chunk_content_scores[chunk_num],
-                ancestor_hierarchy_node_ids=doc_id_to_ancestor_ids[
-                    chunk.source_document.id
-                ],
-            )
-            for chunk_num, chunk in enumerate(chunks_with_embeddings)
-        ]
-
-        return BuildMetadataAwareChunksResult(
-            chunks=access_aware_chunks,
-            doc_id_to_previous_chunk_cnt=doc_id_to_previous_chunk_cnt,
-            doc_id_to_new_chunk_cnt=doc_id_to_new_chunk_cnt,
-            user_file_id_to_raw_text={},
-            user_file_id_to_token_count={},
+            ),
+            doc_id_to_document_set={
+                document_id: document_sets
+                for document_id, document_sets in fetch_document_sets_for_documents(
+                    document_ids=updatable_ids, db_session=self.db_session
+                )
+            },
+            doc_id_to_ancestor_ids=self._get_ancestor_ids_for_documents(
+                context.updatable_docs, tenant_id
+            ),
+            id_to_boost_map=context.id_to_boost_map,
+            doc_id_to_previous_chunk_cnt={
+                document_id: chunk_count
+                for document_id, chunk_count in fetch_chunk_counts_for_documents(
+                    document_ids=updatable_ids,
+                    db_session=self.db_session,
+                )
+            },
+            doc_id_to_new_chunk_cnt=dict(doc_id_to_new_chunk_cnt),
+            no_access=no_access,
+            tenant_id=tenant_id,
        )

    def _get_ancestor_ids_for_documents(
@@ -203,7 +174,7 @@ class DocumentIndexingBatchAdapter:
        context: DocumentBatchPrepareContext,
        updatable_chunk_data: list[UpdatableChunkData],
        filtered_documents: list[Document],
-        result: BuildMetadataAwareChunksResult,
+        enrichment: ChunkEnrichmentContext,
    ) -> None:
        """Finalize DB updates, store plaintext, and mark docs as indexed."""
        updatable_ids = [doc.id for doc in context.updatable_docs]
@@ -227,7 +198,7 @@ class DocumentIndexingBatchAdapter:

        update_docs_chunk_count__no_commit(
            document_ids=updatable_ids,
-            doc_id_to_chunk_count=result.doc_id_to_new_chunk_cnt,
+            doc_id_to_chunk_count=enrichment.doc_id_to_new_chunk_cnt,
            db_session=self.db_session,
        )

@@ -249,3 +220,52 @@ class DocumentIndexingBatchAdapter:
        )

        self.db_session.commit()
+
+
+class DocumentChunkEnricher:
+    """Holds per-document lookup tables and applies them one chunk at a time."""
+
+    def __init__(
+        self,
+        doc_id_to_access_info: dict[str, DocumentAccess],
+        doc_id_to_document_set: dict[str, list[str]],
+        doc_id_to_ancestor_ids: dict[str, list[int]],
+        id_to_boost_map: dict[str, int],
+        doc_id_to_previous_chunk_cnt: dict[str, int],
+        doc_id_to_new_chunk_cnt: dict[str, int],
+        no_access: DocumentAccess,
+        tenant_id: str,
+    ) -> None:
+        # Chunk counts are exposed as plain public attributes.
+        self.doc_id_to_previous_chunk_cnt = doc_id_to_previous_chunk_cnt
+        self.doc_id_to_new_chunk_cnt = doc_id_to_new_chunk_cnt
+        # Private state consumed by enrich_chunk.
+        self._tenant_id = tenant_id
+        self._no_access = no_access
+        self._id_to_boost_map = id_to_boost_map
+        self._doc_id_to_access_info = doc_id_to_access_info
+        self._doc_id_to_document_set = doc_id_to_document_set
+        self._doc_id_to_ancestor_ids = doc_id_to_ancestor_ids
+
+    def enrich_chunk(
+        self, chunk: IndexChunk, score: float
+    ) -> DocMetadataAwareIndexChunk:
+        """Attach the stored metadata for the chunk's source document."""
+        doc_id = chunk.source_document.id
+        # Access, document sets and boost fall back to defaults for unknown ids.
+        access = self._doc_id_to_access_info.get(doc_id, self._no_access)
+        document_sets = set(self._doc_id_to_document_set.get(doc_id, []))
+        boost = self._id_to_boost_map.get(doc_id, DEFAULT_BOOST)
+        # Ancestors use plain indexing: an id missing here raises KeyError.
+        ancestors = self._doc_id_to_ancestor_ids[doc_id]
+        return DocMetadataAwareIndexChunk.from_index_chunk(
+            index_chunk=chunk,
+            access=access,
+            document_sets=document_sets,
+            user_project=[],
+            personas=[],
+            boost=boost,
+            tenant_id=self._tenant_id,
+            aggregated_chunk_boost_factor=score,
+            ancestor_hierarchy_node_ids=ancestors,
+        )
--- a/backend/onyx/indexing/adapters/user_file_indexing_adapter.py
+++ b/backend/onyx/indexing/adapters/user_file_indexing_adapter.py
@@ -1,6 +1,9 @@
+from __future__ import annotations
+
 import contextlib
 import datetime
 import time
+from collections import defaultdict
 from collections.abc import Generator
 from uuid import UUID

@@ -24,7 +27,8 @@ from onyx.db.user_file import fetch_persona_ids_for_user_files
 from onyx.db.user_file import fetch_user_project_ids_for_user_files
 from onyx.file_store.utils import store_user_file_plaintext
 from onyx.indexing.indexing_pipeline import DocumentBatchPrepareContext
-from onyx.indexing.models import BuildMetadataAwareChunksResult
+from onyx.indexing.models import ChunkEnrichmentContext
+from onyx.indexing.models import DocAwareChunk
 from onyx.indexing.models import DocMetadataAwareIndexChunk
 from onyx.indexing.models import IndexChunk
 from onyx.indexing.models import UpdatableChunkData
@@ -102,13 +106,20 @@ class UserFileIndexingAdapter:
                f"Failed to acquire locks after {_NUM_LOCK_ATTEMPTS} attempts for user files: {[doc.id for doc in documents]}"
            )

-    def build_metadata_aware_chunks(
+    def prepare_enrichment(
        self,
-        chunks_with_embeddings: list[IndexChunk],
-        chunk_content_scores: list[float],
-        tenant_id: str,
        context: DocumentBatchPrepareContext,
-    ) -> BuildMetadataAwareChunksResult:
+        tenant_id: str,
+        chunks: list[DocAwareChunk],
+    ) -> UserFileChunkEnricher:
+        """Do all DB lookups and pre-compute file metadata from chunks."""
+        updatable_ids = [doc.id for doc in context.updatable_docs]
+
+        doc_id_to_new_chunk_cnt: dict[str, int] = defaultdict(int)
+        content_by_file: dict[str, list[str]] = defaultdict(list)
+        for chunk in chunks:
+            doc_id_to_new_chunk_cnt[chunk.source_document.id] += 1
+            content_by_file[chunk.source_document.id].append(chunk.content)

        no_access = DocumentAccess.build(
            user_emails=[],
@@ -118,7 +129,6 @@ class UserFileIndexingAdapter:
            is_public=False,
        )

-        updatable_ids = [doc.id for doc in context.updatable_docs]
        user_file_id_to_project_ids = fetch_user_project_ids_for_user_files(
            user_file_ids=updatable_ids,
            db_session=self.db_session,
@@ -139,17 +149,6 @@ class UserFileIndexingAdapter:
            )
        }

-        user_file_id_to_new_chunk_cnt: dict[str, int] = {
-            user_file_id: len(
-                [
-                    chunk
-                    for chunk in chunks_with_embeddings
-                    if chunk.source_document.id == user_file_id
-                ]
-            )
-            for user_file_id in updatable_ids
-        }
-
        # Initialize tokenizer used for token count calculation
        try:
            llm = get_default_llm()
@@ -164,15 +163,9 @@ class UserFileIndexingAdapter:
        user_file_id_to_raw_text: dict[str, str] = {}
        user_file_id_to_token_count: dict[str, int | None] = {}
        for user_file_id in updatable_ids:
-            user_file_chunks = [
-                chunk
-                for chunk in chunks_with_embeddings
-                if chunk.source_document.id == user_file_id
-            ]
-            if user_file_chunks:
-                combined_content = " ".join(
-                    [chunk.content for chunk in user_file_chunks]
-                )
+            contents = content_by_file.get(user_file_id)
+            if contents:
+                combined_content = " ".join(contents)
                user_file_id_to_raw_text[str(user_file_id)] = combined_content
                token_count: int = (
                    count_tokens(combined_content, llm_tokenizer)
@@ -184,28 +177,16 @@ class UserFileIndexingAdapter:
                user_file_id_to_raw_text[str(user_file_id)] = ""
                user_file_id_to_token_count[str(user_file_id)] = None

-        access_aware_chunks = [
-            DocMetadataAwareIndexChunk.from_index_chunk(
-                index_chunk=chunk,
-                access=user_file_id_to_access.get(chunk.source_document.id, no_access),
-                document_sets=set(),
-                user_project=user_file_id_to_project_ids.get(
-                    chunk.source_document.id, []
-                ),
-                personas=user_file_id_to_persona_ids.get(chunk.source_document.id, []),
-                boost=DEFAULT_BOOST,
-                tenant_id=tenant_id,
-                aggregated_chunk_boost_factor=chunk_content_scores[chunk_num],
-            )
-            for chunk_num, chunk in enumerate(chunks_with_embeddings)
-        ]
-
-        return BuildMetadataAwareChunksResult(
-            chunks=access_aware_chunks,
+        return UserFileChunkEnricher(
+            user_file_id_to_access=user_file_id_to_access,
+            user_file_id_to_project_ids=user_file_id_to_project_ids,
+            user_file_id_to_persona_ids=user_file_id_to_persona_ids,
            doc_id_to_previous_chunk_cnt=user_file_id_to_previous_chunk_cnt,
-            doc_id_to_new_chunk_cnt=user_file_id_to_new_chunk_cnt,
+            doc_id_to_new_chunk_cnt=dict(doc_id_to_new_chunk_cnt),
            user_file_id_to_raw_text=user_file_id_to_raw_text,
            user_file_id_to_token_count=user_file_id_to_token_count,
+            no_access=no_access,
+            tenant_id=tenant_id,
        )

    def _notify_assistant_owners_if_files_ready(
@@ -249,8 +230,9 @@ class UserFileIndexingAdapter:
        context: DocumentBatchPrepareContext,
        updatable_chunk_data: list[UpdatableChunkData],  # noqa: ARG002
        filtered_documents: list[Document],  # noqa: ARG002
-        result: BuildMetadataAwareChunksResult,
+        enrichment: ChunkEnrichmentContext,
    ) -> None:
+        assert isinstance(enrichment, UserFileChunkEnricher)
        user_file_ids = [doc.id for doc in context.updatable_docs]

        user_files = (
@@ -266,8 +248,10 @@ class UserFileIndexingAdapter:
            user_file.last_project_sync_at = datetime.datetime.now(
                datetime.timezone.utc
            )
-            user_file.chunk_count = result.doc_id_to_new_chunk_cnt[str(user_file.id)]
-            user_file.token_count = result.user_file_id_to_token_count[
+            user_file.chunk_count = enrichment.doc_id_to_new_chunk_cnt.get(
+                str(user_file.id), 0
+            )
+            user_file.token_count = enrichment.user_file_id_to_token_count[
                str(user_file.id)
            ]

@@ -279,8 +263,54 @@ class UserFileIndexingAdapter:
        # Store the plaintext in the file store for faster retrieval
        # NOTE: this creates its own session to avoid committing the overall
        # transaction.
-        for user_file_id, raw_text in result.user_file_id_to_raw_text.items():
+        for user_file_id, raw_text in enrichment.user_file_id_to_raw_text.items():
            store_user_file_plaintext(
                user_file_id=UUID(user_file_id),
                plaintext_content=raw_text,
            )
+
+
+class UserFileChunkEnricher:
+    """Holds precomputed per-file lookups and enriches user-file chunks with them."""
+
+    def __init__(
+        self,
+        user_file_id_to_access: dict[str, DocumentAccess],
+        user_file_id_to_project_ids: dict[str, list[int]],
+        user_file_id_to_persona_ids: dict[str, list[int]],
+        doc_id_to_previous_chunk_cnt: dict[str, int],
+        doc_id_to_new_chunk_cnt: dict[str, int],
+        user_file_id_to_raw_text: dict[str, str],
+        user_file_id_to_token_count: dict[str, int | None],
+        no_access: DocumentAccess,
+        tenant_id: str,
+    ) -> None:
+        self.doc_id_to_previous_chunk_cnt = doc_id_to_previous_chunk_cnt
+        self.doc_id_to_new_chunk_cnt = doc_id_to_new_chunk_cnt
+        self.user_file_id_to_raw_text = user_file_id_to_raw_text
+        self.user_file_id_to_token_count = user_file_id_to_token_count
+        self._user_file_id_to_access = user_file_id_to_access
+        self._user_file_id_to_project_ids = user_file_id_to_project_ids
+        self._user_file_id_to_persona_ids = user_file_id_to_persona_ids
+        self._no_access = no_access
+        self._tenant_id = tenant_id
+
+    def enrich_chunk(
+        self, chunk: IndexChunk, score: float
+    ) -> DocMetadataAwareIndexChunk:
+        """Attach access, project and persona metadata to one embedded chunk."""
+        # Lookups are keyed by source-document id; misses fall back to defaults.
+        file_id = chunk.source_document.id
+        access = self._user_file_id_to_access.get(file_id, self._no_access)
+        project_ids = self._user_file_id_to_project_ids.get(file_id, [])
+        persona_ids = self._user_file_id_to_persona_ids.get(file_id, [])
+        return DocMetadataAwareIndexChunk.from_index_chunk(
+            index_chunk=chunk,
+            access=access,
+            document_sets=set(),
+            user_project=project_ids,
+            personas=persona_ids,
+            boost=DEFAULT_BOOST,
+            tenant_id=self._tenant_id,
+            aggregated_chunk_boost_factor=score,
+        )
--- a/backend/onyx/indexing/chunk_batch_store.py
+++ b/backend/onyx/indexing/chunk_batch_store.py
@@ -0,0 +1,89 @@
+import pickle
+import shutil
+import tempfile
+from collections.abc import Iterator
+from pathlib import Path
+
+from onyx.indexing.models import IndexChunk
+
+
+class ChunkBatchStore:
+    """Spills batches of embedded chunks to pickle files in a temp directory.
+
+    The store creates the directory on ``__enter__`` and deletes it on
+    ``__exit__``; in between it offers save, stream and scrub operations.
+
+    Intended to be used as a context manager so the files are cleaned up::
+
+        with ChunkBatchStore() as store:
+            store.save(chunks, batch_idx=0)
+            for chunk in store.stream():
+                ...
+    """
+
+    _EXT = ".pkl"
+
+    def __init__(self) -> None:
+        self._tmpdir: Path | None = None
+
+    # -- context manager -----------------------------------------------------
+
+    def __enter__(self) -> "ChunkBatchStore":
+        self._tmpdir = Path(tempfile.mkdtemp(prefix="onyx_embeddings_"))
+        return self
+
+    def __exit__(self, *_exc: object) -> None:
+        tmpdir, self._tmpdir = self._tmpdir, None
+        if tmpdir is not None:
+            shutil.rmtree(tmpdir, ignore_errors=True)
+
+    @property
+    def _dir(self) -> Path:
+        assert self._tmpdir is not None, "ChunkBatchStore used outside context manager"
+        return self._tmpdir
+
+    # -- storage primitives --------------------------------------------------
+
+    def save(self, chunks: list[IndexChunk], batch_idx: int) -> None:
+        """Pickle one batch of embedded chunks into ``batch_<batch_idx>.pkl``."""
+        with (self._dir / f"batch_{batch_idx}{self._EXT}").open("wb") as sink:
+            pickle.dump(chunks, sink)
+
+    def _load(self, batch_file: Path) -> list[IndexChunk]:
+        """Unpickle the batch of embedded chunks stored in *batch_file*."""
+        with batch_file.open("rb") as source:
+            return pickle.load(source)
+
+    def _batch_files(self) -> list[Path]:
+        """List the batch files, ordered by their numeric batch index."""
+        prefix = "batch_"
+        found = list(self._dir.glob(f"{prefix}*{self._EXT}"))
+        found.sort(key=lambda path: int(path.stem.removeprefix(prefix)))
+        return found
+
+    # -- higher-level operations ---------------------------------------------
+
+    def stream(self) -> Iterator[IndexChunk]:
+        """Lazily yield every stored chunk, batch by batch in index order.
+
+        Every call re-reads the files from disk, so the data can be iterated
+        more than once (e.g. once per document index).
+        """
+        for path in self._batch_files():
+            yield from self._load(path)
+
+    def scrub_failed_docs(self, failed_doc_ids: set[str]) -> None:
+        """Drop every chunk of a document in *failed_doc_ids* from all batch files.
+
+        A document that fails embedding in batch N may already have chunks
+        stored by earlier batches; removing them keeps the stored output
+        all-or-nothing per document.
+        """
+        for path in self._batch_files():
+            loaded = self._load(path)
+            kept = [c for c in loaded if c.source_document.id not in failed_doc_ids]
+            if len(kept) == len(loaded):
+                # Nothing was removed, so the file does not need rewriting.
+                continue
+            with path.open("wb") as sink:
+                pickle.dump(kept, sink)
--- a/backend/onyx/indexing/indexing_pipeline.py
+++ b/backend/onyx/indexing/indexing_pipeline.py
@@ -1,5 +1,8 @@
 from collections import defaultdict
 from collections.abc import Callable
+from collections.abc import Generator
+from collections.abc import Iterator
+from contextlib import contextmanager
 from typing import Protocol

 from pydantic import BaseModel
@@ -9,6 +12,7 @@ from sqlalchemy.orm import Session
 from onyx.configs.app_configs import DEFAULT_CONTEXTUAL_RAG_LLM_NAME
 from onyx.configs.app_configs import DEFAULT_CONTEXTUAL_RAG_LLM_PROVIDER
 from onyx.configs.app_configs import ENABLE_CONTEXTUAL_RAG
+from onyx.configs.app_configs import MAX_CHUNKS_PER_DOC_BATCH
 from onyx.configs.app_configs import MAX_DOCUMENT_CHARS
 from onyx.configs.app_configs import MAX_TOKENS_FOR_FULL_INCLUSION
 from onyx.configs.app_configs import USE_CHUNK_SUMMARY
@@ -43,10 +47,12 @@ from onyx.document_index.interfaces import DocumentMetadata
 from onyx.document_index.interfaces import IndexBatchParams
 from onyx.file_processing.image_summarization import summarize_image_with_error_handling
 from onyx.file_store.file_store import get_default_file_store
+from onyx.indexing.chunk_batch_store import ChunkBatchStore
 from onyx.indexing.chunker import Chunker
 from onyx.indexing.embedder import embed_chunks_with_failure_handling
 from onyx.indexing.embedder import IndexingEmbedder
 from onyx.indexing.models import DocAwareChunk
+from onyx.indexing.models import DocMetadataAwareIndexChunk
 from onyx.indexing.models import IndexingBatchAdapter
 from onyx.indexing.models import UpdatableChunkData
 from onyx.indexing.vector_db_insertion import write_chunks_to_vector_db_with_backoff
@@ -63,6 +69,7 @@ from onyx.natural_language_processing.utils import tokenizer_trim_middle
 from onyx.prompts.contextual_retrieval import CONTEXTUAL_RAG_PROMPT1
 from onyx.prompts.contextual_retrieval import CONTEXTUAL_RAG_PROMPT2
 from onyx.prompts.contextual_retrieval import DOCUMENT_SUMMARY_PROMPT
+from onyx.utils.batching import batch_generator
 from onyx.utils.logger import setup_logger
 from onyx.utils.postgres_sanitization import sanitize_documents_for_postgres
 from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
@@ -91,6 +98,20 @@ class IndexingPipelineResult(BaseModel):

    failures: list[ConnectorFailure]

+    @classmethod
+    def empty(cls, total_docs: int) -> "IndexingPipelineResult":
+        """Result for a batch that indexed nothing: zero counts, no failures."""
+        # Only ``total_docs`` varies; every other field is zero or empty.
+        no_failures: list[ConnectorFailure] = []
+        return cls(
+            new_docs=0, total_docs=total_docs, total_chunks=0, failures=no_failures
+        )
+
+
+class ChunkEmbeddingResult(BaseModel):
+    successful_chunk_ids: list[tuple[int, str]]  # (chunk_id, document_id) pairs
+    connector_failures: list[ConnectorFailure]  # failures reported while embedding
+

 class IndexingPipelineProtocol(Protocol):
    def __call__(
@@ -139,6 +160,110 @@ def _upsert_documents_in_db(
        )


+def _get_failed_doc_ids(failures: list[ConnectorFailure]) -> set[str]:
+    """Return IDs of the failed documents; failures without a document are skipped."""
+    return {doc.document_id for doc in (f.failed_document for f in failures) if doc}
+
+
+def _embed_chunks_to_store(
+    chunks: list[DocAwareChunk],
+    embedder: IndexingEmbedder,
+    tenant_id: str,
+    request_id: str | None,
+    store: ChunkBatchStore,
+) -> ChunkEmbeddingResult:
+    """Embed *chunks* batch by batch, saving every embedded batch to *store*.
+
+    Output is all-or-nothing per document: once a document fails embedding in
+    any batch, its chunks are skipped in later batches and removed from the
+    batches that were already written.
+
+    Returns the ``(chunk_id, document_id)`` pairs of the chunks left in the
+    store together with every embedding failure that was reported.
+    """
+    embedded_ids: list[tuple[int, str]] = []
+    failures: list[ConnectorFailure] = []
+    # Doc IDs that failed in any batch so far. A failure in batch N makes
+    # batch N+1 skip that doc's chunks, and the final scrub strips them from
+    # the batches saved before N.
+    failed_doc_ids: set[str] = set()
+
+    for batch_idx, chunk_batch in enumerate(
+        batch_generator(chunks, MAX_CHUNKS_PER_DOC_BATCH)
+    ):
+        # Drop chunks whose document already failed in a previous batch.
+        chunk_batch = [
+            c for c in chunk_batch if c.source_document.id not in failed_doc_ids
+        ]
+        if not chunk_batch:
+            continue
+
+        logger.debug(f"Embedding batch {batch_idx}: {len(chunk_batch)} chunks")
+
+        embedded, batch_failures = embed_chunks_with_failure_handling(
+            chunks=chunk_batch,
+            embedder=embedder,
+            tenant_id=tenant_id,
+            request_id=request_id,
+        )
+        # Record failing docs first so the filter below also covers this batch.
+        failures += batch_failures
+        failed_doc_ids |= _get_failed_doc_ids(batch_failures)
+
+        # Keep only embedded chunks of documents that have not failed so far.
+        embedded = [c for c in embedded if c.source_document.id not in failed_doc_ids]
+
+        # Track (chunk_id, document_id) for every chunk about to be stored.
+        embedded_ids += [(c.chunk_id, c.source_document.id) for c in embedded]
+
+        # Persist the batch, then release the local reference to its chunks.
+        store.save(embedded, batch_idx)
+        del embedded
+
+    # Scrub earlier batches for docs that failed in later batches.
+    if failed_doc_ids:
+        store.scrub_failed_docs(failed_doc_ids)
+        embedded_ids = [
+            (chunk_id, doc_id)
+            for chunk_id, doc_id in embedded_ids
+            if doc_id not in failed_doc_ids
+        ]
+
+    return ChunkEmbeddingResult(
+        successful_chunk_ids=embedded_ids,
+        connector_failures=failures,
+    )
+
+
+@contextmanager
+def embed_and_stream(
+    chunks: list[DocAwareChunk],
+    embedder: IndexingEmbedder,
+    tenant_id: str,
+    request_id: str | None,
+) -> Generator[tuple[ChunkEmbeddingResult, ChunkBatchStore], None, None]:
+    """Context manager: embed *chunks* to disk, then yield ``(result, store)``.
+
+    All embedding happens on entry, before the ``with`` body runs. The store
+    owns the temp directory, so the spilled files are deleted when the
+    context exits.
+
+    Yields:
+        The ``ChunkEmbeddingResult`` of the embedding pass and the
+        ``ChunkBatchStore`` holding the embedded chunks.
+
+    Usage::
+
+        with embed_and_stream(chunks, embedder, tenant_id, req_id) as (result, store):
+            for chunk in store.stream():
+                ...
+    """
+    with ChunkBatchStore() as store:
+        result = _embed_chunks_to_store(chunks, embedder, tenant_id, request_id, store)
+        # Yield inside the ``with`` so the files outlive the caller's body.
+        yield result, store
+
+
 def get_doc_ids_to_update(
    documents: list[Document], db_docs: list[DBDocument]
 ) -> list[Document]:
@@ -637,6 +762,29 @@ def add_contextual_summaries(
    return chunks


+def _verify_indexing_completeness(
+    insertion_records: list[DocumentInsertionRecord],
+    write_failures: list[ConnectorFailure],
+    embedding_failed_doc_ids: set[str],
+    updatable_ids: list[str],
+    document_index_name: str,
+) -> None:
+    """Raise unless every updatable document was indexed or reported as failed."""
+    inserted_ids = {r.document_id for r in insertion_records}
+    write_failed_ids = {
+        f.failed_document.document_id for f in write_failures if f.failed_document
+    }
+    all_returned_doc_ids = inserted_ids | write_failed_ids | embedding_failed_doc_ids
+    if all_returned_doc_ids != set(updatable_ids):
+        raise RuntimeError(
+            f"Some documents were not successfully indexed. "
+            f"Updatable IDs: {updatable_ids}, "
+            f"Returned IDs: {all_returned_doc_ids}. "
+            f"This should never happen. "
+            f"This occured for document index {document_index_name}"
+        )
+
+
@log_function_time(debug_only=True)
 def index_doc_batch(
    *,
@@ -672,12 +820,7 @@ def index_doc_batch(
    filtered_documents = filter_fnc(document_batch)
    context = adapter.prepare(filtered_documents, ignore_time_skip)
    if not context:
-        return IndexingPipelineResult(
-            new_docs=0,
-            total_docs=len(filtered_documents),
-            total_chunks=0,
-            failures=[],
-        )
+        return IndexingPipelineResult.empty(len(filtered_documents))

    # Convert documents to IndexingDocument objects with processed section
    # logger.debug("Processing image sections")
@@ -716,117 +859,99 @@ def index_doc_batch(
        )

    logger.debug("Starting embedding")
-    chunks_with_embeddings, embedding_failures = (
-        embed_chunks_with_failure_handling(
-            chunks=chunks,
-            embedder=embedder,
-            tenant_id=tenant_id,
-            request_id=request_id,
-        )
-        if chunks
-        else ([], [])
-    )
+    with embed_and_stream(chunks, embedder, tenant_id, request_id) as (
+        embedding_result,
+        chunk_store,
+    ):
+        updatable_ids = [doc.id for doc in context.updatable_docs]
+        updatable_chunk_data = [
+            UpdatableChunkData(
+                chunk_id=chunk_id,
+                document_id=document_id,
+                boost_score=1.0,
+            )
+            for chunk_id, document_id in embedding_result.successful_chunk_ids
+        ]

-    chunk_content_scores = [1.0] * len(chunks_with_embeddings)
-
-    updatable_ids = [doc.id for doc in context.updatable_docs]
-    updatable_chunk_data = [
-        UpdatableChunkData(
-            chunk_id=chunk.chunk_id,
-            document_id=chunk.source_document.id,
-            boost_score=score,
-        )
-        for chunk, score in zip(chunks_with_embeddings, chunk_content_scores)
-    ]
-
-    # Acquires a lock on the documents so that no other process can modify them
-    # NOTE: don't need to acquire till here, since this is when the actual race condition
-    # with Vespa can occur.
-    with adapter.lock_context(context.updatable_docs):
-        # we're concerned about race conditions where multiple simultaneous indexings might result
-        # in one set of metadata overwriting another one in vespa.
-        # we still write data here for the immediate and most likely correct sync, but
-        # to resolve this, an update of the last modified field at the end of this loop
-        # always triggers a final metadata sync via the celery queue
-        result = adapter.build_metadata_aware_chunks(
-            chunks_with_embeddings=chunks_with_embeddings,
-            chunk_content_scores=chunk_content_scores,
-            tenant_id=tenant_id,
-            context=context,
+        embedding_failed_doc_ids = _get_failed_doc_ids(
+            embedding_result.connector_failures
        )

-        short_descriptor_list = [chunk.to_short_descriptor() for chunk in result.chunks]
-        short_descriptor_log = str(short_descriptor_list)[:1024]
-        logger.debug(f"Indexing the following chunks: {short_descriptor_log}")
+        # Filter to only successfully embedded chunks so
+        # doc_id_to_new_chunk_cnt reflects what's actually written to Vespa.
+        embedded_chunks = [
+            c for c in chunks if c.source_document.id not in embedding_failed_doc_ids
+        ]

-        primary_doc_idx_insertion_records: list[DocumentInsertionRecord] | None = None
-        primary_doc_idx_vector_db_write_failures: list[ConnectorFailure] | None = None
-        for document_index in document_indices:
-            # A document will not be spread across different batches, so all the
-            # documents with chunks in this set, are fully represented by the chunks
-            # in this set
-            (
-                insertion_records,
-                vector_db_write_failures,
-            ) = write_chunks_to_vector_db_with_backoff(
-                document_index=document_index,
-                chunks=result.chunks,
-                index_batch_params=IndexBatchParams(
-                    doc_id_to_previous_chunk_cnt=result.doc_id_to_previous_chunk_cnt,
-                    doc_id_to_new_chunk_cnt=result.doc_id_to_new_chunk_cnt,
-                    tenant_id=tenant_id,
-                    large_chunks_enabled=chunker.enable_large_chunks,
-                ),
+        # Acquires a lock on the documents so that no other process can modify
+        # them.  Not needed until here, since this is when the actual race
+        # condition with vector db can occur.
+        with adapter.lock_context(context.updatable_docs):
+            enricher = adapter.prepare_enrichment(
+                context=context,
+                tenant_id=tenant_id,
+                chunks=embedded_chunks,
            )

-            all_returned_doc_ids: set[str] = (
-                {record.document_id for record in insertion_records}
-                .union(
-                    {
-                        record.failed_document.document_id
-                        for record in vector_db_write_failures
-                        if record.failed_document
-                    }
-                )
-                .union(
-                    {
-                        record.failed_document.document_id
-                        for record in embedding_failures
-                        if record.failed_document
-                    }
-                )
+            index_batch_params = IndexBatchParams(
+                doc_id_to_previous_chunk_cnt=enricher.doc_id_to_previous_chunk_cnt,
+                doc_id_to_new_chunk_cnt=enricher.doc_id_to_new_chunk_cnt,
+                tenant_id=tenant_id,
+                large_chunks_enabled=chunker.enable_large_chunks,
            )
-            if all_returned_doc_ids != set(updatable_ids):
-                raise RuntimeError(
-                    f"Some documents were not successfully indexed. "
-                    f"Updatable IDs: {updatable_ids}, "
-                    f"Returned IDs: {all_returned_doc_ids}. "
-                    "This should never happen."
-                    f"This occured for document index {document_index.__class__.__name__}"
-                )
-            # We treat the first document index we got as the primary one used
-            # for reporting the state of indexing.
-            if primary_doc_idx_insertion_records is None:
-                primary_doc_idx_insertion_records = insertion_records
-            if primary_doc_idx_vector_db_write_failures is None:
-                primary_doc_idx_vector_db_write_failures = vector_db_write_failures

-        adapter.post_index(
-            context=context,
-            updatable_chunk_data=updatable_chunk_data,
-            filtered_documents=filtered_documents,
-            result=result,
-        )
+            primary_doc_idx_insertion_records: list[DocumentInsertionRecord] | None = (
+                None
+            )
+            primary_doc_idx_vector_db_write_failures: list[ConnectorFailure] | None = (
+                None
+            )
+
+            for document_index in document_indices:
+
+                def _enriched_stream() -> Iterator[DocMetadataAwareIndexChunk]:
+                    for chunk in chunk_store.stream():
+                        yield enricher.enrich_chunk(chunk, 1.0)
+
+                insertion_records, write_failures = (
+                    write_chunks_to_vector_db_with_backoff(
+                        document_index=document_index,
+                        make_chunks=_enriched_stream,
+                        index_batch_params=index_batch_params,
+                    )
+                )
+
+                _verify_indexing_completeness(
+                    insertion_records=insertion_records,
+                    write_failures=write_failures,
+                    embedding_failed_doc_ids=embedding_failed_doc_ids,
+                    updatable_ids=updatable_ids,
+                    document_index_name=document_index.__class__.__name__,
+                )
+                # We treat the first document index we got as the primary one used
+                # for reporting the state of indexing.
+                if primary_doc_idx_insertion_records is None:
+                    primary_doc_idx_insertion_records = insertion_records
+                if primary_doc_idx_vector_db_write_failures is None:
+                    primary_doc_idx_vector_db_write_failures = write_failures
+
+            adapter.post_index(
+                context=context,
+                updatable_chunk_data=updatable_chunk_data,
+                filtered_documents=filtered_documents,
+                enrichment=enricher,
+            )

    assert primary_doc_idx_insertion_records is not None
    assert primary_doc_idx_vector_db_write_failures is not None
    return IndexingPipelineResult(
-        new_docs=len(
-            [r for r in primary_doc_idx_insertion_records if not r.already_existed]
+        new_docs=sum(
+            1 for r in primary_doc_idx_insertion_records if not r.already_existed
        ),
        total_docs=len(filtered_documents),
-        total_chunks=len(chunks_with_embeddings),
-        failures=primary_doc_idx_vector_db_write_failures + embedding_failures,
+        total_chunks=len(embedding_result.successful_chunk_ids),
+        failures=primary_doc_idx_vector_db_write_failures
+        + embedding_result.connector_failures,
    )


--- a/backend/onyx/indexing/models.py
+++ b/backend/onyx/indexing/models.py
@@ -235,12 +235,16 @@ class UpdatableChunkData(BaseModel):
    boost_score: float


-class BuildMetadataAwareChunksResult(BaseModel):
-    chunks: list[DocMetadataAwareIndexChunk]
+class ChunkEnrichmentContext(Protocol):
+    """Returned by prepare_enrichment. Holds pre-computed metadata lookups
+    and provides per-chunk enrichment."""
+
    doc_id_to_previous_chunk_cnt: dict[str, int]
    doc_id_to_new_chunk_cnt: dict[str, int]
-    user_file_id_to_raw_text: dict[str, str]
-    user_file_id_to_token_count: dict[str, int | None]
+
+    def enrich_chunk(
+        self, chunk: IndexChunk, score: float
+    ) -> DocMetadataAwareIndexChunk: ...


 class IndexingBatchAdapter(Protocol):
@@ -254,18 +258,24 @@ class IndexingBatchAdapter(Protocol):
    ) -> Generator[TransactionalContext, None, None]:
        """Provide a transaction/row-lock context for critical updates."""

-    def build_metadata_aware_chunks(
+    def prepare_enrichment(
        self,
-        chunks_with_embeddings: list[IndexChunk],
-        chunk_content_scores: list[float],
-        tenant_id: str,
        context: "DocumentBatchPrepareContext",
-    ) -> BuildMetadataAwareChunksResult: ...
+        tenant_id: str,
+        chunks: list[DocAwareChunk],
+    ) -> ChunkEnrichmentContext:
+        """Prepare per-chunk enrichment data (access, document sets, boost, etc.).
+
+        Precondition: ``chunks`` only contains chunks of documents whose
+        embedding succeeded. They may be the pre-embedding ``DocAwareChunk``
+        objects, so implementations must not rely on embeddings being set.
+        """
+        ...

    def post_index(
        self,
        context: "DocumentBatchPrepareContext",
        updatable_chunk_data: list[UpdatableChunkData],
        filtered_documents: list[Document],
-        result: BuildMetadataAwareChunksResult,
+        enrichment: ChunkEnrichmentContext,
    ) -> None: ...
--- a/backend/onyx/indexing/vector_db_insertion.py
+++ b/backend/onyx/indexing/vector_db_insertion.py
@@ -1,6 +1,9 @@
 import time
-from collections import defaultdict
+from collections.abc import Callable
+from collections.abc import Iterable
 from http import HTTPStatus
+from itertools import chain
+from itertools import groupby

 import httpx

@@ -28,22 +31,22 @@ def _log_insufficient_storage_error(e: Exception) -> None:

 def write_chunks_to_vector_db_with_backoff(
    document_index: DocumentIndex,
-    chunks: list[DocMetadataAwareIndexChunk],
+    make_chunks: Callable[[], Iterable[DocMetadataAwareIndexChunk]],
    index_batch_params: IndexBatchParams,
 ) -> tuple[list[DocumentInsertionRecord], list[ConnectorFailure]]:
    """Tries to insert all chunks in one large batch. If that batch fails for any reason,
    goes document by document to isolate the failure(s).

    IMPORTANT: must pass in whole documents at a time not individual chunks, since the
-    vector DB interface assumes that all chunks for a single document are present.
+    vector DB interface assumes that all chunks for a single document are present. The
+    chunks of each document must also be contiguous in the stream.
    """
-
    # first try to write the chunks to the vector db
    try:
        return (
            list(
                document_index.index(
-                    chunks=chunks,
+                    chunks=make_chunks(),
                    index_batch_params=index_batch_params,
                )
            ),
@@ -60,14 +63,23 @@ def write_chunks_to_vector_db_with_backoff(
        # wait a couple seconds just to give the vector db a chance to recover
        time.sleep(2)

-    # try writing each doc one by one
-    chunks_for_docs: dict[str, list[DocMetadataAwareIndexChunk]] = defaultdict(list)
-    for chunk in chunks:
-        chunks_for_docs[chunk.source_document.id].append(chunk)
-
    insertion_records: list[DocumentInsertionRecord] = []
    failures: list[ConnectorFailure] = []
-    for doc_id, chunks_for_doc in chunks_for_docs.items():
+
+    def key(chunk: DocMetadataAwareIndexChunk) -> str:
+        return chunk.source_document.id
+
+    seen_doc_ids: set[str] = set()
+    for doc_id, chunks_for_doc in groupby(make_chunks(), key=key):
+        if doc_id in seen_doc_ids:
+            raise RuntimeError(
+                f"Doc chunks are not arriving in order. Current doc_id={doc_id}, seen_doc_ids={list(seen_doc_ids)}"
+            )
+        seen_doc_ids.add(doc_id)
+
+        first_chunk = next(chunks_for_doc)
+        chunks_for_doc = chain([first_chunk], chunks_for_doc)
+
        try:
            insertion_records.extend(
                document_index.index(
@@ -87,9 +99,7 @@ def write_chunks_to_vector_db_with_backoff(
                ConnectorFailure(
                    failed_document=DocumentFailure(
                        document_id=doc_id,
-                        document_link=(
-                            chunks_for_doc[0].get_link() if chunks_for_doc else None
-                        ),
+                        document_link=first_chunk.get_link(),
                    ),
                    failure_message=str(e),
                    exception=e,
--- a/backend/onyx/llm/multi_llm.py
+++ b/backend/onyx/llm/multi_llm.py
@@ -185,6 +185,21 @@ def _messages_contain_tool_content(messages: list[dict[str, Any]]) -> bool:
    return False


+def _prompt_contains_tool_call_history(prompt: LanguageModelInput) -> bool:
+    """Return True if any assistant message in *prompt* carries tool_calls.
+
+    With Anthropic extended thinking enabled, the API requires each assistant
+    message to open with a thinking block before any tool_use block. Those
+    blocks carry cryptographic signatures we cannot reconstruct and are not
+    preserved, so thinking must be skipped once history has tool-calling turns.
+    """
+    from onyx.llm.models import AssistantMessage
+    for msg in prompt if isinstance(prompt, list) else [prompt]:
+        if isinstance(msg, AssistantMessage) and msg.tool_calls:
+            return True
+    return False
+
+
 def _is_vertex_model_rejecting_output_config(model_name: str) -> bool:
    normalized_model_name = model_name.lower()
    return any(
@@ -466,7 +481,20 @@ class LitellmLLM(LLM):
                    reasoning_effort
                )

-                if budget_tokens is not None:
+                # Anthropic requires every assistant message with tool_use
+                # blocks to start with a thinking block that carries a
+                # cryptographic signature.  We don't preserve those blocks
+                # across turns, so skip thinking when the history already
+                # contains tool-calling assistant messages.  LiteLLM's
+                # modify_params workaround doesn't cover all providers
+                # (notably Bedrock).
+                can_enable_thinking = (
+                    budget_tokens is not None
+                    and not _prompt_contains_tool_call_history(prompt)
+                )
+
+                if can_enable_thinking:
+                    assert budget_tokens is not None  # mypy
                    if max_tokens is not None:
                        # Anthropic has a weird rule where max token has to be at least as much as budget tokens if set
                        # and the minimum budget tokens is 1024
--- a/backend/onyx/main.py
+++ b/backend/onyx/main.py
@@ -77,7 +77,6 @@ from onyx.server.features.default_assistant.api import (
 )
 from onyx.server.features.document_set.api import router as document_set_router
 from onyx.server.features.hierarchy.api import router as hierarchy_router
-from onyx.server.features.hooks.api import router as hook_router
 from onyx.server.features.input_prompt.api import (
    admin_router as admin_input_prompt_router,
 )
@@ -439,6 +438,7 @@ def get_application(lifespan_override: Lifespan | None = None) -> FastAPI:
            dsn=SENTRY_DSN,
            integrations=[StarletteIntegration(), FastApiIntegration()],
            traces_sample_rate=0.1,
+            release=__version__,
        )
        logger.info("Sentry initialized")
    else:
@@ -454,7 +454,6 @@ def get_application(lifespan_override: Lifespan | None = None) -> FastAPI:

    register_onyx_exception_handlers(application)

-    include_router_with_global_prefix_prepended(application, hook_router)
    include_router_with_global_prefix_prepended(application, password_router)
    include_router_with_global_prefix_prepended(application, chat_router)
    include_router_with_global_prefix_prepended(application, query_router)
--- a/backend/onyx/server/settings/api.py
+++ b/backend/onyx/server/settings/api.py
@@ -21,7 +21,6 @@ from onyx.db.notification import get_notifications
 from onyx.db.notification import update_notification_last_shown
 from onyx.error_handling.error_codes import OnyxErrorCode
 from onyx.error_handling.exceptions import OnyxError
-from onyx.hooks.utils import HOOKS_AVAILABLE
 from onyx.key_value_store.factory import get_kv_store
 from onyx.key_value_store.interface import KvKeyNotFoundError
 from onyx.server.features.build.utils import is_onyx_craft_enabled
@@ -38,6 +37,7 @@ from onyx.utils.logger import setup_logger
 from onyx.utils.variable_functionality import (
    fetch_versioned_implementation_with_fallback,
 )
+from shared_configs.configs import MULTI_TENANT

 logger = setup_logger()

@@ -98,7 +98,7 @@ def fetch_settings(
        needs_reindexing=needs_reindexing,
        onyx_craft_enabled=onyx_craft_enabled_for_user,
        vector_db_enabled=not DISABLE_VECTOR_DB,
-        hooks_enabled=HOOKS_AVAILABLE,
+        hooks_enabled=not MULTI_TENANT,
        version=onyx_version,
        max_allowed_upload_size_mb=MAX_ALLOWED_UPLOAD_SIZE_MB,
        default_user_file_max_upload_size_mb=min(
--- a/backend/onyx/server/settings/models.py
+++ b/backend/onyx/server/settings/models.py
@@ -116,7 +116,7 @@ class UserSettings(Settings):
    # False when DISABLE_VECTOR_DB is set — connectors, RAG search, and
    # document sets are unavailable.
    vector_db_enabled: bool = True
-    # True when hooks are available: single-tenant deployment with HOOK_ENABLED=true.
+    # True when hooks are available: single-tenant EE deployments only.
    hooks_enabled: bool = False
    # Application version, read from the ONYX_VERSION env var at startup.
    version: str | None = None
--- a/backend/requirements/default.txt
+++ b/backend/requirements/default.txt
@@ -187,7 +187,7 @@ coloredlogs==15.0.1
    # via onnxruntime
 courlan==1.3.2
    # via trafilatura
-cryptography==46.0.5
+cryptography==46.0.6
    # via
    #   authlib
    #   google-auth
@@ -449,7 +449,7 @@ kombu==5.5.4
    # via celery
 kubernetes==31.0.0
    # via onyx
-langchain-core==1.2.11
+langchain-core==1.2.22
    # via onyx
 langdetect==1.0.9
    # via unstructured
@@ -735,7 +735,7 @@ pyee==13.0.0
    # via playwright
 pygithub==2.5.0
    # via onyx
-pygments==2.19.2
+pygments==2.20.0
    # via rich
 pyjwt==2.12.0
    # via
--- a/backend/requirements/dev.txt
+++ b/backend/requirements/dev.txt
@@ -97,7 +97,7 @@ comm==0.2.3
    # via ipykernel
 contourpy==1.3.3
    # via matplotlib
-cryptography==46.0.5
+cryptography==46.0.6
    # via
    #   google-auth
    #   pyjwt
@@ -349,7 +349,7 @@ pydantic-core==2.33.2
    # via pydantic
 pydantic-settings==2.12.0
    # via mcp
-pygments==2.19.2
+pygments==2.20.0
    # via
    #   ipython
    #   ipython-pygments-lexers
--- a/backend/requirements/ee.txt
+++ b/backend/requirements/ee.txt
@@ -76,7 +76,7 @@ colorama==0.4.6 ; sys_platform == 'win32'
    # via
    #   click
    #   tqdm
-cryptography==46.0.5
+cryptography==46.0.6
    # via
    #   google-auth
    #   pyjwt
--- a/backend/requirements/model_server.txt
+++ b/backend/requirements/model_server.txt
@@ -92,7 +92,7 @@ colorama==0.4.6 ; sys_platform == 'win32'
    # via
    #   click
    #   tqdm
-cryptography==46.0.5
+cryptography==46.0.6
    # via
    #   google-auth
    #   pyjwt
--- a/backend/scripts/run_industryrag_bench_questions.py
+++ b/backend/scripts/run_industryrag_bench_questions.py
@@ -5,6 +5,7 @@ import asyncio
 import json
 import logging
 import sys
+import time
 from dataclasses import asdict
 from dataclasses import dataclass
 from pathlib import Path
@@ -27,6 +28,9 @@ INTERNAL_SEARCH_TOOL_NAME = "internal_search"
 INTERNAL_SEARCH_IN_CODE_TOOL_ID = "SearchTool"
 MAX_REQUEST_ATTEMPTS = 5
 RETRIABLE_STATUS_CODES = {429, 500, 502, 503, 504}
+QUESTION_TIMEOUT_SECONDS = 300
+QUESTION_RETRY_PAUSE_SECONDS = 30
+MAX_QUESTION_ATTEMPTS = 3


@dataclass(frozen=True)
@@ -109,6 +113,27 @@ def normalize_api_base(api_base: str) -> str:
    return f"{normalized}/api"


+def load_completed_question_ids(output_file: Path) -> set[str]:
+    """Collect the non-empty string ``question_id`` values from a JSONL file.
+
+    Returns an empty set when the file is missing. Blank lines, invalid JSON
+    and JSON values that are not objects are skipped instead of raising.
+    """
+    completed_ids: set[str] = set()
+    if not output_file.exists():
+        return completed_ids
+    with output_file.open("r", encoding="utf-8") as file:
+        for line in file:
+            try:
+                record = json.loads(line.strip())
+            except json.JSONDecodeError:
+                continue  # blank or malformed line
+            qid = record.get("question_id") if isinstance(record, dict) else None
+            if isinstance(qid, str) and qid:
+                completed_ids.add(qid)
+    return completed_ids
+
+
 def load_questions(questions_file: Path) -> list[QuestionRecord]:
    if not questions_file.exists():
        raise FileNotFoundError(f"Questions file not found: {questions_file}")
@@ -348,6 +373,7 @@ async def generate_answers(
    api_base: str,
    api_key: str,
    parallelism: int,
+    skipped: int,
 ) -> None:
    if parallelism < 1:
        raise ValueError("`--parallelism` must be at least 1.")
@@ -382,58 +408,178 @@ async def generate_answers(
            write_lock = asyncio.Lock()
            completed = 0
            successful = 0
+            stuck_count = 0
            failed_questions: list[FailedQuestionRecord] = []
-            total = len(questions)
+            remaining_count = len(questions)
+            overall_total = remaining_count + skipped
+            question_durations: list[float] = []
+            run_start_time = time.monotonic()
+
+            def print_progress() -> None:
+                """Redraw the one-line progress bar on stderr."""
+                avg_time = 0.0
+                if question_durations:
+                    avg_time = sum(question_durations) / len(question_durations)
+                elapsed = time.monotonic() - run_start_time
+                # Rough ETA: mean duration x questions left, divided by parallelism.
+                eta = avg_time * (remaining_count - completed) / max(parallelism, 1)
+
+                # Previously answered (skipped) questions count as done overall.
+                done = skipped + completed
+                bar_width = 30
+                if overall_total:
+                    filled = int(bar_width * done / overall_total)
+                    pct = done / overall_total * 100
+                else:
+                    # Zero total: show a full bar at 100%.
+                    filled = bar_width
+                    pct = 100.0
+                bar = "█" * filled + "░" * (bar_width - filled)
+
+                # Leading "\r" rewinds to column 0 so each call overwrites the last.
+                parts = (
+                    f"\r{bar} {pct:5.1f}% "
+                    f"[{done}/{overall_total}] "
+                    f"avg {avg_time:.1f}s/q "
+                    f"elapsed {elapsed:.0f}s "
+                    f"ETA {eta:.0f}s "
+                    f"(ok:{successful} fail:{len(failed_questions)}"
+                )
+
+                stuck_part = f" stuck:{stuck_count}" if stuck_count else ""
+                skip_part = f" skip:{skipped}" if skipped else ""
+                sys.stderr.write(parts + stuck_part + skip_part + ")")
+                sys.stderr.flush()
+
+            print_progress()

            async def process_question(question_record: QuestionRecord) -> None:
                nonlocal completed
                nonlocal successful
+                nonlocal stuck_count

-                try:
-                    async with semaphore:
-                        result = await submit_question(
-                            session=session,
-                            api_base=api_base,
-                            headers=headers,
-                            internal_search_tool_id=internal_search_tool_id,
-                            question_record=question_record,
+                last_error: Exception | None = None
+                for attempt in range(1, MAX_QUESTION_ATTEMPTS + 1):
+                    q_start = time.monotonic()
+                    try:
+                        async with semaphore:
+                            result = await asyncio.wait_for(
+                                submit_question(
+                                    session=session,
+                                    api_base=api_base,
+                                    headers=headers,
+                                    internal_search_tool_id=internal_search_tool_id,
+                                    question_record=question_record,
+                                ),
+                                timeout=QUESTION_TIMEOUT_SECONDS,
+                            )
+                    except asyncio.TimeoutError:
+                        async with progress_lock:
+                            stuck_count += 1
+                            logger.warning(
+                                "Question %s timed out after %ss (attempt %s/%s, "
+                                "total stuck: %s) — retrying in %ss",
+                                question_record.question_id,
+                                QUESTION_TIMEOUT_SECONDS,
+                                attempt,
+                                MAX_QUESTION_ATTEMPTS,
+                                stuck_count,
+                                QUESTION_RETRY_PAUSE_SECONDS,
+                            )
+                            print_progress()
+                        last_error = TimeoutError(
+                            f"Timed out after {QUESTION_TIMEOUT_SECONDS}s "
+                            f"on attempt {attempt}/{MAX_QUESTION_ATTEMPTS}"
                        )
-                except Exception as exc:
+                        await asyncio.sleep(QUESTION_RETRY_PAUSE_SECONDS)
+                        continue
+                    except Exception as exc:
+                        duration = time.monotonic() - q_start
+                        async with progress_lock:
+                            completed += 1
+                            question_durations.append(duration)
+                            failed_questions.append(
+                                FailedQuestionRecord(
+                                    question_id=question_record.question_id,
+                                    error=str(exc),
+                                )
+                            )
+                            logger.exception(
+                                "Failed question %s (%s/%s)",
+                                question_record.question_id,
+                                completed,
+                                remaining_count,
+                            )
+                            print_progress()
+                        return
+
+                    duration = time.monotonic() - q_start
+
+                    async with write_lock:
+                        file.write(json.dumps(asdict(result), ensure_ascii=False))
+                        file.write("\n")
+                        file.flush()
+
                    async with progress_lock:
                        completed += 1
-                        failed_questions.append(
-                            FailedQuestionRecord(
-                                question_id=question_record.question_id,
-                                error=str(exc),
-                            )
-                        )
-                        logger.exception(
-                            "Failed question %s (%s/%s)",
-                            question_record.question_id,
-                            completed,
-                            total,
-                        )
+                        successful += 1
+                        question_durations.append(duration)
+                        print_progress()
                    return

-                async with write_lock:
-                    file.write(json.dumps(asdict(result), ensure_ascii=False))
-                    file.write("\n")
-                    file.flush()
-
+                # All attempts exhausted due to timeouts
                async with progress_lock:
                    completed += 1
-                    successful += 1
-                    logger.info("Processed %s/%s questions", completed, total)
+                    failed_questions.append(
+                        FailedQuestionRecord(
+                            question_id=question_record.question_id,
+                            error=str(last_error),
+                        )
+                    )
+                    logger.error(
+                        "Question %s failed after %s timeout attempts (%s/%s)",
+                        question_record.question_id,
+                        MAX_QUESTION_ATTEMPTS,
+                        completed,
+                        remaining_count,
+                    )
+                    print_progress()

            await asyncio.gather(
                *(process_question(question_record) for question_record in questions)
            )

+            # Final newline after progress bar
+            sys.stderr.write("\n")
+            sys.stderr.flush()
+
+            total_elapsed = time.monotonic() - run_start_time
+            avg_time = (
+                sum(question_durations) / len(question_durations)
+                if question_durations
+                else 0.0
+            )
+            stuck_suffix = f", {stuck_count} stuck timeouts" if stuck_count else ""
+            resume_suffix = (
+                f" — {skipped} previously completed, "
+                f"{skipped + successful}/{overall_total} overall"
+                if skipped
+                else ""
+            )
+            logger.info(
+                "Done: %s/%s successful in %.1fs (avg %.1fs/question%s)%s",
+                successful,
+                remaining_count,
+                total_elapsed,
+                avg_time,
+                stuck_suffix,
+                resume_suffix,
+            )
+
            if failed_questions:
                logger.warning(
-                    "Completed with %s failed questions and %s successful questions.",
+                    "%s questions failed:",
                    len(failed_questions),
-                    successful,
                )
                for failed_question in failed_questions:
                    logger.warning(
@@ -453,7 +599,30 @@ def main() -> None:
            raise ValueError("`--max-questions` must be at least 1 when provided.")
        questions = questions[: args.max_questions]

-    logger.info("Loaded %s questions from %s", len(questions), args.questions_file)
+    completed_ids = load_completed_question_ids(args.output_file)
+    logger.info(
+        "Found %s already-answered question IDs in %s",
+        len(completed_ids),
+        args.output_file,
+    )
+    total_before_filter = len(questions)
+    questions = [q for q in questions if q.question_id not in completed_ids]
+    skipped = total_before_filter - len(questions)
+
+    if skipped:
+        logger.info(
+            "Resuming: %s/%s already answered, %s remaining",
+            skipped,
+            total_before_filter,
+            len(questions),
+        )
+    else:
+        logger.info("Loaded %s questions from %s", len(questions), args.questions_file)
+
+    if not questions:
+        logger.info("All questions already answered. Nothing to do.")
+        return
+
    logger.info("Writing answers to %s", args.output_file)

    asyncio.run(
@@ -463,6 +632,7 @@ def main() -> None:
            api_base=api_base,
            api_key=args.api_key,
            parallelism=args.parallelism,
+            skipped=skipped,
        )
    )

--- a/backend/tests/external_dependency_unit/celery/test_user_file_indexing_adapter.py
+++ b/backend/tests/external_dependency_unit/celery/test_user_file_indexing_adapter.py
@@ -1,7 +1,7 @@
 """
 External dependency unit tests for UserFileIndexingAdapter metadata writing.

-Validates that build_metadata_aware_chunks produces DocMetadataAwareIndexChunk
+Validates that prepare_enrichment produces DocMetadataAwareIndexChunk
 objects with both `user_project` and `personas` fields populated correctly
 based on actual DB associations.

@@ -127,7 +127,7 @@ def _make_index_chunk(user_file: UserFile) -> IndexChunk:


 class TestAdapterWritesBothMetadataFields:
-    """build_metadata_aware_chunks must populate user_project AND personas."""
+    """prepare_enrichment must populate user_project AND personas."""

    @patch(
        "onyx.indexing.adapters.user_file_indexing_adapter.get_default_llm",
@@ -153,15 +153,13 @@ class TestAdapterWritesBothMetadataFields:
        doc = chunk.source_document
        context = DocumentBatchPrepareContext(updatable_docs=[doc], id_to_boost_map={})

-        result = adapter.build_metadata_aware_chunks(
-            chunks_with_embeddings=[chunk],
-            chunk_content_scores=[1.0],
-            tenant_id=TEST_TENANT_ID,
+        enricher = adapter.prepare_enrichment(
            context=context,
+            tenant_id=TEST_TENANT_ID,
+            chunks=[chunk],
        )
+        aware_chunk = enricher.enrich_chunk(chunk, 1.0)

-        assert len(result.chunks) == 1
-        aware_chunk = result.chunks[0]
        assert persona.id in aware_chunk.personas
        assert aware_chunk.user_project == []

@@ -190,15 +188,13 @@ class TestAdapterWritesBothMetadataFields:
            updatable_docs=[chunk.source_document], id_to_boost_map={}
        )

-        result = adapter.build_metadata_aware_chunks(
-            chunks_with_embeddings=[chunk],
-            chunk_content_scores=[1.0],
-            tenant_id=TEST_TENANT_ID,
+        enricher = adapter.prepare_enrichment(
            context=context,
+            tenant_id=TEST_TENANT_ID,
+            chunks=[chunk],
        )
+        aware_chunk = enricher.enrich_chunk(chunk, 1.0)

-        assert len(result.chunks) == 1
-        aware_chunk = result.chunks[0]
        assert project.id in aware_chunk.user_project
        assert aware_chunk.personas == []

@@ -229,14 +225,13 @@ class TestAdapterWritesBothMetadataFields:
            updatable_docs=[chunk.source_document], id_to_boost_map={}
        )

-        result = adapter.build_metadata_aware_chunks(
-            chunks_with_embeddings=[chunk],
-            chunk_content_scores=[1.0],
-            tenant_id=TEST_TENANT_ID,
+        enricher = adapter.prepare_enrichment(
            context=context,
+            tenant_id=TEST_TENANT_ID,
+            chunks=[chunk],
        )
+        aware_chunk = enricher.enrich_chunk(chunk, 1.0)

-        aware_chunk = result.chunks[0]
        assert persona.id in aware_chunk.personas
        assert project.id in aware_chunk.user_project

@@ -261,14 +256,13 @@ class TestAdapterWritesBothMetadataFields:
            updatable_docs=[chunk.source_document], id_to_boost_map={}
        )

-        result = adapter.build_metadata_aware_chunks(
-            chunks_with_embeddings=[chunk],
-            chunk_content_scores=[1.0],
-            tenant_id=TEST_TENANT_ID,
+        enricher = adapter.prepare_enrichment(
            context=context,
+            tenant_id=TEST_TENANT_ID,
+            chunks=[chunk],
        )
+        aware_chunk = enricher.enrich_chunk(chunk, 1.0)

-        aware_chunk = result.chunks[0]
        assert aware_chunk.personas == []
        assert aware_chunk.user_project == []

@@ -300,12 +294,11 @@ class TestAdapterWritesBothMetadataFields:
            updatable_docs=[chunk.source_document], id_to_boost_map={}
        )

-        result = adapter.build_metadata_aware_chunks(
-            chunks_with_embeddings=[chunk],
-            chunk_content_scores=[1.0],
-            tenant_id=TEST_TENANT_ID,
+        enricher = adapter.prepare_enrichment(
            context=context,
+            tenant_id=TEST_TENANT_ID,
+            chunks=[chunk],
        )
+        aware_chunk = enricher.enrich_chunk(chunk, 1.0)

-        aware_chunk = result.chunks[0]
        assert set(aware_chunk.personas) == {persona_a.id, persona_b.id}
--- a/backend/tests/unit/ee/onyx/hooks/init.py
+++ b/backend/tests/unit/ee/onyx/hooks/init.py
--- a/backend/tests/unit/ee/onyx/hooks/test_executor.py
+++ b/backend/tests/unit/ee/onyx/hooks/test_executor.py
@@ -9,11 +9,11 @@ import httpx
 import pytest
 from pydantic import BaseModel

+from ee.onyx.hooks.executor import _execute_hook_impl as execute_hook
 from onyx.db.enums import HookFailStrategy
 from onyx.db.enums import HookPoint
 from onyx.error_handling.error_codes import OnyxErrorCode
 from onyx.error_handling.exceptions import OnyxError
-from onyx.hooks.executor import execute_hook
 from onyx.hooks.executor import HookSkipped
 from onyx.hooks.executor import HookSoftFailed
 from onyx.hooks.points.query_processing import QueryProcessingResponse
@@ -118,28 +118,30 @@ def db_session() -> MagicMock:


@pytest.mark.parametrize(
-    "hooks_available,hook",
+    "multi_tenant,hook",
    [
-        # HOOKS_AVAILABLE=False exits before the DB lookup — hook is irrelevant.
-        pytest.param(False, None, id="hooks_not_available"),
-        pytest.param(True, None, id="hook_not_found"),
-        pytest.param(True, _make_hook(is_active=False), id="hook_inactive"),
-        pytest.param(True, _make_hook(endpoint_url=None), id="no_endpoint_url"),
+        # MULTI_TENANT=True exits before the DB lookup — hook is irrelevant.
+        pytest.param(True, None, id="multi_tenant"),
+        pytest.param(False, None, id="hook_not_found"),
+        pytest.param(False, _make_hook(is_active=False), id="hook_inactive"),
+        pytest.param(False, _make_hook(endpoint_url=None), id="no_endpoint_url"),
    ],
 )
 def test_early_exit_returns_skipped_with_no_db_writes(
    db_session: MagicMock,
-    hooks_available: bool,
+    multi_tenant: bool,
    hook: MagicMock | None,
 ) -> None:
    with (
-        patch("onyx.hooks.executor.HOOKS_AVAILABLE", hooks_available),
+        patch("ee.onyx.hooks.executor.MULTI_TENANT", multi_tenant),
        patch(
-            "onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
+            "ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
            return_value=hook,
        ),
-        patch("onyx.hooks.executor.update_hook__no_commit") as mock_update,
-        patch("onyx.hooks.executor.create_hook_execution_log__no_commit") as mock_log,
+        patch("ee.onyx.hooks.executor.update_hook__no_commit") as mock_update,
+        patch(
+            "ee.onyx.hooks.executor.create_hook_execution_log__no_commit"
+        ) as mock_log,
    ):
        result = execute_hook(
            db_session=db_session,
@@ -164,14 +166,16 @@ def test_success_returns_validated_model_and_sets_reachable(
    hook = _make_hook()

    with (
-        patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
+        patch("ee.onyx.hooks.executor.MULTI_TENANT", False),
        patch(
-            "onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
+            "ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
            return_value=hook,
        ),
-        patch("onyx.hooks.executor.get_session_with_current_tenant"),
-        patch("onyx.hooks.executor.update_hook__no_commit") as mock_update,
-        patch("onyx.hooks.executor.create_hook_execution_log__no_commit") as mock_log,
+        patch("ee.onyx.hooks.executor.get_session_with_current_tenant"),
+        patch("ee.onyx.hooks.executor.update_hook__no_commit") as mock_update,
+        patch(
+            "ee.onyx.hooks.executor.create_hook_execution_log__no_commit"
+        ) as mock_log,
        patch("httpx.Client") as mock_client_cls,
    ):
        _setup_client(mock_client_cls, response=_make_response())
@@ -195,14 +199,14 @@ def test_success_skips_reachable_write_when_already_true(db_session: MagicMock)
    hook = _make_hook(is_reachable=True)

    with (
-        patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
+        patch("ee.onyx.hooks.executor.MULTI_TENANT", False),
        patch(
-            "onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
+            "ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
            return_value=hook,
        ),
-        patch("onyx.hooks.executor.get_session_with_current_tenant"),
-        patch("onyx.hooks.executor.update_hook__no_commit") as mock_update,
-        patch("onyx.hooks.executor.create_hook_execution_log__no_commit"),
+        patch("ee.onyx.hooks.executor.get_session_with_current_tenant"),
+        patch("ee.onyx.hooks.executor.update_hook__no_commit") as mock_update,
+        patch("ee.onyx.hooks.executor.create_hook_execution_log__no_commit"),
        patch("httpx.Client") as mock_client_cls,
    ):
        _setup_client(mock_client_cls, response=_make_response())
@@ -224,14 +228,16 @@ def test_non_dict_json_response_is_a_failure(db_session: MagicMock) -> None:
    hook = _make_hook(fail_strategy=HookFailStrategy.SOFT)

    with (
-        patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
+        patch("ee.onyx.hooks.executor.MULTI_TENANT", False),
        patch(
-            "onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
+            "ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
            return_value=hook,
        ),
-        patch("onyx.hooks.executor.get_session_with_current_tenant"),
-        patch("onyx.hooks.executor.update_hook__no_commit") as mock_update,
-        patch("onyx.hooks.executor.create_hook_execution_log__no_commit") as mock_log,
+        patch("ee.onyx.hooks.executor.get_session_with_current_tenant"),
+        patch("ee.onyx.hooks.executor.update_hook__no_commit") as mock_update,
+        patch(
+            "ee.onyx.hooks.executor.create_hook_execution_log__no_commit"
+        ) as mock_log,
        patch("httpx.Client") as mock_client_cls,
    ):
        _setup_client(
@@ -258,14 +264,16 @@ def test_json_decode_failure_is_a_failure(db_session: MagicMock) -> None:
    hook = _make_hook(fail_strategy=HookFailStrategy.SOFT)

    with (
-        patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
+        patch("ee.onyx.hooks.executor.MULTI_TENANT", False),
        patch(
-            "onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
+            "ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
            return_value=hook,
        ),
-        patch("onyx.hooks.executor.get_session_with_current_tenant"),
-        patch("onyx.hooks.executor.update_hook__no_commit") as mock_update,
-        patch("onyx.hooks.executor.create_hook_execution_log__no_commit") as mock_log,
+        patch("ee.onyx.hooks.executor.get_session_with_current_tenant"),
+        patch("ee.onyx.hooks.executor.update_hook__no_commit") as mock_update,
+        patch(
+            "ee.onyx.hooks.executor.create_hook_execution_log__no_commit"
+        ) as mock_log,
        patch("httpx.Client") as mock_client_cls,
    ):
        _setup_client(
@@ -384,14 +392,14 @@ def test_http_failure_paths(
    hook = _make_hook(fail_strategy=fail_strategy)

    with (
-        patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
+        patch("ee.onyx.hooks.executor.MULTI_TENANT", False),
        patch(
-            "onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
+            "ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
            return_value=hook,
        ),
-        patch("onyx.hooks.executor.get_session_with_current_tenant"),
-        patch("onyx.hooks.executor.update_hook__no_commit") as mock_update,
-        patch("onyx.hooks.executor.create_hook_execution_log__no_commit"),
+        patch("ee.onyx.hooks.executor.get_session_with_current_tenant"),
+        patch("ee.onyx.hooks.executor.update_hook__no_commit") as mock_update,
+        patch("ee.onyx.hooks.executor.create_hook_execution_log__no_commit"),
        patch("httpx.Client") as mock_client_cls,
    ):
        _setup_client(mock_client_cls, side_effect=exception)
@@ -443,14 +451,14 @@ def test_authorization_header(
    hook = _make_hook(api_key=api_key)

    with (
-        patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
+        patch("ee.onyx.hooks.executor.MULTI_TENANT", False),
        patch(
-            "onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
+            "ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
            return_value=hook,
        ),
-        patch("onyx.hooks.executor.get_session_with_current_tenant"),
-        patch("onyx.hooks.executor.update_hook__no_commit"),
-        patch("onyx.hooks.executor.create_hook_execution_log__no_commit"),
+        patch("ee.onyx.hooks.executor.get_session_with_current_tenant"),
+        patch("ee.onyx.hooks.executor.update_hook__no_commit"),
+        patch("ee.onyx.hooks.executor.create_hook_execution_log__no_commit"),
        patch("httpx.Client") as mock_client_cls,
    ):
        mock_client = _setup_client(mock_client_cls, response=_make_response())
@@ -489,13 +497,13 @@ def test_persist_session_failure_is_swallowed(
    hook = _make_hook(fail_strategy=HookFailStrategy.HARD)

    with (
-        patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
+        patch("ee.onyx.hooks.executor.MULTI_TENANT", False),
        patch(
-            "onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
+            "ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
            return_value=hook,
        ),
        patch(
-            "onyx.hooks.executor.get_session_with_current_tenant",
+            "ee.onyx.hooks.executor.get_session_with_current_tenant",
            side_effect=RuntimeError("DB unavailable"),
        ),
        patch("httpx.Client") as mock_client_cls,
@@ -556,14 +564,16 @@ def test_response_validation_failure_respects_fail_strategy(
    hook = _make_hook(fail_strategy=fail_strategy)

    with (
-        patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
+        patch("ee.onyx.hooks.executor.MULTI_TENANT", False),
        patch(
-            "onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
+            "ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
            return_value=hook,
        ),
-        patch("onyx.hooks.executor.get_session_with_current_tenant"),
-        patch("onyx.hooks.executor.update_hook__no_commit") as mock_update,
-        patch("onyx.hooks.executor.create_hook_execution_log__no_commit") as mock_log,
+        patch("ee.onyx.hooks.executor.get_session_with_current_tenant"),
+        patch("ee.onyx.hooks.executor.update_hook__no_commit") as mock_update,
+        patch(
+            "ee.onyx.hooks.executor.create_hook_execution_log__no_commit"
+        ) as mock_log,
        patch("httpx.Client") as mock_client_cls,
    ):
        # Response payload is missing required_field → ValidationError
@@ -619,13 +629,13 @@ def test_unexpected_exception_in_inner_respects_fail_strategy(
    hook = _make_hook(fail_strategy=fail_strategy)

    with (
-        patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
+        patch("ee.onyx.hooks.executor.MULTI_TENANT", False),
        patch(
-            "onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
+            "ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
            return_value=hook,
        ),
        patch(
-            "onyx.hooks.executor._execute_hook_inner",
+            "ee.onyx.hooks.executor._execute_hook_inner",
            side_effect=ValueError("unexpected bug"),
        ),
    ):
@@ -658,17 +668,19 @@ def test_is_reachable_failure_does_not_prevent_log(db_session: MagicMock) -> Non
    hook = _make_hook(fail_strategy=HookFailStrategy.SOFT)

    with (
-        patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
+        patch("ee.onyx.hooks.executor.MULTI_TENANT", False),
        patch(
-            "onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
+            "ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
            return_value=hook,
        ),
-        patch("onyx.hooks.executor.get_session_with_current_tenant"),
+        patch("ee.onyx.hooks.executor.get_session_with_current_tenant"),
        patch(
-            "onyx.hooks.executor.update_hook__no_commit",
+            "ee.onyx.hooks.executor.update_hook__no_commit",
            side_effect=OnyxError(OnyxErrorCode.NOT_FOUND, "hook deleted"),
        ),
-        patch("onyx.hooks.executor.create_hook_execution_log__no_commit") as mock_log,
+        patch(
+            "ee.onyx.hooks.executor.create_hook_execution_log__no_commit"
+        ) as mock_log,
        patch("httpx.Client") as mock_client_cls,
    ):
        _setup_client(mock_client_cls, side_effect=httpx.ConnectError("refused"))
--- a/backend/tests/unit/ee/onyx/server/__init__.py
+++ b/backend/tests/unit/ee/onyx/server/__init__.py
--- a/backend/tests/unit/ee/onyx/server/features/__init__.py
+++ b/backend/tests/unit/ee/onyx/server/features/__init__.py
--- a/backend/tests/unit/ee/onyx/server/features/hooks/__init__.py
+++ b/backend/tests/unit/ee/onyx/server/features/hooks/__init__.py
--- a/backend/tests/unit/ee/onyx/server/features/hooks/test_api.py
+++ b/backend/tests/unit/ee/onyx/server/features/hooks/test_api.py
@@ -1,4 +1,4 @@
-"""Unit tests for onyx.server.features.hooks.api helpers.
+"""Unit tests for ee.onyx.server.features.hooks.api helpers.

 Covers:
 - _check_ssrf_safety: scheme enforcement and private-IP blocklist
@@ -16,13 +16,13 @@ from unittest.mock import patch
 import httpx
 import pytest

+from ee.onyx.server.features.hooks.api import _check_ssrf_safety
+from ee.onyx.server.features.hooks.api import _raise_for_validation_failure
+from ee.onyx.server.features.hooks.api import _validate_endpoint
 from onyx.error_handling.error_codes import OnyxErrorCode
 from onyx.error_handling.exceptions import OnyxError
 from onyx.hooks.models import HookValidateResponse
 from onyx.hooks.models import HookValidateStatus
-from onyx.server.features.hooks.api import _check_ssrf_safety
-from onyx.server.features.hooks.api import _raise_for_validation_failure
-from onyx.server.features.hooks.api import _validate_endpoint

 # ---------------------------------------------------------------------------
 # Helpers
@@ -117,28 +117,28 @@ class TestCheckSsrfSafety:
 class TestValidateEndpoint:
    def _call(self, *, api_key: str | None = _API_KEY) -> HookValidateResponse:
        # Bypass SSRF check — tested separately in TestCheckSsrfSafety.
-        with patch("onyx.server.features.hooks.api._check_ssrf_safety"):
+        with patch("ee.onyx.server.features.hooks.api._check_ssrf_safety"):
            return _validate_endpoint(
                endpoint_url=_URL,
                api_key=api_key,
                timeout_seconds=_TIMEOUT,
            )

-    @patch("onyx.server.features.hooks.api.httpx.Client")
+    @patch("ee.onyx.server.features.hooks.api.httpx.Client")
    def test_2xx_returns_passed(self, mock_client_cls: MagicMock) -> None:
        mock_client_cls.return_value.__enter__.return_value.post.return_value = (
            _mock_response(200)
        )
        assert self._call().status == HookValidateStatus.passed

-    @patch("onyx.server.features.hooks.api.httpx.Client")
+    @patch("ee.onyx.server.features.hooks.api.httpx.Client")
    def test_5xx_returns_passed(self, mock_client_cls: MagicMock) -> None:
        mock_client_cls.return_value.__enter__.return_value.post.return_value = (
            _mock_response(500)
        )
        assert self._call().status == HookValidateStatus.passed

-    @patch("onyx.server.features.hooks.api.httpx.Client")
+    @patch("ee.onyx.server.features.hooks.api.httpx.Client")
    @pytest.mark.parametrize("status_code", [401, 403])
    def test_401_403_returns_auth_failed(
        self, mock_client_cls: MagicMock, status_code: int
@@ -150,21 +150,21 @@ class TestValidateEndpoint:
        assert result.status == HookValidateStatus.auth_failed
        assert str(status_code) in (result.error_message or "")

-    @patch("onyx.server.features.hooks.api.httpx.Client")
+    @patch("ee.onyx.server.features.hooks.api.httpx.Client")
    def test_4xx_non_auth_returns_passed(self, mock_client_cls: MagicMock) -> None:
        mock_client_cls.return_value.__enter__.return_value.post.return_value = (
            _mock_response(422)
        )
        assert self._call().status == HookValidateStatus.passed

-    @patch("onyx.server.features.hooks.api.httpx.Client")
+    @patch("ee.onyx.server.features.hooks.api.httpx.Client")
    def test_connect_timeout_returns_timeout(self, mock_client_cls: MagicMock) -> None:
        mock_client_cls.return_value.__enter__.return_value.post.side_effect = (
            httpx.ConnectTimeout("timed out")
        )
        assert self._call().status == HookValidateStatus.timeout

-    @patch("onyx.server.features.hooks.api.httpx.Client")
+    @patch("ee.onyx.server.features.hooks.api.httpx.Client")
    @pytest.mark.parametrize(
        "exc",
        [
@@ -179,7 +179,7 @@ class TestValidateEndpoint:
        mock_client_cls.return_value.__enter__.return_value.post.side_effect = exc
        assert self._call().status == HookValidateStatus.timeout

-    @patch("onyx.server.features.hooks.api.httpx.Client")
+    @patch("ee.onyx.server.features.hooks.api.httpx.Client")
    def test_connect_error_returns_cannot_connect(
        self, mock_client_cls: MagicMock
    ) -> None:
@@ -189,7 +189,7 @@ class TestValidateEndpoint:
        )
        assert self._call().status == HookValidateStatus.cannot_connect

-    @patch("onyx.server.features.hooks.api.httpx.Client")
+    @patch("ee.onyx.server.features.hooks.api.httpx.Client")
    def test_arbitrary_exception_returns_cannot_connect(
        self, mock_client_cls: MagicMock
    ) -> None:
@@ -198,7 +198,7 @@ class TestValidateEndpoint:
        )
        assert self._call().status == HookValidateStatus.cannot_connect

-    @patch("onyx.server.features.hooks.api.httpx.Client")
+    @patch("ee.onyx.server.features.hooks.api.httpx.Client")
    def test_api_key_sent_as_bearer(self, mock_client_cls: MagicMock) -> None:
        mock_post = mock_client_cls.return_value.__enter__.return_value.post
        mock_post.return_value = _mock_response(200)
@@ -206,7 +206,7 @@ class TestValidateEndpoint:
        _, kwargs = mock_post.call_args
        assert kwargs["headers"]["Authorization"] == "Bearer mykey"

-    @patch("onyx.server.features.hooks.api.httpx.Client")
+    @patch("ee.onyx.server.features.hooks.api.httpx.Client")
    def test_no_api_key_omits_auth_header(self, mock_client_cls: MagicMock) -> None:
        mock_post = mock_client_cls.return_value.__enter__.return_value.post
        mock_post.return_value = _mock_response(200)
--- a/backend/tests/unit/onyx/connectors/canvas/test_canvas_connector.py
+++ b/backend/tests/unit/onyx/connectors/canvas/test_canvas_connector.py
@@ -1,15 +1,23 @@
-"""Tests for Canvas connector — client (PR1)."""
+"""Tests for Canvas connector — client, credentials, conversion."""

+from datetime import datetime
+from datetime import timezone
 from typing import Any
 from unittest.mock import MagicMock
 from unittest.mock import patch

 import pytest

+from onyx.configs.constants import DocumentSource
 from onyx.connectors.canvas.client import CanvasApiClient
+from onyx.connectors.canvas.connector import CanvasConnector
+from onyx.connectors.exceptions import ConnectorValidationError
+from onyx.connectors.exceptions import CredentialExpiredError
+from onyx.connectors.exceptions import InsufficientPermissionsError
+from onyx.connectors.exceptions import UnexpectedValidationError
+from onyx.connectors.models import ConnectorMissingCredentialError
 from onyx.error_handling.exceptions import OnyxError

-
 # ---------------------------------------------------------------------------
 # Helpers
 # ---------------------------------------------------------------------------
@@ -18,6 +26,77 @@ FAKE_BASE_URL = "https://myschool.instructure.com"
 FAKE_TOKEN = "fake-canvas-token"


+def _mock_course(
+    course_id: int = 1,
+    name: str = "Intro to CS",
+    course_code: str = "CS101",
+) -> dict[str, Any]:
+    """Build a fake course dict to serve as mocked API JSON.
+
+    Only the id, name and course code vary; the creation timestamp and
+    workflow state are constants.
+    """
+    course: dict[str, Any] = {"id": course_id, "name": name}
+    course["course_code"] = course_code
+    course["created_at"] = "2025-01-01T00:00:00Z"
+    course["workflow_state"] = "available"
+    return course
+
+
+def _build_connector(base_url: str = FAKE_BASE_URL) -> CanvasConnector:
+    """Return a CanvasConnector whose credentials were loaded against a mock.
+
+    ``rl_requests`` in the Canvas client module is patched only while the
+    connector is constructed and ``load_credentials`` runs; its ``get``
+    answers with a one-course listing.
+    """
+    credentials = {"canvas_access_token": FAKE_TOKEN}
+    with patch("onyx.connectors.canvas.client.rl_requests") as patched_requests:
+        patched_requests.get.return_value = _mock_response(
+            json_data=[_mock_course()]
+        )
+        built = CanvasConnector(canvas_base_url=base_url)
+        built.load_credentials(credentials)
+        return built
+
+
+def _mock_page(
+    page_id: int = 10,
+    title: str = "Syllabus",
+    updated_at: str = "2025-06-01T12:00:00Z",
+) -> dict[str, Any]:
+    """Build a fake page dict to serve as mocked API JSON.
+
+    The ``url``, ``body`` and ``created_at`` values are constants, so two
+    pages built with different ids still share the same ``url`` slug.
+    """
+    page: dict[str, Any] = {"page_id": page_id, "url": "syllabus", "title": title}
+    page["body"] = "<p>Welcome to the course</p>"
+    page["created_at"] = "2025-01-15T00:00:00Z"
+    page["updated_at"] = updated_at
+    return page
+
+
+def _mock_assignment(
+    assignment_id: int = 20,
+    name: str = "Homework 1",
+    course_id: int = 1,
+    updated_at: str = "2025-06-01T12:00:00Z",
+) -> dict[str, Any]:
+    """Build a fake assignment dict to serve as mocked API JSON.
+
+    ``html_url`` is derived from ``course_id`` and ``assignment_id``; the
+    description, creation time and due date are constants.
+    """
+    link = f"{FAKE_BASE_URL}/courses/{course_id}/assignments/{assignment_id}"
+    assignment: dict[str, Any] = {"id": assignment_id, "name": name}
+    assignment["description"] = "<p>Solve these problems</p>"
+    assignment["html_url"] = link
+    assignment["course_id"] = course_id
+    assignment["created_at"] = "2025-01-20T00:00:00Z"
+    assignment["updated_at"] = updated_at
+    assignment["due_at"] = "2025-02-01T23:59:00Z"
+    return assignment
+
+
+def _mock_announcement(
+    announcement_id: int = 30,
+    title: str = "Class Cancelled",
+    course_id: int = 1,
+    posted_at: str = "2025-06-01T12:00:00Z",
+) -> dict[str, Any]:
+    """Build a fake announcement dict to serve as mocked API JSON.
+
+    ``course_id`` is used only to build ``html_url``; it is not a key of
+    the returned dict.
+    """
+    link = f"{FAKE_BASE_URL}/courses/{course_id}/discussion_topics/{announcement_id}"
+    announcement: dict[str, Any] = {"id": announcement_id, "title": title}
+    announcement["message"] = "<p>No class today</p>"
+    announcement["html_url"] = link
+    announcement["posted_at"] = posted_at
+    return announcement
+
+
 def _mock_response(
    status_code: int = 200,
    json_data: Any = None,
@@ -325,6 +404,57 @@ class TestGet:
        assert result == expected


+# ---------------------------------------------------------------------------
+# CanvasApiClient.paginate tests
+# ---------------------------------------------------------------------------
+
+
+class TestPaginate:
+    """CanvasApiClient.paginate driven by mocked ``rl_requests`` responses."""
+
+    @staticmethod
+    def _new_client() -> CanvasApiClient:
+        """Create a client for the fake host using the fake token."""
+        return CanvasApiClient(
+            bearer_token=FAKE_TOKEN,
+            canvas_base_url=FAKE_BASE_URL,
+        )
+
+    @patch("onyx.connectors.canvas.client.rl_requests")
+    def test_single_page(self, mock_requests: MagicMock) -> None:
+        """One response (no link header supplied) yields exactly one page."""
+        items = [{"id": 1}, {"id": 2}]
+        mock_requests.get.return_value = _mock_response(json_data=items)
+        client = self._new_client()
+
+        pages = list(client.paginate("courses"))
+
+        assert len(pages) == 1
+        assert pages[0] == [{"id": 1}, {"id": 2}]
+
+    @patch("onyx.connectors.canvas.client.rl_requests")
+    def test_two_pages(self, mock_requests: MagicMock) -> None:
+        """A rel="next" link on the first response triggers a second fetch."""
+        next_link = f'<{FAKE_BASE_URL}/api/v1/courses?page=2>; rel="next"'
+        # Responses are consumed in order: first with the link, then without.
+        mock_requests.get.side_effect = [
+            _mock_response(json_data=[{"id": 1}], link_header=next_link),
+            _mock_response(json_data=[{"id": 2}]),
+        ]
+        client = self._new_client()
+
+        pages = list(client.paginate("courses"))
+
+        assert len(pages) == 2
+        assert pages[0] == [{"id": 1}]
+        assert pages[1] == [{"id": 2}]
+
+    @patch("onyx.connectors.canvas.client.rl_requests")
+    def test_empty_response(self, mock_requests: MagicMock) -> None:
+        """An empty JSON list produces no pages at all."""
+        mock_requests.get.return_value = _mock_response(json_data=[])
+        client = self._new_client()
+
+        pages = list(client.paginate("courses"))
+
+        assert pages == []
+
+
 # ---------------------------------------------------------------------------
 # CanvasApiClient._parse_next_link tests
 # ---------------------------------------------------------------------------
@@ -379,3 +509,368 @@ class TestParseNextLink:

        with pytest.raises(OnyxError, match="must use https"):
            self.client._parse_next_link(header)
+
+
+# ---------------------------------------------------------------------------
+# CanvasConnector — credential loading
+# ---------------------------------------------------------------------------
+
+
+class TestLoadCredentials:
+    """CanvasConnector.load_credentials against mocked HTTP responses."""
+
+    def _assert_load_credentials_raises(
+        self,
+        status_code: int,
+        expected_error: type[Exception],
+        mock_requests: MagicMock,
+    ) -> None:
+        """Make every GET answer ``status_code`` and expect ``expected_error``."""
+        failing_response = _mock_response(status_code, {})
+        mock_requests.get.return_value = failing_response
+        connector = CanvasConnector(canvas_base_url=FAKE_BASE_URL)
+        credentials = {"canvas_access_token": FAKE_TOKEN}
+        with pytest.raises(expected_error):
+            connector.load_credentials(credentials)
+
+    @patch("onyx.connectors.canvas.client.rl_requests")
+    def test_load_credentials_success(self, mock_requests: MagicMock) -> None:
+        """A successful course listing returns None and sets the client."""
+        course_listing = _mock_response(json_data=[_mock_course()])
+        mock_requests.get.return_value = course_listing
+        connector = CanvasConnector(canvas_base_url=FAKE_BASE_URL)
+
+        outcome = connector.load_credentials({"canvas_access_token": FAKE_TOKEN})
+
+        assert outcome is None
+        assert connector._canvas_client is not None
+
+    def test_canvas_client_raises_without_credentials(self) -> None:
+        """Reading ``canvas_client`` before loading credentials must raise."""
+        unauthenticated = CanvasConnector(canvas_base_url=FAKE_BASE_URL)
+
+        with pytest.raises(ConnectorMissingCredentialError):
+            _ = unauthenticated.canvas_client
+
+    @patch("onyx.connectors.canvas.client.rl_requests")
+    def test_load_credentials_invalid_token(self, mock_requests: MagicMock) -> None:
+        """HTTP 401 maps to CredentialExpiredError."""
+        self._assert_load_credentials_raises(
+            status_code=401,
+            expected_error=CredentialExpiredError,
+            mock_requests=mock_requests,
+        )
+
+    @patch("onyx.connectors.canvas.client.rl_requests")
+    def test_load_credentials_insufficient_permissions(
+        self, mock_requests: MagicMock
+    ) -> None:
+        """HTTP 403 maps to InsufficientPermissionsError."""
+        self._assert_load_credentials_raises(
+            status_code=403,
+            expected_error=InsufficientPermissionsError,
+            mock_requests=mock_requests,
+        )
+
+
+# ---------------------------------------------------------------------------
+# CanvasConnector — URL normalization
+# ---------------------------------------------------------------------------
+
+
+class TestConnectorUrlNormalization:
+    """``canvas_base_url`` as exposed by the connector for various inputs."""
+
+    def test_strips_api_v1_suffix(self) -> None:
+        """A trailing ``/api/v1`` is removed from the configured URL."""
+        url_with_suffix = f"{FAKE_BASE_URL}/api/v1"
+
+        connector = _build_connector(base_url=url_with_suffix)
+
+        assert connector.canvas_base_url == FAKE_BASE_URL
+
+    def test_strips_trailing_slash(self) -> None:
+        """A trailing ``/`` is removed from the configured URL."""
+        url_with_slash = f"{FAKE_BASE_URL}/"
+
+        connector = _build_connector(base_url=url_with_slash)
+
+        assert connector.canvas_base_url == FAKE_BASE_URL
+
+    def test_no_change_for_clean_url(self) -> None:
+        """An already-clean URL is exposed unchanged."""
+        connector = _build_connector(base_url=FAKE_BASE_URL)
+
+        assert connector.canvas_base_url == FAKE_BASE_URL
+
+
+# ---------------------------------------------------------------------------
+# CanvasConnector — document conversion
+# ---------------------------------------------------------------------------
+
+
+class TestDocumentConversion:
+    """Conversion of Canvas page/assignment/announcement models to documents."""
+
+    def setup_method(self) -> None:
+        # Fresh connector (credentials loaded against a mock) for every test.
+        self.connector = _build_connector()
+
+    def test_convert_page_to_document(self) -> None:
+        """A fully populated page maps to id, source, title, metadata, link, time."""
+        from onyx.connectors.canvas.connector import CanvasPage
+
+        doc = self.connector._convert_page_to_document(
+            CanvasPage(
+                page_id=10,
+                url="syllabus",
+                title="Syllabus",
+                body="<p>Welcome</p>",
+                created_at="2025-01-15T00:00:00Z",
+                updated_at="2025-06-01T12:00:00Z",
+                course_id=1,
+            )
+        )
+
+        assert doc.id == "canvas-page-1-10"
+        assert doc.source == DocumentSource.CANVAS
+        assert doc.semantic_identifier == "Syllabus"
+        assert doc.metadata == {"course_id": "1", "type": "page"}
+        page_link = doc.sections[0].link
+        assert page_link is not None
+        assert f"{FAKE_BASE_URL}/courses/1/pages/syllabus" in page_link
+        assert doc.doc_updated_at == datetime(2025, 6, 1, 12, 0, tzinfo=timezone.utc)
+
+    def test_convert_page_without_body(self) -> None:
+        """With ``body=None`` the section text still carries the title."""
+        from onyx.connectors.canvas.connector import CanvasPage
+
+        bodiless_page = CanvasPage(
+            page_id=11,
+            url="empty-page",
+            title="Empty Page",
+            body=None,
+            created_at="2025-01-15T00:00:00Z",
+            updated_at="2025-06-01T12:00:00Z",
+            course_id=1,
+        )
+
+        text = self.connector._convert_page_to_document(bodiless_page).sections[0].text
+
+        assert text is not None
+        assert "Empty Page" in text
+        assert "<p>" not in text
+
+    def test_convert_assignment_to_document(self) -> None:
+        """A due date is rendered into the section text in the asserted format."""
+        from onyx.connectors.canvas.connector import CanvasAssignment
+
+        doc = self.connector._convert_assignment_to_document(
+            CanvasAssignment(
+                id=20,
+                name="Homework 1",
+                description="<p>Solve these</p>",
+                html_url=f"{FAKE_BASE_URL}/courses/1/assignments/20",
+                course_id=1,
+                created_at="2025-01-20T00:00:00Z",
+                updated_at="2025-06-01T12:00:00Z",
+                due_at="2025-02-01T23:59:00Z",
+            )
+        )
+
+        assert doc.id == "canvas-assignment-1-20"
+        assert doc.source == DocumentSource.CANVAS
+        assert doc.semantic_identifier == "Homework 1"
+        assignment_text = doc.sections[0].text
+        assert assignment_text is not None
+        assert "Due: February 01, 2025 23:59 UTC" in assignment_text
+
+    def test_convert_assignment_without_description(self) -> None:
+        """Without description or due date: name present, no ``Due:`` text."""
+        from onyx.connectors.canvas.connector import CanvasAssignment
+
+        bare_assignment = CanvasAssignment(
+            id=21,
+            name="Quiz 1",
+            description=None,
+            html_url=f"{FAKE_BASE_URL}/courses/1/assignments/21",
+            course_id=1,
+            created_at="2025-01-20T00:00:00Z",
+            updated_at="2025-06-01T12:00:00Z",
+            due_at=None,
+        )
+
+        converted = self.connector._convert_assignment_to_document(bare_assignment)
+        text = converted.sections[0].text
+
+        assert text is not None
+        assert "Quiz 1" in text
+        assert "Due:" not in text
+
+    def test_convert_announcement_to_document(self) -> None:
+        """``posted_at`` becomes the document's updated-at timestamp."""
+        from onyx.connectors.canvas.connector import CanvasAnnouncement
+
+        doc = self.connector._convert_announcement_to_document(
+            CanvasAnnouncement(
+                id=30,
+                title="Class Cancelled",
+                message="<p>No class today</p>",
+                html_url=f"{FAKE_BASE_URL}/courses/1/discussion_topics/30",
+                posted_at="2025-06-01T12:00:00Z",
+                course_id=1,
+            )
+        )
+
+        assert doc.id == "canvas-announcement-1-30"
+        assert doc.source == DocumentSource.CANVAS
+        assert doc.semantic_identifier == "Class Cancelled"
+        assert doc.doc_updated_at == datetime(2025, 6, 1, 12, 0, tzinfo=timezone.utc)
+
+    def test_convert_announcement_without_posted_at(self) -> None:
+        """A missing ``posted_at`` leaves ``doc_updated_at`` as None."""
+        from onyx.connectors.canvas.connector import CanvasAnnouncement
+
+        undated = CanvasAnnouncement(
+            id=31,
+            title="TBD Announcement",
+            message=None,
+            html_url=f"{FAKE_BASE_URL}/courses/1/discussion_topics/31",
+            posted_at=None,
+            course_id=1,
+        )
+
+        doc = self.connector._convert_announcement_to_document(undated)
+
+        assert doc.doc_updated_at is None
+
+
+# ---------------------------------------------------------------------------
+# CanvasConnector — validate_connector_settings
+# ---------------------------------------------------------------------------
+
+
+class TestValidateConnectorSettings:
+    """Mapping of HTTP status codes to validation errors."""
+
+    def _assert_validate_raises(
+        self,
+        status_code: int,
+        expected_error: type[Exception],
+        mock_requests: MagicMock,
+    ) -> None:
+        """Let credential loading succeed, then fail validation with ``status_code``."""
+        # Responses are consumed in order: the first GET (issued while
+        # loading credentials) succeeds, the second one fails.
+        mock_requests.get.side_effect = [
+            _mock_response(json_data=[_mock_course()]),
+            _mock_response(status_code, {}),
+        ]
+        connector = CanvasConnector(canvas_base_url=FAKE_BASE_URL)
+        connector.load_credentials({"canvas_access_token": FAKE_TOKEN})
+
+        with pytest.raises(expected_error):
+            connector.validate_connector_settings()
+
+    @patch("onyx.connectors.canvas.client.rl_requests")
+    def test_validate_success(self, mock_requests: MagicMock) -> None:
+        """A successful course listing makes validation pass silently."""
+        course_listing = _mock_response(json_data=[_mock_course()])
+        mock_requests.get.return_value = course_listing
+
+        _build_connector().validate_connector_settings()  # should not raise
+
+    @patch("onyx.connectors.canvas.client.rl_requests")
+    def test_validate_expired_credential(self, mock_requests: MagicMock) -> None:
+        """HTTP 401 maps to CredentialExpiredError."""
+        self._assert_validate_raises(
+            status_code=401,
+            expected_error=CredentialExpiredError,
+            mock_requests=mock_requests,
+        )
+
+    @patch("onyx.connectors.canvas.client.rl_requests")
+    def test_validate_insufficient_permissions(self, mock_requests: MagicMock) -> None:
+        """HTTP 403 maps to InsufficientPermissionsError."""
+        self._assert_validate_raises(
+            status_code=403,
+            expected_error=InsufficientPermissionsError,
+            mock_requests=mock_requests,
+        )
+
+    @patch("onyx.connectors.canvas.client.rl_requests")
+    def test_validate_rate_limited(self, mock_requests: MagicMock) -> None:
+        """HTTP 429 maps to ConnectorValidationError."""
+        self._assert_validate_raises(
+            status_code=429,
+            expected_error=ConnectorValidationError,
+            mock_requests=mock_requests,
+        )
+
+    @patch("onyx.connectors.canvas.client.rl_requests")
+    def test_validate_unexpected_error(self, mock_requests: MagicMock) -> None:
+        """HTTP 500 maps to UnexpectedValidationError."""
+        self._assert_validate_raises(
+            status_code=500,
+            expected_error=UnexpectedValidationError,
+            mock_requests=mock_requests,
+        )
+
+
+# ---------------------------------------------------------------------------
+# _list_* pagination tests
+# ---------------------------------------------------------------------------
+
+
+class TestListCourses:
+    """CanvasConnector._list_courses against mocked course listings."""
+
+    @patch("onyx.connectors.canvas.client.rl_requests")
+    def test_single_page(self, mock_requests: MagicMock) -> None:
+        """Both courses from a single response are returned, in order."""
+        # Keyword arguments keep name and course_code from being swapped:
+        # the helper's positional order is (course_id, name, course_code).
+        mock_requests.get.return_value = _mock_response(
+            json_data=[
+                _mock_course(1),
+                _mock_course(2, name="Data Structures", course_code="CS201"),
+            ]
+        )
+        connector = _build_connector()
+
+        result = connector._list_courses()
+
+        assert len(result) == 2
+        assert result[0].id == 1
+        assert result[1].id == 2
+
+    @patch("onyx.connectors.canvas.client.rl_requests")
+    def test_empty_response(self, mock_requests: MagicMock) -> None:
+        """An empty listing yields an empty list."""
+        mock_requests.get.return_value = _mock_response(json_data=[])
+        connector = _build_connector()
+
+        result = connector._list_courses()
+
+        assert result == []
+
+
+class TestListPages:
+    """CanvasConnector._list_pages against mocked page listings."""
+
+    @patch("onyx.connectors.canvas.client.rl_requests")
+    def test_single_page(self, mock_requests: MagicMock) -> None:
+        """Both pages from a single response are returned, in order."""
+        listing = [_mock_page(10), _mock_page(11, "Notes")]
+        mock_requests.get.return_value = _mock_response(json_data=listing)
+
+        pages = _build_connector()._list_pages(course_id=1)
+
+        assert len(pages) == 2
+        first, second = pages
+        assert first.page_id == 10
+        assert second.page_id == 11
+
+    @patch("onyx.connectors.canvas.client.rl_requests")
+    def test_empty_response(self, mock_requests: MagicMock) -> None:
+        """An empty listing yields an empty list."""
+        mock_requests.get.return_value = _mock_response(json_data=[])
+
+        pages = _build_connector()._list_pages(course_id=1)
+
+        assert pages == []
+
+
+class TestListAssignments:
+    """CanvasConnector._list_assignments against mocked listings."""
+
+    @patch("onyx.connectors.canvas.client.rl_requests")
+    def test_single_page(self, mock_requests: MagicMock) -> None:
+        """Both assignments from a single response are returned, in order."""
+        listing = [_mock_assignment(20), _mock_assignment(21, "Quiz 1")]
+        mock_requests.get.return_value = _mock_response(json_data=listing)
+
+        assignments = _build_connector()._list_assignments(course_id=1)
+
+        assert len(assignments) == 2
+        first, second = assignments
+        assert first.id == 20
+        assert second.id == 21
+
+    @patch("onyx.connectors.canvas.client.rl_requests")
+    def test_empty_response(self, mock_requests: MagicMock) -> None:
+        """An empty listing yields an empty list."""
+        mock_requests.get.return_value = _mock_response(json_data=[])
+
+        assignments = _build_connector()._list_assignments(course_id=1)
+
+        assert assignments == []
+
+
+class TestListAnnouncements:
+    """CanvasConnector._list_announcements against mocked listings."""
+
+    @patch("onyx.connectors.canvas.client.rl_requests")
+    def test_single_page(self, mock_requests: MagicMock) -> None:
+        """Both announcements from a single response are returned, in order."""
+        listing = [_mock_announcement(30), _mock_announcement(31, "Update")]
+        mock_requests.get.return_value = _mock_response(json_data=listing)
+
+        announcements = _build_connector()._list_announcements(course_id=1)
+
+        assert len(announcements) == 2
+        first, second = announcements
+        assert first.id == 30
+        assert second.id == 31
+
+    @patch("onyx.connectors.canvas.client.rl_requests")
+    def test_empty_response(self, mock_requests: MagicMock) -> None:
+        """An empty listing yields an empty list."""
+        mock_requests.get.return_value = _mock_response(json_data=[])
+
+        announcements = _build_connector()._list_announcements(course_id=1)
+
+        assert announcements == []
--- a/backend/tests/unit/onyx/connectors/discord/test_discord_validation.py
+++ b/backend/tests/unit/onyx/connectors/discord/test_discord_validation.py
@@ -0,0 +1,45 @@
+from unittest.mock import AsyncMock
+from unittest.mock import patch
+
+import pytest
+from discord.errors import LoginFailure
+
+from onyx.connectors.discord.connector import DiscordConnector
+from onyx.connectors.exceptions import CredentialInvalidError
+
+
+def _build_connector(token: str = "fake-bot-token") -> DiscordConnector:
+    """Return a DiscordConnector with ``token`` loaded as its bot token."""
+    credentials = {"discord_bot_token": token}
+    discord_connector = DiscordConnector()
+    discord_connector.load_credentials(credentials)
+    return discord_connector
+
+
+@patch("onyx.connectors.discord.connector.Client.close", new_callable=AsyncMock)
+@patch("onyx.connectors.discord.connector.Client.login", new_callable=AsyncMock)
+def test_validate_success(
+    mock_login: AsyncMock,
+    mock_close: AsyncMock,
+) -> None:
+    """Validation logs in with the loaded token and closes the client once."""
+    # The bottom-most @patch (login) supplies the first mock argument.
+    _build_connector().validate_connector_settings()
+
+    mock_login.assert_awaited_once_with("fake-bot-token")
+    mock_close.assert_awaited_once()
+
+
+@patch("onyx.connectors.discord.connector.Client.close", new_callable=AsyncMock)
+@patch(
+    "onyx.connectors.discord.connector.Client.login",
+    new_callable=AsyncMock,
+    side_effect=LoginFailure("Improper token has been passed."),
+)
+def test_validate_invalid_token(
+    mock_login: AsyncMock,  # noqa: ARG001
+    mock_close: AsyncMock,
+) -> None:
+    """A LoginFailure surfaces as CredentialInvalidError; the client still closes."""
+    rejected = _build_connector(token="bad-token")
+
+    with pytest.raises(CredentialInvalidError, match="Invalid Discord bot token"):
+        rejected.validate_connector_settings()
+
+    # The client must be closed even though login failed.
+    mock_close.assert_awaited_once()
--- a/backend/tests/unit/onyx/db/test_chat_sessions.py
+++ b/backend/tests/unit/onyx/db/test_chat_sessions.py
@@ -0,0 +1,225 @@
+"""Tests for get_chat_sessions_by_user filtering behavior.
+
+Verifies that failed chat sessions (those with only SYSTEM messages) are
+correctly filtered out while preserving recently created sessions, matching
+the behavior specified in PR #7233.
+"""
+
+from datetime import datetime
+from datetime import timedelta
+from datetime import timezone
+from unittest.mock import MagicMock
+from uuid import UUID
+from uuid import uuid4
+
+import pytest
+from sqlalchemy.orm import Session
+
+from onyx.db.chat import get_chat_sessions_by_user
+from onyx.db.models import ChatSession
+
+
+def _make_session(
+    user_id: UUID,
+    time_created: datetime | None = None,
+    time_updated: datetime | None = None,
+    description: str = "",
+) -> MagicMock:
+    """Build a MagicMock spec'd to ChatSession with a fresh random ID.
+
+    ``time_created`` defaults to the current UTC time and ``time_updated``
+    defaults to whatever ``time_created`` ends up being.
+    """
+    created_at = time_created or datetime.now(timezone.utc)
+    updated_at = time_updated or created_at
+
+    chat_session = MagicMock(spec=ChatSession)
+    chat_session.id = uuid4()
+    chat_session.user_id = user_id
+    chat_session.description = description
+    chat_session.time_created = created_at
+    chat_session.time_updated = updated_at
+    # Fixed flags shared by every mock session in this module.
+    chat_session.deleted = False
+    chat_session.onyxbot_flow = False
+    chat_session.project_id = None
+    return chat_session
+
+
+@pytest.fixture
+def user_id() -> UUID:
+    """Provide a newly generated random user ID."""
+    generated_id = uuid4()
+    return generated_id
+
+
+@pytest.fixture
+def old_time() -> datetime:
+    """UTC timestamp one hour in the past, well beyond the 5-minute leeway window."""
+    one_hour = timedelta(hours=1)
+    return datetime.now(timezone.utc) - one_hour
+
+
+@pytest.fixture
+def recent_time() -> datetime:
+    """UTC timestamp two minutes in the past, inside the 5-minute leeway window."""
+    two_minutes = timedelta(minutes=2)
+    return datetime.now(timezone.utc) - two_minutes
+
+
+class TestGetChatSessionsByUser:
+    """Exercises the failed-chat filtering done by get_chat_sessions_by_user."""
+
+    @staticmethod
+    def _scalars_result(rows: list) -> MagicMock:
+        """Mock an execute() result whose ``.scalars().all()`` returns ``rows``."""
+        execute_result = MagicMock()
+        execute_result.scalars.return_value.all.return_value = rows
+        return execute_result
+
+    def test_filters_out_failed_sessions(
+        self, user_id: UUID, old_time: datetime
+    ) -> None:
+        """An old session absent from the second query's ID list is dropped."""
+        kept = _make_session(user_id, time_created=old_time)
+        dropped = _make_session(user_id, time_created=old_time)
+
+        db_session = MagicMock(spec=Session)
+        # Query 1 yields every session; query 2 yields only the IDs of the
+        # sessions that have non-system messages.
+        db_session.execute.side_effect = [
+            self._scalars_result([kept, dropped]),
+            self._scalars_result([kept.id]),
+        ]
+
+        result = get_chat_sessions_by_user(
+            user_id=user_id,
+            deleted=False,
+            db_session=db_session,
+            include_failed_chats=False,
+        )
+
+        assert len(result) == 1
+        assert result[0].id == kept.id
+
+    def test_keeps_recent_sessions_without_messages(
+        self, user_id: UUID, recent_time: datetime
+    ) -> None:
+        """A session created inside the leeway window survives without messages."""
+        fresh = _make_session(user_id, time_created=recent_time)
+
+        db_session = MagicMock(spec=Session)
+        db_session.execute.side_effect = [self._scalars_result([fresh])]
+
+        result = get_chat_sessions_by_user(
+            user_id=user_id,
+            deleted=False,
+            db_session=db_session,
+            include_failed_chats=False,
+        )
+
+        assert len(result) == 1
+        assert result[0].id == fresh.id
+        # A single execute() is expected: the only session is within the
+        # leeway window, so no message-validation query should be issued.
+        assert db_session.execute.call_count == 1
+
+    def test_include_failed_chats_skips_filtering(
+        self, user_id: UUID, old_time: datetime
+    ) -> None:
+        """With include_failed_chats=True every fetched session is returned."""
+        first = _make_session(user_id, time_created=old_time)
+        second = _make_session(user_id, time_created=old_time)
+
+        db_session = MagicMock(spec=Session)
+        db_session.execute.side_effect = [self._scalars_result([first, second])]
+
+        result = get_chat_sessions_by_user(
+            user_id=user_id,
+            deleted=False,
+            db_session=db_session,
+            include_failed_chats=True,
+        )
+
+        assert len(result) == 2
+        # A single execute() is expected: no message-validation query.
+        assert db_session.execute.call_count == 1
+
+    def test_limit_applied_after_filtering(
+        self, user_id: UUID, old_time: datetime
+    ) -> None:
+        """The limit trims the already-filtered list rather than the raw one."""
+        sessions = []
+        for _ in range(5):
+            sessions.append(_make_session(user_id, time_created=old_time))
+        # Only the first three sessions are reported as having real messages.
+        valid_ids = [candidate.id for candidate in sessions[:3]]
+
+        db_session = MagicMock(spec=Session)
+        db_session.execute.side_effect = [
+            self._scalars_result(sessions),
+            self._scalars_result(valid_ids),
+        ]
+
+        result = get_chat_sessions_by_user(
+            user_id=user_id,
+            deleted=False,
+            db_session=db_session,
+            include_failed_chats=False,
+            limit=2,
+        )
+
+        assert len(result) == 2
+        # The first two valid sessions come back in their original order.
+        assert result[0].id == sessions[0].id
+        assert result[1].id == sessions[1].id
+
+    def test_mixed_recent_and_old_sessions(
+        self, user_id: UUID, old_time: datetime, recent_time: datetime
+    ) -> None:
+        """Old-valid and recent sessions are kept; the old failed one is not."""
+        old_valid = _make_session(user_id, time_created=old_time)
+        old_failed = _make_session(user_id, time_created=old_time)
+        recent_no_msgs = _make_session(user_id, time_created=recent_time)
+
+        db_session = MagicMock(spec=Session)
+        db_session.execute.side_effect = [
+            self._scalars_result([old_valid, old_failed, recent_no_msgs]),
+            self._scalars_result([old_valid.id]),
+        ]
+
+        result = get_chat_sessions_by_user(
+            user_id=user_id,
+            deleted=False,
+            db_session=db_session,
+            include_failed_chats=False,
+        )
+
+        returned_ids = {chat_session.id for chat_session in result}
+        assert old_valid.id in returned_ids
+        assert recent_no_msgs.id in returned_ids
+        assert old_failed.id not in returned_ids
+
+    def test_empty_result(self, user_id: UUID) -> None:
+        """With no sessions fetched, the result is empty after one query."""
+        db_session = MagicMock(spec=Session)
+        db_session.execute.side_effect = [self._scalars_result([])]
+
+        result = get_chat_sessions_by_user(
+            user_id=user_id,
+            deleted=False,
+            db_session=db_session,
+            include_failed_chats=False,
+        )
+
+        assert result == []
+        assert db_session.execute.call_count == 1
--- a/backend/tests/unit/onyx/document_index/opensearch/test_opensearch_batch_flush.py
+++ b/backend/tests/unit/onyx/document_index/opensearch/test_opensearch_batch_flush.py
@@ -0,0 +1,223 @@
+from unittest.mock import MagicMock
+from unittest.mock import patch
+
+from onyx.access.models import DocumentAccess
+from onyx.configs.constants import DocumentSource
+from onyx.connectors.models import Document
+from onyx.connectors.models import TextSection
+from onyx.document_index.interfaces_new import IndexingMetadata
+from onyx.document_index.interfaces_new import TenantState
+from onyx.document_index.opensearch.opensearch_document_index import (
+    OpenSearchDocumentIndex,
+)
+from onyx.indexing.models import ChunkEmbedding
+from onyx.indexing.models import DocMetadataAwareIndexChunk
+
+
+def _make_chunk(
+    doc_id: str,
+    chunk_id: int,
+) -> DocMetadataAwareIndexChunk:
+    """Build a bare-bones, publicly accessible chunk belonging to ``doc_id``."""
+    public_access = DocumentAccess.build(
+        user_emails=[],
+        user_groups=[],
+        external_user_emails=[],
+        external_user_group_ids=[],
+        is_public=True,
+    )
+    source_doc = Document(
+        id=doc_id,
+        sections=[TextSection(text="test", link="http://test.com")],
+        source=DocumentSource.FILE,
+        semantic_identifier="test_doc",
+        metadata={},
+    )
+    chunk_embedding = ChunkEmbedding(
+        full_embedding=[0.1] * 10, mini_chunk_embeddings=[]
+    )
+    return DocMetadataAwareIndexChunk(
+        # Identity and text of the chunk itself.
+        chunk_id=chunk_id,
+        blurb="test",
+        content="test content",
+        source_links={0: "http://test.com"},
+        image_file_id=None,
+        section_continuation=False,
+        source_document=source_doc,
+        title_prefix="",
+        metadata_suffix_semantic="",
+        metadata_suffix_keyword="",
+        mini_chunk_texts=None,
+        large_chunk_id=None,
+        doc_summary="",
+        chunk_context="",
+        contextual_rag_reserved_tokens=0,
+        # Embeddings.
+        embeddings=chunk_embedding,
+        title_embedding=[0.1] * 10,
+        # Indexing-time metadata.
+        tenant_id="test_tenant",
+        access=public_access,
+        document_sets=set(),
+        user_project=[],
+        personas=[],
+        boost=0,
+        aggregated_chunk_boost_factor=1.0,
+        ancestor_hierarchy_node_ids=[],
+    )
+
+
+def _make_index() -> tuple[OpenSearchDocumentIndex, MagicMock]:
+    """Build an OpenSearchDocumentIndex wired to a mocked client.
+
+    Returns the index together with the mock standing in for the client's
+    ``bulk_index_documents``."""
+    bulk_index_mock = MagicMock()
+    client_mock = MagicMock()
+    client_mock.bulk_index_documents = bulk_index_mock
+
+    # __new__ bypasses __init__, so the private attributes are set by hand.
+    opensearch_index = OpenSearchDocumentIndex.__new__(OpenSearchDocumentIndex)
+    opensearch_index._index_name = "test_index"
+    opensearch_index._client = client_mock
+    opensearch_index._tenant_state = TenantState(
+        tenant_id="test_tenant", multitenant=False
+    )
+
+    return opensearch_index, bulk_index_mock
+
+
+def _make_metadata(doc_id: str, chunk_count: int) -> IndexingMetadata:
+    """Metadata for a single document going from 0 chunks to ``chunk_count``."""
+    counts = IndexingMetadata.ChunkCounts(
+        old_chunk_cnt=0,
+        new_chunk_cnt=chunk_count,
+    )
+    return IndexingMetadata(doc_id_to_chunk_cnt_diff={doc_id: counts})
+
+
+@patch(
+    "onyx.document_index.opensearch.opensearch_document_index.MAX_CHUNKS_PER_DOC_BATCH",
+    100,
+)
+def test_single_doc_under_batch_limit_flushes_once() -> None:
+    """50 chunks for one document with a cap of 100 go out in a single bulk call."""
+    doc_id = "doc_1"
+    num_chunks = 50
+    index, mock_bulk = _make_index()
+    doc_chunks = []
+    for chunk_idx in range(num_chunks):
+        doc_chunks.append(_make_chunk(doc_id, chunk_idx))
+    metadata = _make_metadata(doc_id, num_chunks)
+
+    with patch.object(index, "delete", return_value=0):
+        index.index(doc_chunks, metadata)
+
+    assert mock_bulk.call_count == 1
+    only_call = mock_bulk.call_args_list[0]
+    assert len(only_call.kwargs["documents"]) == num_chunks
+
+
+@patch(
+    "onyx.document_index.opensearch.opensearch_document_index.MAX_CHUNKS_PER_DOC_BATCH",
+    100,
+)
+def test_single_doc_over_batch_limit_flushes_multiple_times() -> None:
+    """250 chunks for one document with a cap of 100 go out as 100 + 100 + 50."""
+    doc_id = "doc_1"
+    num_chunks = 250
+    index, mock_bulk = _make_index()
+    doc_chunks = [_make_chunk(doc_id, chunk_idx) for chunk_idx in range(num_chunks)]
+    metadata = _make_metadata(doc_id, num_chunks)
+
+    with patch.object(index, "delete", return_value=0):
+        index.index(doc_chunks, metadata)
+
+    # Two full batches followed by the 50-chunk remainder.
+    assert mock_bulk.call_count == 3
+    flushed_sizes = []
+    for bulk_call in mock_bulk.call_args_list:
+        flushed_sizes.append(len(bulk_call.kwargs["documents"]))
+    assert flushed_sizes == [100, 100, 50]
+
+
+@patch(
+    "onyx.document_index.opensearch.opensearch_document_index.MAX_CHUNKS_PER_DOC_BATCH",
+    100,
+)
+def test_single_doc_exactly_at_batch_limit() -> None:
+    """A document with exactly MAX_CHUNKS_PER_DOC_BATCH chunks should flush once
+    (the flush happens on the next chunk, not at the boundary)."""
+    index, mock_bulk = _make_index()
+    doc_id = "doc_1"
+    num_chunks = 100
+    chunks = [_make_chunk(doc_id, i) for i in range(num_chunks)]
+    metadata = _make_metadata(doc_id, num_chunks)
+
+    with patch.object(index, "delete", return_value=0):
+        index.index(chunks, metadata)
+
+    # Expectation: a mid-document flush is only triggered by a chunk arriving
+    # while the buffer already holds 100 items. With exactly 100 chunks there
+    # is no 101st arrival, so the single flush at the end writes all of them.
+    assert mock_bulk.call_count == 1
+    assert len(mock_bulk.call_args_list[0].kwargs["documents"]) == num_chunks
+
+
+@patch(
+    "onyx.document_index.opensearch.opensearch_document_index.MAX_CHUNKS_PER_DOC_BATCH",
+    100,
+)
+def test_single_doc_one_over_batch_limit() -> None:
+    """With 101 chunks and a cap of 100, the bulk writes are sized 100 then 1."""
+    doc_id = "doc_1"
+    num_chunks = 101
+    index, mock_bulk = _make_index()
+    doc_chunks = [_make_chunk(doc_id, chunk_idx) for chunk_idx in range(num_chunks)]
+    metadata = _make_metadata(doc_id, num_chunks)
+
+    with patch.object(index, "delete", return_value=0):
+        index.index(doc_chunks, metadata)
+
+    assert mock_bulk.call_count == 2
+    flushed_sizes = []
+    for bulk_call in mock_bulk.call_args_list:
+        flushed_sizes.append(len(bulk_call.kwargs["documents"]))
+    assert flushed_sizes == [100, 1]
+
+
+@patch(
+    "onyx.document_index.opensearch.opensearch_document_index.MAX_CHUNKS_PER_DOC_BATCH",
+    100,
+)
+def test_multiple_docs_each_under_limit_flush_per_doc() -> None:
+    """Multiple documents each under the batch limit should flush once per document."""
+    index, mock_bulk = _make_index()
+    num_docs = 3
+    chunks_per_doc = 50
+    # Chunks are grouped by document: all of doc_0, then doc_1, then doc_2.
+    chunks = [
+        _make_chunk(f"doc_{doc_idx}", chunk_idx)
+        for doc_idx in range(num_docs)
+        for chunk_idx in range(chunks_per_doc)
+    ]
+
+    metadata = IndexingMetadata(
+        doc_id_to_chunk_cnt_diff={
+            f"doc_{i}": IndexingMetadata.ChunkCounts(
+                old_chunk_cnt=0, new_chunk_cnt=chunks_per_doc
+            )
+            for i in range(num_docs)
+        },
+    )
+
+    with patch.object(index, "delete", return_value=0):
+        index.index(chunks, metadata)
+
+    # 3 documents = 3 flushes (two at document boundaries plus the final one).
+    assert mock_bulk.call_count == num_docs
+    # Each flush must carry exactly one document's chunks; checking only the
+    # call count would also accept an uneven split across the three calls.
+    batch_sizes = [len(call.kwargs["documents"]) for call in mock_bulk.call_args_list]
+    assert batch_sizes == [chunks_per_doc] * num_docs
+
+
+@patch(
+    "onyx.document_index.opensearch.opensearch_document_index.MAX_CHUNKS_PER_DOC_BATCH",
+    100,
+)
+def test_delete_called_once_per_document() -> None:
+    """A 250-chunk document (several flushes at a cap of 100) still triggers
+    exactly one delete call for that document."""
+    doc_id = "doc_1"
+    num_chunks = 250
+    index, _ = _make_index()
+    doc_chunks = [_make_chunk(doc_id, chunk_idx) for chunk_idx in range(num_chunks)]
+    metadata = _make_metadata(doc_id, num_chunks)
+
+    with patch.object(index, "delete", return_value=0) as mock_delete:
+        index.index(doc_chunks, metadata)
+
+    mock_delete.assert_called_once_with(doc_id, None)
--- a/backend/tests/unit/onyx/document_index/vespa/test_vespa_batch_flush.py
+++ b/backend/tests/unit/onyx/document_index/vespa/test_vespa_batch_flush.py
@@ -0,0 +1,152 @@
+"""Unit tests for VespaDocumentIndex.index().
+
+These tests mock all external I/O (HTTP calls, thread pools) and verify
+the streaming logic, ID cleaning/mapping, and DocumentInsertionRecord
+construction.
+"""
+
+from unittest.mock import MagicMock
+from unittest.mock import patch
+
+from onyx.access.models import DocumentAccess
+from onyx.configs.constants import DocumentSource
+from onyx.connectors.models import Document
+from onyx.connectors.models import TextSection
+from onyx.document_index.interfaces import EnrichedDocumentIndexingInfo
+from onyx.document_index.interfaces_new import IndexingMetadata
+from onyx.document_index.interfaces_new import TenantState
+from onyx.document_index.vespa.vespa_document_index import VespaDocumentIndex
+from onyx.indexing.models import ChunkEmbedding
+from onyx.indexing.models import DocMetadataAwareIndexChunk
+from onyx.indexing.models import IndexChunk
+
+
+def _make_chunk(
+    doc_id: str,
+    chunk_id: int = 0,
+    content: str = "test content",
+) -> DocMetadataAwareIndexChunk:
+    """Build a publicly accessible DocMetadataAwareIndexChunk for ``doc_id``.
+
+    An IndexChunk is created first and then wrapped via ``from_index_chunk``.
+    """
+    source_doc = Document(
+        id=doc_id,
+        semantic_identifier="test_doc",
+        sections=[TextSection(text=content, link=None)],
+        source=DocumentSource.NOT_APPLICABLE,
+        metadata={},
+    )
+    chunk_embedding = ChunkEmbedding(
+        full_embedding=[0.1] * 10,
+        mini_chunk_embeddings=[],
+    )
+    base_chunk = IndexChunk(
+        chunk_id=chunk_id,
+        # The blurb is the first 50 characters of the content.
+        blurb=content[:50],
+        content=content,
+        source_links=None,
+        image_file_id=None,
+        section_continuation=False,
+        source_document=source_doc,
+        title_prefix="",
+        metadata_suffix_semantic="",
+        metadata_suffix_keyword="",
+        contextual_rag_reserved_tokens=0,
+        doc_summary="",
+        chunk_context="",
+        mini_chunk_texts=None,
+        large_chunk_id=None,
+        embeddings=chunk_embedding,
+        title_embedding=None,
+    )
+    public_access = DocumentAccess.build(
+        user_emails=[],
+        user_groups=[],
+        external_user_emails=[],
+        external_user_group_ids=[],
+        is_public=True,
+    )
+    return DocMetadataAwareIndexChunk.from_index_chunk(
+        index_chunk=base_chunk,
+        access=public_access,
+        document_sets=set(),
+        user_project=[],
+        personas=[],
+        boost=0,
+        aggregated_chunk_boost_factor=1.0,
+        tenant_id="test_tenant",
+    )
+
+
+def _make_indexing_metadata(
+    doc_ids: list[str],
+    old_counts: list[int],
+    new_counts: list[int],
+) -> IndexingMetadata:
+    """Build IndexingMetadata from three parallel lists.
+
+    Element ``i`` of ``old_counts`` / ``new_counts`` gives the old and new
+    chunk counts for ``doc_ids[i]``.
+
+    Raises:
+        ValueError: if the three lists differ in length.
+    """
+    return IndexingMetadata(
+        doc_id_to_chunk_cnt_diff={
+            doc_id: IndexingMetadata.ChunkCounts(
+                old_chunk_cnt=old,
+                new_chunk_cnt=new,
+            )
+            # strict=True turns a length mismatch into an error instead of
+            # silently dropping the trailing documents.
+            for doc_id, old, new in zip(doc_ids, old_counts, new_counts, strict=True)
+        }
+    )
+
+
+def _stub_enrich(
+    doc_id: str,
+    old_chunk_cnt: int,
+) -> EnrichedDocumentIndexingInfo:
+    """Return enrichment info for ``doc_id`` covering chunk indices from 0 up
+    to ``old_chunk_cnt`` and flagged as not an old version.
+
+    An ``old_chunk_cnt`` of 0 is meant to signal that there are no existing
+    chunks to delete."""
+    enrichment_fields = {
+        "doc_id": doc_id,
+        "chunk_start_index": 0,
+        "old_version": False,
+        "chunk_end_index": old_chunk_cnt,
+    }
+    return EnrichedDocumentIndexingInfo(**enrichment_fields)
+
+
+@patch("onyx.document_index.vespa.vespa_document_index.batch_index_vespa_chunks")
+@patch("onyx.document_index.vespa.vespa_document_index.delete_vespa_chunks")
+@patch(
+    "onyx.document_index.vespa.vespa_document_index.get_document_chunk_ids",
+    return_value=[],
+)
+@patch("onyx.document_index.vespa.vespa_document_index._enrich_basic_chunk_info")
+@patch(
+    "onyx.document_index.vespa.vespa_document_index.BATCH_SIZE",
+    3,
+)
+def test_index_respects_batch_size(
+    mock_enrich: MagicMock,
+    mock_get_chunk_ids: MagicMock,  # noqa: ARG001
+    mock_delete: MagicMock,  # noqa: ARG001
+    mock_batch_index: MagicMock,
+) -> None:
+    """Seven chunks with BATCH_SIZE patched to 3 reach batch_index_vespa_chunks
+    as three calls of sizes 3, 3 and 1, in their original order."""
+    total_chunks = 7
+    mock_enrich.return_value = _stub_enrich("doc1", old_chunk_cnt=0)
+
+    tenant_state = TenantState(tenant_id="test_tenant", multitenant=False)
+    vespa_index = VespaDocumentIndex(
+        index_name="test_index",
+        tenant_state=tenant_state,
+        large_chunks_enabled=False,
+        httpx_client=MagicMock(),
+    )
+
+    doc_chunks = [_make_chunk("doc1", chunk_id=idx) for idx in range(total_chunks)]
+    metadata = _make_indexing_metadata(
+        ["doc1"], old_counts=[0], new_counts=[total_chunks]
+    )
+
+    results = vespa_index.index(chunks=doc_chunks, indexing_metadata=metadata)
+
+    assert len(results) == 1
+
+    # BATCH_SIZE=3 over 7 chunks: two full batches and a remainder of one.
+    assert mock_batch_index.call_count == 3
+    indexed_so_far = []
+    sizes_per_call = []
+    for batch_call in mock_batch_index.call_args_list:
+        sent_chunks = batch_call.kwargs["chunks"]
+        sizes_per_call.append(len(sent_chunks))
+        indexed_so_far.extend(sent_chunks)
+    assert sizes_per_call == [3, 3, 1]
+
+    # Every chunk is present exactly once and order is preserved.
+    assert len(indexed_so_far) == total_chunks
+    assert [sent.chunk_id for sent in indexed_so_far] == list(range(total_chunks))
--- a/backend/tests/unit/onyx/hooks/test_api_dependencies.py
+++ b/backend/tests/unit/onyx/hooks/test_api_dependencies.py
@@ -11,30 +11,13 @@ from onyx.hooks.api_dependencies import require_hook_enabled

 class TestRequireHookEnabled:
    def test_raises_when_multi_tenant(self) -> None:
-        with (
-            patch("onyx.hooks.api_dependencies.MULTI_TENANT", True),
-            patch("onyx.hooks.api_dependencies.HOOK_ENABLED", True),
-        ):
+        with patch("onyx.hooks.api_dependencies.MULTI_TENANT", True):
            with pytest.raises(OnyxError) as exc_info:
                require_hook_enabled()
        assert exc_info.value.error_code is OnyxErrorCode.SINGLE_TENANT_ONLY
        assert exc_info.value.status_code == 403
        assert "multi-tenant" in exc_info.value.detail

-    def test_raises_when_flag_disabled(self) -> None:
-        with (
-            patch("onyx.hooks.api_dependencies.MULTI_TENANT", False),
-            patch("onyx.hooks.api_dependencies.HOOK_ENABLED", False),
-        ):
-            with pytest.raises(OnyxError) as exc_info:
-                require_hook_enabled()
-        assert exc_info.value.error_code is OnyxErrorCode.ENV_VAR_GATED
-        assert exc_info.value.status_code == 403
-        assert "HOOK_ENABLED" in exc_info.value.detail
-
-    def test_passes_when_enabled_single_tenant(self) -> None:
-        with (
-            patch("onyx.hooks.api_dependencies.MULTI_TENANT", False),
-            patch("onyx.hooks.api_dependencies.HOOK_ENABLED", True),
-        ):
+    def test_passes_when_single_tenant(self) -> None:
+        with patch("onyx.hooks.api_dependencies.MULTI_TENANT", False):
            require_hook_enabled()  # must not raise
--- a/backend/tests/unit/onyx/indexing/test_embed_chunks_in_batches.py
+++ b/backend/tests/unit/onyx/indexing/test_embed_chunks_in_batches.py
@@ -0,0 +1,391 @@
+"""Unit tests for _embed_chunks_to_store.
+
+Tests cover:
+  - Single batch, no failures
+  - Multiple batches, no failures
+  - Failure in a single batch
+  - Cross-batch document failure scrubbing
+  - Later batches skip already-failed docs
+  - Empty input
+  - All chunks fail
+"""
+
+from collections.abc import Callable
+from unittest.mock import MagicMock
+from unittest.mock import patch
+
+from onyx.connectors.models import ConnectorFailure
+from onyx.connectors.models import Document
+from onyx.connectors.models import DocumentFailure
+from onyx.connectors.models import DocumentSource
+from onyx.connectors.models import TextSection
+from onyx.indexing.chunk_batch_store import ChunkBatchStore
+from onyx.indexing.indexing_pipeline import _embed_chunks_to_store
+from onyx.indexing.models import ChunkEmbedding
+from onyx.indexing.models import DocAwareChunk
+from onyx.indexing.models import IndexChunk
+
+
+def _make_doc(doc_id: str) -> Document:
+    """Build a one-section FILE-source Document with the given ID."""
+    only_section = TextSection(text="test", link=None)
+    return Document(
+        id=doc_id,
+        semantic_identifier="test",
+        source=DocumentSource.FILE,
+        sections=[only_section],
+        metadata={},
+    )
+
+
+def _make_chunk(doc_id: str, chunk_id: int) -> DocAwareChunk:
+    """Build an un-embedded DocAwareChunk attached to a fresh ``doc_id`` document."""
+    parent_doc = _make_doc(doc_id)
+    return DocAwareChunk(
+        chunk_id=chunk_id,
+        source_document=parent_doc,
+        blurb="test",
+        content="test content",
+        source_links=None,
+        image_file_id=None,
+        section_continuation=False,
+        title_prefix="",
+        metadata_suffix_semantic="",
+        metadata_suffix_keyword="",
+        mini_chunk_texts=None,
+        large_chunk_id=None,
+        doc_summary="",
+        chunk_context="",
+        contextual_rag_reserved_tokens=0,
+    )
+
+
+def _make_index_chunk(doc_id: str, chunk_id: int) -> IndexChunk:
+    """Build an IndexChunk: the same fields as ``_make_chunk`` plus embeddings."""
+    parent_doc = _make_doc(doc_id)
+    chunk_embedding = ChunkEmbedding(
+        full_embedding=[0.1] * 10,
+        mini_chunk_embeddings=[],
+    )
+    return IndexChunk(
+        chunk_id=chunk_id,
+        source_document=parent_doc,
+        blurb="test",
+        content="test content",
+        source_links=None,
+        image_file_id=None,
+        section_continuation=False,
+        title_prefix="",
+        metadata_suffix_semantic="",
+        metadata_suffix_keyword="",
+        mini_chunk_texts=None,
+        large_chunk_id=None,
+        doc_summary="",
+        chunk_context="",
+        contextual_rag_reserved_tokens=0,
+        embeddings=chunk_embedding,
+        title_embedding=None,
+    )
+
+
+def _make_failure(doc_id: str) -> ConnectorFailure:
+    """Build a ConnectorFailure reporting that embedding failed for ``doc_id``."""
+    failed_doc = DocumentFailure(document_id=doc_id, document_link=None)
+    cause = RuntimeError("embedding failed")
+    return ConnectorFailure(
+        failed_document=failed_doc,
+        failure_message="embedding failed",
+        exception=cause,
+    )
+
+
+def _mock_embed_success(
+    chunks: list[DocAwareChunk], **_kwargs: object
+) -> tuple[list[IndexChunk], list[ConnectorFailure]]:
+    """Embedder stand-in: every chunk becomes an IndexChunk, with no failures."""
+    embedded: list[IndexChunk] = []
+    for chunk in chunks:
+        embedded.append(_make_index_chunk(chunk.source_document.id, chunk.chunk_id))
+    return embedded, []
+
+
+def _mock_embed_fail_doc(
+    fail_doc_id: str,
+) -> Callable[..., tuple[list[IndexChunk], list[ConnectorFailure]]]:
+    """Build an embedder stand-in that rejects every chunk of ``fail_doc_id``.
+
+    Chunks from other documents are embedded normally. One failure record is
+    returned if at least one chunk of the failing document was in the call.
+    """
+
+    def _embed(
+        chunks: list[DocAwareChunk], **_kwargs: object
+    ) -> tuple[list[IndexChunk], list[ConnectorFailure]]:
+        embedded: list[IndexChunk] = []
+        saw_failing_doc = False
+        for chunk in chunks:
+            chunk_doc_id = chunk.source_document.id
+            if chunk_doc_id == fail_doc_id:
+                saw_failing_doc = True
+            else:
+                embedded.append(_make_index_chunk(chunk_doc_id, chunk.chunk_id))
+
+        if saw_failing_doc:
+            return embedded, [_make_failure(fail_doc_id)]
+        return embedded, []
+
+    return _embed
+
+
+class TestEmbedChunksInBatches:
+    @patch(
+        "onyx.indexing.indexing_pipeline.embed_chunks_with_failure_handling",
+    )
+    @patch("onyx.indexing.indexing_pipeline.MAX_CHUNKS_PER_DOC_BATCH", 100)
+    def test_single_batch_no_failures(self, mock_embed: MagicMock) -> None:
+        """Three chunks under the batch cap embed cleanly into one batch file."""
+        mock_embed.side_effect = _mock_embed_success
+        doc_chunks = [_make_chunk("doc1", idx) for idx in range(3)]
+
+        with ChunkBatchStore() as store:
+            result = _embed_chunks_to_store(
+                chunks=doc_chunks,
+                embedder=MagicMock(),
+                tenant_id="test",
+                request_id=None,
+                store=store,
+            )
+
+            assert len(result.successful_chunk_ids) == 3
+            assert len(result.connector_failures) == 0
+
+            # One batch file on disk, holding all three chunks.
+            assert len(store._batch_files()) == 1
+            persisted = list(store.stream())
+            assert len(persisted) == 3
+
+    @patch(
+        "onyx.indexing.indexing_pipeline.embed_chunks_with_failure_handling",
+    )
+    @patch("onyx.indexing.indexing_pipeline.MAX_CHUNKS_PER_DOC_BATCH", 3)
+    def test_multiple_batches_no_failures(self, mock_embed: MagicMock) -> None:
+        """Seven chunks with a cap of 3 all succeed and land in three batch files."""
+        mock_embed.side_effect = _mock_embed_success
+        doc_chunks = [_make_chunk("doc1", idx) for idx in range(7)]
+
+        with ChunkBatchStore() as store:
+            result = _embed_chunks_to_store(
+                chunks=doc_chunks,
+                embedder=MagicMock(),
+                tenant_id="test",
+                request_id=None,
+                store=store,
+            )
+
+            assert len(result.successful_chunk_ids) == 7
+            assert len(result.connector_failures) == 0
+            # Batches of 3, 3 and 1.
+            assert len(store._batch_files()) == 3
+
+    @patch(
+        "onyx.indexing.indexing_pipeline.embed_chunks_with_failure_handling",
+    )
+    @patch("onyx.indexing.indexing_pipeline.MAX_CHUNKS_PER_DOC_BATCH", 100)
+    def test_single_batch_with_failure(self, mock_embed: MagicMock) -> None:
+        """When doc2 fails embedding, only doc1 shows up among the successes."""
+        mock_embed.side_effect = _mock_embed_fail_doc("doc2")
+        # doc2's single chunk is interleaved between two doc1 chunks.
+        mixed_chunks = [
+            _make_chunk("doc1", 0),
+            _make_chunk("doc2", 1),
+            _make_chunk("doc1", 2),
+        ]
+
+        with ChunkBatchStore() as store:
+            result = _embed_chunks_to_store(
+                chunks=mixed_chunks,
+                embedder=MagicMock(),
+                tenant_id="test",
+                request_id=None,
+                store=store,
+            )
+
+            assert len(result.connector_failures) == 1
+            docs_with_successes = set()
+            for _, owner_doc_id in result.successful_chunk_ids:
+                docs_with_successes.add(owner_doc_id)
+            assert "doc2" not in docs_with_successes
+            assert "doc1" in docs_with_successes
+
+    @patch(
+        "onyx.indexing.indexing_pipeline.embed_chunks_with_failure_handling",
+    )
+    @patch("onyx.indexing.indexing_pipeline.MAX_CHUNKS_PER_DOC_BATCH", 3)
+    def test_cross_batch_failure_scrubs_earlier_batch(
+        self, mock_embed: MagicMock
+    ) -> None:
+        """docA straddles batches 0 and 1: it embeds fine in batch 0 and then
+        fails in batch 1, so the chunks already stored for it from batch 0
+        must be removed again."""
+        first_call_pending = True
+
+        def _embed(
+            chunks: list[DocAwareChunk], **_kwargs: object
+        ) -> tuple[list[IndexChunk], list[ConnectorFailure]]:
+            nonlocal first_call_pending
+            # Only the very first embed call succeeds for every chunk; each
+            # later call fails docA.
+            if first_call_pending:
+                first_call_pending = False
+                return _mock_embed_success(chunks)
+            return _mock_embed_fail_doc("docA")(chunks)
+
+        mock_embed.side_effect = _embed
+
+        # Four docA chunks followed by two docB chunks.
+        all_chunks = [_make_chunk("docA", idx) for idx in range(4)]
+        all_chunks += [_make_chunk("docB", idx) for idx in range(2)]
+
+        with ChunkBatchStore() as store:
+            result = _embed_chunks_to_store(
+                chunks=all_chunks,
+                embedder=MagicMock(),
+                tenant_id="test",
+                request_id=None,
+                store=store,
+            )
+
+            # No docA chunk may be reported as successful.
+            docs_with_successes = set()
+            for _, owner_doc_id in result.successful_chunk_ids:
+                docs_with_successes.add(owner_doc_id)
+            assert "docA" not in docs_with_successes
+            assert "docB" in docs_with_successes
+            assert len(result.connector_failures) == 1
+
+            # The store itself must no longer hold docA chunks from batch 0.
+            persisted_doc_ids = set()
+            for stored_chunk in store.stream():
+                persisted_doc_ids.add(stored_chunk.source_document.id)
+            assert "docA" not in persisted_doc_ids
+            assert "docB" in persisted_doc_ids
+
+    @patch(
+        "onyx.indexing.indexing_pipeline.embed_chunks_with_failure_handling",
+    )
+    @patch("onyx.indexing.indexing_pipeline.MAX_CHUNKS_PER_DOC_BATCH", 3)
+    def test_later_batch_skips_already_failed_doc(self, mock_embed: MagicMock) -> None:
+        """After docA fails in batch 0, none of its batch-1 chunks may be
+        handed to the embedder."""
+        seen_by_embedder: list[str] = []
+        fail_doc_a = _mock_embed_fail_doc("docA")
+
+        def _embed(
+            chunks: list[DocAwareChunk], **_kwargs: object
+        ) -> tuple[list[IndexChunk], list[ConnectorFailure]]:
+            # Record the owning document of every chunk the embedder receives.
+            seen_by_embedder.extend(chunk.source_document.id for chunk in chunks)
+            return fail_doc_a(chunks)
+
+        mock_embed.side_effect = _embed
+
+        # Four docA chunks followed by two docB chunks.
+        all_chunks = [_make_chunk("docA", idx) for idx in range(4)]
+        all_chunks += [_make_chunk("docB", idx) for idx in range(2)]
+
+        with ChunkBatchStore() as store:
+            _embed_chunks_to_store(
+                chunks=all_chunks,
+                embedder=MagicMock(),
+                tenant_id="test",
+                request_id=None,
+                store=store,
+            )
+
+        # The first three recorded IDs belong to batch 0; everything after
+        # that came from batch 1 and must not include docA.
+        after_first_batch = seen_by_embedder[3:]
+        assert "docA" not in after_first_batch
+
+    @patch(
+        "onyx.indexing.indexing_pipeline.embed_chunks_with_failure_handling",
+    )
+    @patch("onyx.indexing.indexing_pipeline.MAX_CHUNKS_PER_DOC_BATCH", 3)
+    def test_failed_doc_skipped_in_later_batch_while_other_doc_succeeds(
+        self, mock_embed: MagicMock
+    ) -> None:
+        """doc1 spans batches 0 and 1, doc2 only in batch 1.  Batch 0 fails
+        doc1.  In batch 1, doc1 chunks should be skipped but doc2 chunks
+        should still be embedded successfully."""
+        embedded_chunks: list[list[str]] = []
+
+        def _embed(
+            chunks: list[DocAwareChunk], **_kwargs: object
+        ) -> tuple[list[IndexChunk], list[ConnectorFailure]]:
+            embedded_chunks.append([c.source_document.id for c in chunks])
+            return _mock_embed_fail_doc("doc1")(chunks)
+
+        mock_embed.side_effect = _embed
+
+        with ChunkBatchStore() as store:
+            chunks = [
+                _make_chunk("doc1", 0),
+                _make_chunk("doc1", 1),
+                _make_chunk("doc1", 2),
+                _make_chunk("doc1", 3),
+                _make_chunk("doc2", 0),
+                _make_chunk("doc2", 1),
+            ]
+            result = _embed_chunks_to_store(
+                chunks=chunks,
+                embedder=MagicMock(),
+                tenant_id="test",
+                request_id=None,
+                store=store,
+            )
+
+            # doc1 should be fully excluded, doc2 fully included
+            successful_doc_ids = {doc_id for _, doc_id in result.successful_chunk_ids}
+            assert "doc1" not in successful_doc_ids
+            assert "doc2" in successful_doc_ids
+            assert len(result.successful_chunk_ids) == 2  # doc2's 2 chunks
+
+            # Batch 1 should only contain doc2 (doc1 was filtered before embedding)
+            assert len(embedded_chunks) == 2
+            assert "doc1" not in embedded_chunks[1]
+            assert embedded_chunks[1] == ["doc2", "doc2"]
+
+            # Verify on-disk state has no doc1 chunks
+            all_stored = list(store.stream())
+            assert all(c.source_document.id == "doc2" for c in all_stored)
+
+    @patch(
+        "onyx.indexing.indexing_pipeline.embed_chunks_with_failure_handling",
+    )
+    def test_empty_input(self, mock_embed: MagicMock) -> None:
+        """Empty chunk list produces empty results."""
+        mock_embed.side_effect = _mock_embed_success
+
+        with ChunkBatchStore() as store:
+            result = _embed_chunks_to_store(
+                chunks=[],
+                embedder=MagicMock(),
+                tenant_id="test",
+                request_id=None,
+                store=store,
+            )
+
+            assert len(result.successful_chunk_ids) == 0
+            assert len(result.connector_failures) == 0
+            mock_embed.assert_not_called()
+
+    @patch(
+        "onyx.indexing.indexing_pipeline.embed_chunks_with_failure_handling",
+    )
+    @patch("onyx.indexing.indexing_pipeline.MAX_CHUNKS_PER_DOC_BATCH", 100)
+    def test_all_chunks_fail(self, mock_embed: MagicMock) -> None:
+        """When all documents fail, results have no successful chunks."""
+
+        def _fail_all(
+            chunks: list[DocAwareChunk], **_kwargs: object
+        ) -> tuple[list[IndexChunk], list[ConnectorFailure]]:
+            doc_ids = {c.source_document.id for c in chunks}
+            return [], [_make_failure(doc_id) for doc_id in doc_ids]
+
+        mock_embed.side_effect = _fail_all
+
+        with ChunkBatchStore() as store:
+            chunks = [_make_chunk("doc1", 0), _make_chunk("doc2", 1)]
+            result = _embed_chunks_to_store(
+                chunks=chunks,
+                embedder=MagicMock(),
+                tenant_id="test",
+                request_id=None,
+                store=store,
+            )
+
+            assert len(result.successful_chunk_ids) == 0
+            assert len(result.connector_failures) == 2
--- a/backend/tests/unit/onyx/indexing/test_personas_in_chunks.py
+++ b/backend/tests/unit/onyx/indexing/test_personas_in_chunks.py
@@ -116,7 +116,7 @@ def _run_adapter_build(
    project_ids_map: dict[str, list[int]],
    persona_ids_map: dict[str, list[int]],
 ) -> list[DocMetadataAwareIndexChunk]:
-    """Helper that runs UserFileIndexingAdapter.build_metadata_aware_chunks
+    """Helper that runs UserFileIndexingAdapter.prepare_enrichment + enrich_chunk
    with all external dependencies mocked."""
    from onyx.indexing.adapters.user_file_indexing_adapter import (
        UserFileIndexingAdapter,
@@ -155,18 +155,16 @@ def _run_adapter_build(
            side_effect=Exception("no LLM in tests"),
        ),
    ):
-        result = adapter.build_metadata_aware_chunks(
-            chunks_with_embeddings=[chunk],
-            chunk_content_scores=[1.0],
-            tenant_id="test_tenant",
+        enricher = adapter.prepare_enrichment(
            context=context,
+            tenant_id="test_tenant",
+            chunks=[chunk],
        )
-
-    return result.chunks
+        return [enricher.enrich_chunk(chunk, 1.0)]


-def test_build_metadata_aware_chunks_includes_persona_ids() -> None:
-    """UserFileIndexingAdapter.build_metadata_aware_chunks writes persona IDs
+def test_prepare_enrichment_includes_persona_ids() -> None:
+    """UserFileIndexingAdapter.prepare_enrichment writes persona IDs
    fetched from the DB into each chunk's metadata."""
    file_id = str(uuid4())
    persona_ids = [5, 12]
@@ -183,7 +181,7 @@ def test_build_metadata_aware_chunks_includes_persona_ids() -> None:
    assert chunks[0].user_project == project_ids


-def test_build_metadata_aware_chunks_missing_file_defaults_to_empty() -> None:
+def test_prepare_enrichment_missing_file_defaults_to_empty() -> None:
    """When a file has no persona or project associations in the DB, the
    adapter should default to empty lists (not KeyError or None)."""
    file_id = str(uuid4())
--- a/backend/tests/unit/onyx/llm/test_multi_llm.py
+++ b/backend/tests/unit/onyx/llm/test_multi_llm.py
@@ -11,6 +11,7 @@ from litellm.types.utils import ChatCompletionDeltaToolCall
 from litellm.types.utils import Delta
 from litellm.types.utils import Function as LiteLLMFunction

+import onyx.llm.models
 from onyx.configs.app_configs import MOCK_LLM_RESPONSE
 from onyx.llm.constants import LlmProviderNames
 from onyx.llm.interfaces import LLMUserIdentity
@@ -1479,6 +1480,147 @@ def test_bifrost_normalizes_api_base_in_model_kwargs() -> None:
    assert llm._model_kwargs["api_base"] == "https://bifrost.example.com/v1"


+def test_prompt_contains_tool_call_history_true() -> None:
+    from onyx.llm.multi_llm import _prompt_contains_tool_call_history
+
+    messages: LanguageModelInput = [
+        UserMessage(content="What's the weather?"),
+        AssistantMessage(
+            content=None,
+            tool_calls=[
+                ToolCall(
+                    id="tc_1",
+                    function=FunctionCall(name="get_weather", arguments="{}"),
+                )
+            ],
+        ),
+    ]
+    assert _prompt_contains_tool_call_history(messages) is True
+
+
+def test_prompt_contains_tool_call_history_false_no_tools() -> None:
+    from onyx.llm.multi_llm import _prompt_contains_tool_call_history
+
+    messages: LanguageModelInput = [
+        UserMessage(content="Hello"),
+        AssistantMessage(content="Hi there!"),
+    ]
+    assert _prompt_contains_tool_call_history(messages) is False
+
+
+def test_prompt_contains_tool_call_history_false_user_only() -> None:
+    from onyx.llm.multi_llm import _prompt_contains_tool_call_history
+
+    messages: LanguageModelInput = [UserMessage(content="Hello")]
+    assert _prompt_contains_tool_call_history(messages) is False
+
+
+def test_bedrock_claude_drops_thinking_when_thinking_blocks_missing() -> None:
+    """When thinking is enabled but assistant messages with tool_calls lack
+    thinking_blocks, the thinking param must be dropped to avoid the Bedrock
+    BadRequestError about missing thinking blocks."""
+    llm = LitellmLLM(
+        api_key=None,
+        timeout=30,
+        model_provider=LlmProviderNames.BEDROCK,
+        model_name="anthropic.claude-sonnet-4-20250514-v1:0",
+        max_input_tokens=200000,
+    )
+
+    messages: LanguageModelInput = [
+        UserMessage(content="What's the weather?"),
+        AssistantMessage(
+            content=None,
+            tool_calls=[
+                ToolCall(
+                    id="tc_1",
+                    function=FunctionCall(
+                        name="get_weather",
+                        arguments='{"city": "Paris"}',
+                    ),
+                )
+            ],
+        ),
+        onyx.llm.models.ToolMessage(
+            content="22°C sunny",
+            tool_call_id="tc_1",
+        ),
+    ]
+
+    tools = [
+        {
+            "type": "function",
+            "function": {
+                "name": "get_weather",
+                "description": "Get the weather",
+                "parameters": {
+                    "type": "object",
+                    "properties": {"city": {"type": "string"}},
+                },
+            },
+        }
+    ]
+
+    with (
+        patch("litellm.completion") as mock_completion,
+        patch("onyx.llm.multi_llm.model_is_reasoning_model", return_value=True),
+    ):
+        mock_completion.return_value = []
+
+        list(llm.stream(messages, tools=tools, reasoning_effort=ReasoningEffort.HIGH))
+
+        kwargs = mock_completion.call_args.kwargs
+        assert "thinking" not in kwargs, (
+            "thinking param should be dropped when thinking_blocks are missing "
+            "from assistant messages with tool_calls"
+        )
+
+
+def test_bedrock_claude_keeps_thinking_when_no_tool_history() -> None:
+    """When thinking is enabled and there are no historical assistant messages
+    with tool_calls, the thinking param should be preserved."""
+    llm = LitellmLLM(
+        api_key=None,
+        timeout=30,
+        model_provider=LlmProviderNames.BEDROCK,
+        model_name="anthropic.claude-sonnet-4-20250514-v1:0",
+        max_input_tokens=200000,
+    )
+
+    messages: LanguageModelInput = [
+        UserMessage(content="What's the weather?"),
+    ]
+
+    tools = [
+        {
+            "type": "function",
+            "function": {
+                "name": "get_weather",
+                "description": "Get the weather",
+                "parameters": {
+                    "type": "object",
+                    "properties": {"city": {"type": "string"}},
+                },
+            },
+        }
+    ]
+
+    with (
+        patch("litellm.completion") as mock_completion,
+        patch("onyx.llm.multi_llm.model_is_reasoning_model", return_value=True),
+    ):
+        mock_completion.return_value = []
+
+        list(llm.stream(messages, tools=tools, reasoning_effort=ReasoningEffort.HIGH))
+
+        kwargs = mock_completion.call_args.kwargs
+        assert "thinking" in kwargs, (
+            "thinking param should be preserved when no assistant messages "
+            "with tool_calls exist in history"
+        )
+        assert kwargs["thinking"]["type"] == "enabled"
+
+
 def test_bifrost_claude_includes_allowed_openai_params() -> None:
    llm = LitellmLLM(
        api_key="test_key",
--- a/backend/tests/unit/onyx/server/features/init.py
+++ b/backend/tests/unit/onyx/server/features/init.py
--- a/cli/README.md
+++ b/cli/README.md
@@ -63,6 +63,31 @@ onyx-cli agents
 onyx-cli agents --json
 ```

+### Serve over SSH
+
+```shell
+# Start a public SSH endpoint for the CLI TUI
+onyx-cli serve --host 0.0.0.0 --port 2222
+
+# Connect as a client
+ssh your-host -p 2222
+```
+
+Clients can either:
+- paste an API key at the login prompt, or
+- skip the prompt by sending `ONYX_API_KEY` over SSH:
+
+```shell
+export ONYX_API_KEY=your-key
+ssh -o SendEnv=ONYX_API_KEY your-host -p 2222
+```
+
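+Useful hardening flags:
+- `--idle-timeout` (default `15m`): disconnect idle clients after this duration; `0` disables it
+- `--max-session-timeout` (default `8h`): maximum lifetime of a client session; `0` disables it
+- `--rate-limit-per-minute` (default `20`): new sessions allowed per client IP per minute
+- `--rate-limit-burst` (default `40`): per-IP burst allowance for connection attempts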
+
 ## Commands

 | Command | Description |
@@ -70,6 +95,7 @@ onyx-cli agents --json
 | `chat` | Launch the interactive chat TUI (default) |
 | `ask` | Ask a one-shot question (non-interactive) |
 | `agents` | List available agents |
+| `serve` | Serve the interactive chat TUI over SSH |
 | `configure` | Configure server URL and API key |
 | `validate-config` | Validate configuration and test connection |

--- a/cli/cmd/root.go
+++ b/cli/cmd/root.go
@@ -96,6 +96,7 @@ func Execute() error {
 	rootCmd.AddCommand(newAgentsCmd())
 	rootCmd.AddCommand(newConfigureCmd())
 	rootCmd.AddCommand(newValidateConfigCmd())
+	rootCmd.AddCommand(newServeCmd())

 	// Default command is chat, but intercept --version first
 	rootCmd.RunE = func(cmd *cobra.Command, args []string) error {
--- a/cli/cmd/serve.go
+++ b/cli/cmd/serve.go
@@ -0,0 +1,450 @@
+package cmd
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"net"
+	"os"
+	"os/signal"
+	"path/filepath"
+	"strings"
+	"syscall"
+	"time"
+
+	"github.com/charmbracelet/bubbles/textinput"
+	tea "github.com/charmbracelet/bubbletea"
+	"github.com/charmbracelet/log"
+	"github.com/charmbracelet/ssh"
+	"github.com/charmbracelet/wish"
+	"github.com/charmbracelet/wish/activeterm"
+	"github.com/charmbracelet/wish/bubbletea"
+	"github.com/charmbracelet/wish/logging"
+	"github.com/charmbracelet/wish/ratelimiter"
+	"github.com/onyx-dot-app/onyx/cli/internal/api"
+	"github.com/onyx-dot-app/onyx/cli/internal/config"
+	"github.com/onyx-dot-app/onyx/cli/internal/tui"
+	"github.com/spf13/cobra"
+	"golang.org/x/time/rate"
+)
+
+// Defaults and limits for the `serve` command.
+const (
+	// Defaults for the --idle-timeout, --max-session-timeout,
+	// --rate-limit-per-minute, --rate-limit-burst and --rate-limit-cache flags.
+	defaultServeIdleTimeout        = 15 * time.Minute
+	defaultServeMaxSessionTimeout  = 8 * time.Hour
+	defaultServeRateLimitPerMinute = 20
+	defaultServeRateLimitBurst     = 40
+	defaultServeRateLimitCacheSize = 4096
+	// Longest API key accepted; enforced by validateAPIKey and used as the
+	// character limit of the login prompt's input field.
+	maxAPIKeyLength                = 512
+	// Deadline for the connection test that validates an API key.
+	apiKeyValidationTimeout        = 15 * time.Second
+	// Number of failed login attempts after which the auth prompt aborts.
+	maxAPIKeyRetries               = 5
+)
+
+// sessionEnv returns the value of the environment variable key as sent by
+// the SSH client for session s. The first matching "key=value" entry wins;
+// an empty string is returned when the variable is not present.
+func sessionEnv(s ssh.Session, key string) string {
+	for _, entry := range s.Environ() {
+		if value, found := strings.CutPrefix(entry, key+"="); found {
+			return value
+		}
+	}
+	return ""
+}
+
+// validateAPIKey checks apiKey against the Onyx server at serverURL.
+//
+// Surrounding whitespace is stripped first. A key longer than
+// maxAPIKeyLength (measured with len, i.e. in bytes) is rejected without
+// contacting the server; otherwise the result of the client's connection
+// test, bounded by apiKeyValidationTimeout, is returned.
+func validateAPIKey(serverURL string, apiKey string) error {
+	key := strings.TrimSpace(apiKey)
+	if len(key) > maxAPIKeyLength {
+		return fmt.Errorf("API key is too long (max %d characters)", maxAPIKeyLength)
+	}
+
+	client := api.NewClient(config.OnyxCliConfig{
+		ServerURL: serverURL,
+		APIKey:    key,
+	})
+
+	timeoutCtx, stop := context.WithTimeout(context.Background(), apiKeyValidationTimeout)
+	defer stop()
+	return client.TestConnection(timeoutCtx)
+}
+
+// --- auth prompt (bubbletea model) ---
+
+// authState is the phase the login prompt is currently in.
+type authState int
+
+const (
+	// authInput: the user is typing a key (zero value, so the initial state).
+	authInput authState = iota
+	// authValidating: a submitted key is being checked; key presses other
+	// than Ctrl+C / Ctrl+D are ignored.
+	authValidating
+	// authDone: the key was accepted.
+	authDone
+)
+
+// authValidatedMsg is the message produced by the validation command started
+// in authModel.Update; it carries the key that was checked and the result.
+type authValidatedMsg struct {
+	key string // the trimmed key that was submitted
+	err error  // nil when validateAPIKey accepted the key
+}
+
+// authModel holds the state of the API-key login prompt shown to SSH clients
+// that did not supply a valid key through their environment.
+type authModel struct {
+	input     textinput.Model // masked text field the key is typed into
+	serverURL string          // Onyx server the key is validated against
+	state     authState       // current phase of the prompt
+	apiKey    string // set on successful validation
+	errMsg    string          // error line rendered above the prompt, if any
+	retries   int             // failed attempts so far (empty or rejected keys)
+	aborted   bool            // set on Ctrl+C / Ctrl+D or after too many failures
+}
+
+// newAuthModel builds the login prompt for serverURL. The key field is
+// focused, masks its content and is capped at maxAPIKeyLength characters.
+// initialErr, when non-empty, is displayed as the first error message.
+func newAuthModel(serverURL, initialErr string) authModel {
+	keyField := textinput.New()
+	keyField.Prompt = "  API Key: "
+	keyField.EchoMode = textinput.EchoPassword
+	keyField.EchoCharacter = '•'
+	keyField.CharLimit = maxAPIKeyLength
+	keyField.Width = 80
+	keyField.Focus()
+
+	model := authModel{serverURL: serverURL, errMsg: initialErr}
+	model.input = keyField
+	return model
+}
+
+// Update advances the login prompt in response to msg and returns the updated
+// model together with any command to run.
+//
+//   - Window resizes adjust the input width.
+//   - Ctrl+C / Ctrl+D mark the prompt as aborted, in any state.
+//   - Enter submits the trimmed input: an empty key counts as a failed
+//     attempt, anything else starts an asynchronous validation.
+//   - authValidatedMsg either stores the accepted key (state authDone) or
+//     records a failed attempt and returns to authInput.
+//
+// After maxAPIKeyRetries failed attempts the prompt is marked as aborted.
+// Messages not fully handled above are forwarded to the text input, but only
+// while in the authInput state.
+func (m authModel) Update(msg tea.Msg) (authModel, tea.Cmd) {
+	switch msg := msg.(type) {
+	case tea.WindowSizeMsg:
+		m.input.Width = max(msg.Width-14, 20) // account for prompt width
+		return m, nil
+	case tea.KeyMsg:
+		// First pass: abort keys always work; every other key is dropped
+		// while a validation is in flight.
+		switch msg.Type {
+		case tea.KeyCtrlC, tea.KeyCtrlD:
+			m.aborted = true
+			return m, nil
+		default:
+			if m.state == authValidating {
+				return m, nil
+			}
+		}
+		// Second pass: Enter submits. Any other key leaves this switch and
+		// reaches the text input at the bottom of the function.
+		switch msg.Type {
+		case tea.KeyEnter:
+			key := strings.TrimSpace(m.input.Value())
+			if key == "" {
+				m.errMsg = "No key entered."
+				m.retries++
+				if m.retries >= maxAPIKeyRetries {
+					m.errMsg = "Too many failed attempts. Disconnecting."
+					m.aborted = true
+					return m, nil
+				}
+				m.input.SetValue("")
+				return m, nil
+			}
+			m.state = authValidating
+			m.errMsg = ""
+			// Copy the URL into a local so the command closure does not
+			// capture the model itself.
+			serverURL := m.serverURL
+			return m, func() tea.Msg {
+				return authValidatedMsg{key: key, err: validateAPIKey(serverURL, key)}
+			}
+		}
+
+	case authValidatedMsg:
+		if msg.err != nil {
+			// Rejected key: go back to the input state and count the failure.
+			m.state = authInput
+			m.errMsg = msg.err.Error()
+			m.retries++
+			if m.retries >= maxAPIKeyRetries {
+				m.errMsg = "Too many failed attempts. Disconnecting."
+				m.aborted = true
+				return m, nil
+			}
+			m.input.SetValue("")
+			return m, m.input.Focus()
+		}
+		m.apiKey = msg.key
+		m.state = authDone
+		return m, nil
+	}
+
+	// Everything that fell through is given to the text input while the
+	// user is still typing.
+	if m.state == authInput {
+		var cmd tea.Cmd
+		m.input, cmd = m.input.Update(msg)
+		return m, cmd
+	}
+	return m, nil
+}
+
+// View renders the login screen: a banner with the server URL, the link to
+// the page where API keys are generated, a tip about passing the key through
+// the SSH environment, the current error (if any), and finally a line that
+// depends on the state — the key input, a "validating" notice, or a success
+// notice.
+func (m authModel) View() string {
+	accountsURL := strings.TrimRight(m.serverURL, "/") + "/app/settings/accounts-access"
+
+	var out strings.Builder
+	out.WriteString("\n  \x1b[1;35mOnyx CLI\x1b[0m\n")
+	fmt.Fprintf(&out, "  \x1b[90m%s\x1b[0m\n\n", m.serverURL)
+	out.WriteString("  Generate an API key at:\n")
+	fmt.Fprintf(&out, "  \x1b[4;34m%s\x1b[0m\n\n", accountsURL)
+	out.WriteString("  \x1b[90mTip: skip this prompt by passing your key via SSH:\x1b[0m\n")
+	out.WriteString("  \x1b[90m  export ONYX_API_KEY=<key>\x1b[0m\n")
+	out.WriteString("  \x1b[90m  ssh -o SendEnv=ONYX_API_KEY <host> -p <port>\x1b[0m\n\n")
+
+	if m.errMsg != "" {
+		fmt.Fprintf(&out, "  \x1b[1;31m%s\x1b[0m\n\n", m.errMsg)
+	}
+
+	if m.state == authDone {
+		out.WriteString("  \x1b[32mAuthenticated.\x1b[0m\n")
+	} else if m.state == authValidating {
+		out.WriteString("  \x1b[90mValidating…\x1b[0m\n")
+	} else {
+		out.WriteString(m.input.View() + "\n")
+	}
+
+	return out.String()
+}
+
+// --- serve model (wraps auth → TUI in a single bubbletea program) ---
+
+// serveModel is the top-level bubbletea model for sessions that start at the
+// login prompt: it shows the auth prompt first and, once a key has been
+// accepted, hands every message to the chat TUI.
+type serveModel struct {
+	auth      authModel            // login prompt, active until authed is true
+	tui       tea.Model            // chat TUI; created when authentication succeeds
+	authed    bool                 // true once the TUI has taken over
+	serverCfg config.OnyxCliConfig // operator config supplying server URL and default agent
+	width     int                  // last window width seen before authentication
+	height    int                  // last window height seen before authentication
+}
+
+func newServeModel(serverCfg config.OnyxCliConfig, initialErr string) serveModel {
+	return serveModel{
+		auth:      newAuthModel(serverCfg.ServerURL, initialErr),
+		serverCfg: serverCfg,
+	}
+}
+
+// Init starts the cursor blink of the login prompt's text input.
+func (m serveModel) Init() tea.Cmd {
+	return textinput.Blink
+}
+
+func (m serveModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
+	if !m.authed {
+		if ws, ok := msg.(tea.WindowSizeMsg); ok {
+			m.width = ws.Width
+			m.height = ws.Height
+		}
+
+		var cmd tea.Cmd
+		m.auth, cmd = m.auth.Update(msg)
+
+		if m.auth.aborted {
+			return m, tea.Quit
+		}
+		if m.auth.apiKey != "" {
+			cfg := config.OnyxCliConfig{
+				ServerURL:      m.serverCfg.ServerURL,
+				APIKey:         m.auth.apiKey,
+				DefaultAgentID: m.serverCfg.DefaultAgentID,
+			}
+			m.tui = tui.NewModel(cfg)
+			m.authed = true
+			w, h := m.width, m.height
+			return m, tea.Batch(
+				tea.EnterAltScreen,
+				tea.EnableMouseCellMotion,
+				m.tui.Init(),
+				func() tea.Msg { return tea.WindowSizeMsg{Width: w, Height: h} },
+			)
+		}
+		return m, cmd
+	}
+
+	var cmd tea.Cmd
+	m.tui, cmd = m.tui.Update(msg)
+	return m, cmd
+}
+
+// View renders the chat TUI once authenticated and the login prompt until
+// then.
+func (m serveModel) View() string {
+	if m.authed {
+		return m.tui.View()
+	}
+	return m.auth.View()
+}
+
+// --- serve command ---
+
+// newServeCmd builds the `serve` subcommand, which runs an SSH server that
+// presents the Onyx chat TUI to each connecting client.
+//
+// The command reads the server URL (and default agent) from the operator's
+// config, validates the rate-limit flags, and serves until it receives
+// SIGINT/SIGTERM or the listener fails; it then shuts the server down with a
+// 10 second deadline. The returned error is the listener error, the shutdown
+// error, or both joined.
+func newServeCmd() *cobra.Command {
+	var (
+		host              string
+		port              int
+		keyPath           string
+		idleTimeout       time.Duration
+		maxSessionTimeout time.Duration
+		rateLimitPerMin   int
+		rateLimitBurst    int
+		rateLimitCache    int
+	)
+
+	cmd := &cobra.Command{
+		Use:   "serve",
+		Short: "Serve the Onyx TUI over SSH",
+		Long: `Start an SSH server that presents the interactive Onyx chat TUI to
+connecting clients. Each SSH session gets its own independent TUI instance.
+
+Clients are prompted for their Onyx API key on connect. The key can also be
+provided via the ONYX_API_KEY environment variable to skip the prompt:
+
+  ssh -o SendEnv=ONYX_API_KEY host -p port
+
+The server URL is taken from the server operator's config. The server
+auto-generates an Ed25519 host key on first run if the key file does not
+already exist. The host key path can also be set via the ONYX_SSH_HOST_KEY
+environment variable (the --host-key flag takes precedence).
+
+Example:
+  onyx-cli serve --port 2222
+  ssh localhost -p 2222`,
+		RunE: func(cmd *cobra.Command, args []string) error {
+			serverCfg := config.Load()
+			if serverCfg.ServerURL == "" {
+				return fmt.Errorf("server URL is not configured; run 'onyx-cli configure' first")
+			}
+			// The environment variable is only a fallback: an explicitly
+			// passed --host-key flag wins.
+			if !cmd.Flags().Changed("host-key") {
+				if v := os.Getenv(config.EnvSSHHostKey); v != "" {
+					keyPath = v
+				}
+			}
+			if rateLimitPerMin <= 0 {
+				return fmt.Errorf("--rate-limit-per-minute must be > 0")
+			}
+			if rateLimitBurst <= 0 {
+				return fmt.Errorf("--rate-limit-burst must be > 0")
+			}
+			if rateLimitCache <= 0 {
+				return fmt.Errorf("--rate-limit-cache must be > 0")
+			}
+
+			addr := net.JoinHostPort(host, fmt.Sprintf("%d", port))
+			// The flag is per minute; dividing by 60 converts it to the
+			// per-second rate passed to rate.Limit.
+			connectionLimiter := ratelimiter.NewRateLimiter(
+				rate.Limit(float64(rateLimitPerMin)/60.0),
+				rateLimitBurst,
+				rateLimitCache,
+			)
+
+			// handler picks the initial bubbletea model for a new session.
+			handler := func(s ssh.Session) (tea.Model, []tea.ProgramOption) {
+				apiKey := strings.TrimSpace(sessionEnv(s, config.EnvAPIKey))
+				var envErr string
+
+				// A key sent through the SSH environment is validated up
+				// front; if it is rejected, the reason is shown on the
+				// login prompt instead.
+				if apiKey != "" {
+					if err := validateAPIKey(serverCfg.ServerURL, apiKey); err != nil {
+						envErr = fmt.Sprintf("ONYX_API_KEY from SSH environment is invalid: %s", err.Error())
+						apiKey = ""
+					}
+				}
+
+				if apiKey != "" {
+					// Env key is valid — go straight to the TUI.
+					cfg := config.OnyxCliConfig{
+						ServerURL:      serverCfg.ServerURL,
+						APIKey:         apiKey,
+						DefaultAgentID: serverCfg.DefaultAgentID,
+					}
+					return tui.NewModel(cfg), []tea.ProgramOption{
+						tea.WithAltScreen(),
+						tea.WithMouseCellMotion(),
+					}
+				}
+
+				// No valid env key — show auth prompt, then transition
+				// to the TUI within the same bubbletea program.
+				return newServeModel(serverCfg, envErr), []tea.ProgramOption{
+					tea.WithMouseCellMotion(),
+				}
+			}
+
+			// NOTE(review): wish is understood to run middleware in reverse
+			// listing order (last listed runs first), which would put
+			// logging and rate limiting ahead of the TUI — confirm against
+			// the wish documentation.
+			serverOptions := []ssh.Option{
+				wish.WithAddress(addr),
+				wish.WithHostKeyPath(keyPath),
+				wish.WithMiddleware(
+					bubbletea.Middleware(handler),
+					activeterm.Middleware(),
+					ratelimiter.Middleware(connectionLimiter),
+					logging.Middleware(),
+				),
+			}
+			// A zero (or negative) duration leaves the timeout unset.
+			if idleTimeout > 0 {
+				serverOptions = append(serverOptions, wish.WithIdleTimeout(idleTimeout))
+			}
+			if maxSessionTimeout > 0 {
+				serverOptions = append(serverOptions, wish.WithMaxTimeout(maxSessionTimeout))
+			}
+
+			s, err := wish.NewServer(serverOptions...)
+			if err != nil {
+				return fmt.Errorf("could not create SSH server: %w", err)
+			}
+
+			done := make(chan os.Signal, 1)
+			signal.Notify(done, os.Interrupt, syscall.SIGTERM)
+
+			log.Info("Starting Onyx SSH server", "addr", addr)
+			log.Info("Connect with", "cmd", fmt.Sprintf("ssh %s -p %d", host, port))
+
+			// Buffered so the serving goroutine can report its error and
+			// exit even if nobody is receiving at that moment.
+			errCh := make(chan error, 1)
+			go func() {
+				if err := s.ListenAndServe(); err != nil && !errors.Is(err, ssh.ErrServerClosed) {
+					log.Error("SSH server failed", "error", err)
+					errCh <- err
+				}
+			}()
+
+			// Block until a termination signal arrives or the listener fails.
+			var serverErr error
+			select {
+			case <-done:
+			case serverErr = <-errCh:
+			}
+
+			signal.Stop(done)
+			log.Info("Shutting down SSH server")
+			ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+			defer cancel()
+			if shutdownErr := s.Shutdown(ctx); shutdownErr != nil {
+				return errors.Join(serverErr, shutdownErr)
+			}
+			return serverErr
+		},
+	}
+
+	cmd.Flags().StringVar(&host, "host", "localhost", "Host address to bind to")
+	cmd.Flags().IntVarP(&port, "port", "p", 2222, "Port to listen on")
+	cmd.Flags().StringVar(&keyPath, "host-key", filepath.Join(config.ConfigDir(), "host_ed25519"),
+		"Path to SSH host key (auto-generated if missing)")
+	cmd.Flags().DurationVar(
+		&idleTimeout,
+		"idle-timeout",
+		defaultServeIdleTimeout,
+		"Disconnect idle clients after this duration (set 0 to disable)",
+	)
+	cmd.Flags().DurationVar(
+		&maxSessionTimeout,
+		"max-session-timeout",
+		defaultServeMaxSessionTimeout,
+		"Maximum lifetime of a client session (set 0 to disable)",
+	)
+	cmd.Flags().IntVar(
+		&rateLimitPerMin,
+		"rate-limit-per-minute",
+		defaultServeRateLimitPerMinute,
+		"Per-IP connection rate limit (new sessions per minute)",
+	)
+	cmd.Flags().IntVar(
+		&rateLimitBurst,
+		"rate-limit-burst",
+		defaultServeRateLimitBurst,
+		"Per-IP burst limit for connection attempts",
+	)
+	cmd.Flags().IntVar(
+		&rateLimitCache,
+		"rate-limit-cache",
+		defaultServeRateLimitCacheSize,
+		"Maximum number of IP limiter entries tracked in memory",
+	)
+
+	return cmd
+}
--- a/cli/go.mod
+++ b/cli/go.mod
@@ -7,27 +7,40 @@ require (
 	github.com/charmbracelet/bubbletea v1.3.10
 	github.com/charmbracelet/glamour v1.0.0
 	github.com/charmbracelet/lipgloss v1.1.1-0.20250404203927-76690c660834
+	github.com/charmbracelet/log v1.0.0
+	github.com/charmbracelet/ssh v0.0.0-20250826160808-ebfa259c7309
+	github.com/charmbracelet/wish v1.4.7
 	github.com/sirupsen/logrus v1.9.4
 	github.com/spf13/cobra v1.10.2
 	golang.org/x/term v0.41.0
 	golang.org/x/text v0.35.0
+	golang.org/x/time v0.15.0
 )

 require (
 	github.com/alecthomas/chroma/v2 v2.23.1 // indirect
+	github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be // indirect
 	github.com/atotto/clipboard v0.1.4 // indirect
 	github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect
 	github.com/aymerick/douceur v0.2.0 // indirect
 	github.com/charmbracelet/colorprofile v0.4.3 // indirect
+	github.com/charmbracelet/keygen v0.5.4 // indirect
 	github.com/charmbracelet/x/ansi v0.11.6 // indirect
 	github.com/charmbracelet/x/cellbuf v0.0.15 // indirect
+	github.com/charmbracelet/x/conpty v0.2.0 // indirect
 	github.com/charmbracelet/x/exp/slice v0.0.0-20260323091123-df7b1bcffcca // indirect
+	github.com/charmbracelet/x/input v0.3.7 // indirect
 	github.com/charmbracelet/x/term v0.2.2 // indirect
+	github.com/charmbracelet/x/termios v0.1.1 // indirect
+	github.com/charmbracelet/x/windows v0.2.2 // indirect
 	github.com/clipperhouse/displaywidth v0.11.0 // indirect
 	github.com/clipperhouse/uax29/v2 v2.7.0 // indirect
+	github.com/creack/pty v1.1.24 // indirect
 	github.com/dlclark/regexp2 v1.11.5 // indirect
 	github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect
+	github.com/go-logfmt/logfmt v0.6.1 // indirect
 	github.com/gorilla/css v1.0.1 // indirect
+	github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
 	github.com/inconshreveable/mousetrap v1.1.0 // indirect
 	github.com/lucasb-eyer/go-colorful v1.3.0 // indirect
 	github.com/mattn/go-isatty v0.0.20 // indirect
@@ -43,6 +56,8 @@ require (
 	github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
 	github.com/yuin/goldmark v1.8.2 // indirect
 	github.com/yuin/goldmark-emoji v1.0.6 // indirect
+	golang.org/x/crypto v0.49.0 // indirect
+	golang.org/x/exp v0.0.0-20260312153236-7ab1446f8b90 // indirect
 	golang.org/x/net v0.52.0 // indirect
 	golang.org/x/sys v0.42.0 // indirect
 )
--- a/cli/go.sum
+++ b/cli/go.sum
@@ -4,6 +4,8 @@ github.com/alecthomas/chroma/v2 v2.23.1 h1:nv2AVZdTyClGbVQkIzlDm/rnhk1E9bU9nXwmZ
 github.com/alecthomas/chroma/v2 v2.23.1/go.mod h1:NqVhfBR0lte5Ouh3DcthuUCTUpDC9cxBOfyMbMQPs3o=
 github.com/alecthomas/repr v0.5.2 h1:SU73FTI9D1P5UNtvseffFSGmdNci/O6RsqzeXJtP0Qs=
 github.com/alecthomas/repr v0.5.2/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4=
+github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be h1:9AeTilPcZAjCFIImctFaOjnTIavg87rW78vTPkQqLI8=
+github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be/go.mod h1:ySMOLuWl6zY27l47sB3qLNK6tF2fkHG55UZxx8oIVo4=
 github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4=
 github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI=
 github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k=
@@ -20,31 +22,55 @@ github.com/charmbracelet/colorprofile v0.4.3 h1:QPa1IWkYI+AOB+fE+mg/5/4HRMZcaXex
 github.com/charmbracelet/colorprofile v0.4.3/go.mod h1:/zT4BhpD5aGFpqQQqw7a+VtHCzu+zrQtt1zhMt9mR4Q=
 github.com/charmbracelet/glamour v1.0.0 h1:AWMLOVFHTsysl4WV8T8QgkQ0s/ZNZo7CiE4WKhk8l08=
 github.com/charmbracelet/glamour v1.0.0/go.mod h1:DSdohgOBkMr2ZQNhw4LZxSGpx3SvpeujNoXrQyH2hxo=
+github.com/charmbracelet/keygen v0.5.4 h1:XQYgf6UEaTGgQSSmiPpIQ78WfseNQp4Pz8N/c1OsrdA=
+github.com/charmbracelet/keygen v0.5.4/go.mod h1:t4oBRr41bvK7FaJsAaAQhhkUuHslzFXVjOBwA55CZNM=
 github.com/charmbracelet/lipgloss v1.1.1-0.20250404203927-76690c660834 h1:ZR7e0ro+SZZiIZD7msJyA+NjkCNNavuiPBLgerbOziE=
 github.com/charmbracelet/lipgloss v1.1.1-0.20250404203927-76690c660834/go.mod h1:aKC/t2arECF6rNOnaKaVU6y4t4ZeHQzqfxedE/VkVhA=
+github.com/charmbracelet/log v1.0.0 h1:HVVVMmfOorfj3BA9i8X8UL69Hoz9lI0PYwXfJvOdRc4=
+github.com/charmbracelet/log v1.0.0/go.mod h1:uYgY3SmLpwJWxmlrPwXvzVYujxis1vAKRV/0VQB7yWA=
+github.com/charmbracelet/ssh v0.0.0-20250826160808-ebfa259c7309 h1:dCVbCRRtg9+tsfiTXTp0WupDlHruAXyp+YoxGVofHHc=
+github.com/charmbracelet/ssh v0.0.0-20250826160808-ebfa259c7309/go.mod h1:R9cISUs5kAH4Cq/rguNbSwcR+slE5Dfm8FEs//uoIGE=
+github.com/charmbracelet/wish v1.4.7 h1:O+jdLac3s6GaqkOHHSwezejNK04vl6VjO1A+hl8J8Yc=
+github.com/charmbracelet/wish v1.4.7/go.mod h1:OBZ8vC62JC5cvbxJLh+bIWtG7Ctmct+ewziuUWK+G14=
 github.com/charmbracelet/x/ansi v0.11.6 h1:GhV21SiDz/45W9AnV2R61xZMRri5NlLnl6CVF7ihZW8=
 github.com/charmbracelet/x/ansi v0.11.6/go.mod h1:2JNYLgQUsyqaiLovhU2Rv/pb8r6ydXKS3NIttu3VGZQ=
 github.com/charmbracelet/x/cellbuf v0.0.15 h1:ur3pZy0o6z/R7EylET877CBxaiE1Sp1GMxoFPAIztPI=
 github.com/charmbracelet/x/cellbuf v0.0.15/go.mod h1:J1YVbR7MUuEGIFPCaaZ96KDl5NoS0DAWkskup+mOY+Q=
+github.com/charmbracelet/x/conpty v0.2.0 h1:eKtA2hm34qNfgJCDp/M6Dc0gLy7e07YEK4qAdNGOvVY=
+github.com/charmbracelet/x/conpty v0.2.0/go.mod h1:fexgUnVrZgw8scD49f6VSi0Ggj9GWYIrpedRthAwW/8=
 github.com/charmbracelet/x/exp/golden v0.0.0-20241011142426-46044092ad91 h1:payRxjMjKgx2PaCWLZ4p3ro9y97+TVLZNaRZgJwSVDQ=
 github.com/charmbracelet/x/exp/golden v0.0.0-20241011142426-46044092ad91/go.mod h1:wDlXFlCrmJ8J+swcL/MnGUuYnqgQdW9rhSD61oNMb6U=
 github.com/charmbracelet/x/exp/slice v0.0.0-20260323091123-df7b1bcffcca h1:QQoyQLgUzojMNWHVHToN6d9qTvT0KWtxUKIRPx/Ox5o=
 github.com/charmbracelet/x/exp/slice v0.0.0-20260323091123-df7b1bcffcca/go.mod h1:vqEfX6xzqW1pKKZUUiFOKg0OQ7bCh54Q2vR/tserrRA=
+github.com/charmbracelet/x/input v0.3.7 h1:UzVbkt1vgM9dBQ+K+uRolBlN6IF2oLchmPKKo/aucXo=
+github.com/charmbracelet/x/input v0.3.7/go.mod h1:ZSS9Cia6Cycf2T6ToKIOxeTBTDwl25AGwArJuGaOBH8=
 github.com/charmbracelet/x/term v0.2.2 h1:xVRT/S2ZcKdhhOuSP4t5cLi5o+JxklsoEObBSgfgZRk=
 github.com/charmbracelet/x/term v0.2.2/go.mod h1:kF8CY5RddLWrsgVwpw4kAa6TESp6EB5y3uxGLeCqzAI=
+github.com/charmbracelet/x/termios v0.1.1 h1:o3Q2bT8eqzGnGPOYheoYS8eEleT5ZVNYNy8JawjaNZY=
+github.com/charmbracelet/x/termios v0.1.1/go.mod h1:rB7fnv1TgOPOyyKRJ9o+AsTU/vK5WHJ2ivHeut/Pcwo=
+github.com/charmbracelet/x/windows v0.2.2 h1:IofanmuvaxnKHuV04sC0eBy/smG6kIKrWG2/jYn2GuM=
+github.com/charmbracelet/x/windows v0.2.2/go.mod h1:/8XtdKZzedat74NQFn0NGlGL4soHB0YQZrETF96h75k=
 github.com/clipperhouse/displaywidth v0.11.0 h1:lBc6kY44VFw+TDx4I8opi/EtL9m20WSEFgwIwO+UVM8=
 github.com/clipperhouse/displaywidth v0.11.0/go.mod h1:bkrFNkf81G8HyVqmKGxsPufD3JhNl3dSqnGhOoSD/o0=
 github.com/clipperhouse/uax29/v2 v2.7.0 h1:+gs4oBZ2gPfVrKPthwbMzWZDaAFPGYK72F0NJv2v7Vk=
 github.com/clipperhouse/uax29/v2 v2.7.0/go.mod h1:EFJ2TJMRUaplDxHKj1qAEhCtQPW2tJSwu5BF98AuoVM=
 github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
+github.com/creack/pty v1.1.24 h1:bJrF4RRfyJnbTJqzRLHzcGaZK1NeM5kTC9jGgovnR1s=
+github.com/creack/pty v1.1.24/go.mod h1:08sCNb52WyoAwi2QDyzUCTgcvVFhUzewun7wtTfvcwE=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/dlclark/regexp2 v1.11.5 h1:Q/sSnsKerHeCkc/jSTNq1oCm7KiVgUMZRDUoRu0JQZQ=
 github.com/dlclark/regexp2 v1.11.5/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
 github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6baUTXGLOoWe4PQhGxaX0KpnayAqC48p4=
 github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f/go.mod h1:vw97MGsxSvLiUE2X8qFplwetxpGLQrlU1Q9AUEIzCaM=
+github.com/go-logfmt/logfmt v0.6.1 h1:4hvbpePJKnIzH1B+8OR/JPbTx37NktoI9LE2QZBBkvE=
+github.com/go-logfmt/logfmt v0.6.1/go.mod h1:EV2pOAQoZaT1ZXZbqDl5hrymndi4SY9ED9/z6CO0XAk=
+github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
+github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
 github.com/gorilla/css v1.0.1 h1:ntNaBIghp6JmvWnxbZKANoLyuXTPZ4cAMlo6RyhlbO8=
 github.com/gorilla/css v1.0.1/go.mod h1:BvnYkspnSzMmwRK+b8/xgNPLiIuNZr6vbZBTPQ2A3b0=
+github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
+github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
 github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
 github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg=
 github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
@@ -82,8 +108,8 @@ github.com/spf13/cobra v1.10.2/go.mod h1:7C1pvHqHw5A4vrJfjNwvOdzYu0Gml16OCs2GRiT
 github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
 github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk=
 github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
-github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
-github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
+github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
+github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
 github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no=
 github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM=
 github.com/yuin/goldmark v1.8.2 h1:kEGpgqJXdgbkhcOgBxkC0X0PmoPG1ZyoZ117rDVp4zE=
@@ -91,10 +117,14 @@ github.com/yuin/goldmark v1.8.2/go.mod h1:ip/1k0VRfGynBgxOz0yCqHrbZXhcjxyuS66Brc
 github.com/yuin/goldmark-emoji v1.0.6 h1:QWfF2FYaXwL74tfGOW5izeiZepUDroDJfWubQI9HTHs=
 github.com/yuin/goldmark-emoji v1.0.6/go.mod h1:ukxJDKFpdFb5x0a5HqbdlcKtebh086iJpI31LTKmWuA=
 go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
-golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI=
-golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo=
+golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4=
+golang.org/x/crypto v0.49.0/go.mod h1:ErX4dUh2UM+CFYiXZRTcMpEcN8b/1gxEuv3nODoYtCA=
+golang.org/x/exp v0.0.0-20260312153236-7ab1446f8b90 h1:jiDhWWeC7jfWqR9c/uplMOqJ0sbNlNWv0UkzE0vX1MA=
+golang.org/x/exp v0.0.0-20260312153236-7ab1446f8b90/go.mod h1:xE1HEv6b+1SCZ5/uscMRjUBKtIxworgEcEi+/n9NQDQ=
 golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0=
 golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw=
+golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4=
+golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0=
 golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
@@ -103,6 +133,8 @@ golang.org/x/term v0.41.0 h1:QCgPso/Q3RTJx2Th4bDLqML4W6iJiaXFq2/ftQF13YU=
 golang.org/x/term v0.41.0/go.mod h1:3pfBgksrReYfZ5lvYM0kSO0LIkAl4Yl2bXOkKP7Ec2A=
 golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8=
 golang.org/x/text v0.35.0/go.mod h1:khi/HExzZJ2pGnjenulevKNX1W67CUy0AsXcNubPGCA=
+golang.org/x/time v0.15.0 h1:bbrp8t3bGUeFOx08pvsMYRTCVSMk89u4tKbNOZbp88U=
+golang.org/x/time v0.15.0/go.mod h1:Y4YMaQmXwGQZoFaVFk4YpCt4FLQMYKZe9oeV/f4MSno=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
--- a/cli/internal/config/config.go
+++ b/cli/internal/config/config.go
@@ -9,9 +9,10 @@ import (
 )

 const (
-	EnvServerURL    = "ONYX_SERVER_URL"
-	EnvAPIKey = "ONYX_API_KEY"
+	EnvServerURL  = "ONYX_SERVER_URL"
+	EnvAPIKey     = "ONYX_API_KEY"
 	EnvAgentID    = "ONYX_PERSONA_ID"
+	EnvSSHHostKey = "ONYX_SSH_HOST_KEY"
 )

 // OnyxCliConfig holds the CLI configuration.
@@ -35,8 +36,8 @@ func (c OnyxCliConfig) IsConfigured() bool {
 	return c.APIKey != ""
 }

-// configDir returns ~/.config/onyx-cli
-func configDir() string {
+// ConfigDir returns the onyx-cli config directory: $XDG_CONFIG_HOME/onyx-cli when XDG_CONFIG_HOME is set, otherwise ~/.config/onyx-cli.
+func ConfigDir() string {
 	if xdg := os.Getenv("XDG_CONFIG_HOME"); xdg != "" {
 		return filepath.Join(xdg, "onyx-cli")
 	}
@@ -49,7 +50,7 @@ func configDir() string {

 // ConfigFilePath returns the full path to the config file.
 func ConfigFilePath() string {
-	return filepath.Join(configDir(), "config.json")
+	return filepath.Join(ConfigDir(), "config.json")
 }

 // ConfigExists checks if the config file exists on disk.
@@ -87,7 +88,7 @@ func Load() OnyxCliConfig {

 // Save writes the config to disk, creating parent directories if needed.
 func Save(cfg OnyxCliConfig) error {
-	dir := configDir()
+	dir := ConfigDir()
 	if err := os.MkdirAll(dir, 0o755); err != nil {
 		return err
 	}
--- a/deployment/docker_compose/install.sh
+++ b/deployment/docker_compose/install.sh
@@ -203,6 +203,7 @@ prompt_or_default() {
    local default_value="$2"
    read_prompt_line "$prompt_text"
    [[ -z "$REPLY" ]] && REPLY="$default_value"
+    return 0
 }

 prompt_yn_or_default() {
@@ -210,6 +211,7 @@ prompt_yn_or_default() {
    local default_value="$2"
    read_prompt_char "$prompt_text"
    [[ -z "$REPLY" ]] && REPLY="$default_value"
+    return 0
 }

 confirm_action() {
--- a/deployment/helm/charts/onyx/Chart.yaml
+++ b/deployment/helm/charts/onyx/Chart.yaml
@@ -5,7 +5,7 @@ home: https://www.onyx.app/
 sources:
  - "https://github.com/onyx-dot-app/onyx"
 type: application
-version: 0.4.38
+version: 0.4.39
 appVersion: latest
 annotations:
  category: Productivity
--- a/deployment/helm/charts/onyx/dashboards/indexing-pipeline.json
+++ b/deployment/helm/charts/onyx/dashboards/indexing-pipeline.json
--- a/deployment/helm/charts/onyx/templates/celery-worker-servicemonitors.yaml
+++ b/deployment/helm/charts/onyx/templates/celery-worker-servicemonitors.yaml
@@ -0,0 +1,84 @@
+{{- /*
+ServiceMonitors (Prometheus Operator CRD) for the Celery worker metrics endpoints.
+Rendered only when monitoring.serviceMonitors.enabled and vectorDB.enabled are both
+set; each monitor is additionally skipped when its worker's replicaCount is 0.
+Templated scalars (namespace, app label) are piped through quote so values that
+look like numbers or booleans still render as YAML strings.
+*/ -}}
+{{- if and .Values.monitoring.serviceMonitors.enabled .Values.vectorDB.enabled }}
+{{- if gt (int .Values.celery_worker_monitoring.replicaCount) 0 }}
+---
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: {{ include "onyx.fullname" . }}-celery-worker-monitoring
+  labels:
+    {{- include "onyx.labels" . | nindent 4 }}
+    {{- with .Values.monitoring.serviceMonitors.labels }}
+    {{- toYaml . | nindent 4 }}
+    {{- end }}
+spec:
+  namespaceSelector:
+    matchNames:
+      - {{ .Release.Namespace | quote }}
+  selector:
+    matchLabels:
+      app: {{ .Values.celery_worker_monitoring.deploymentLabels.app | quote }}
+      metrics: "true"
+  endpoints:
+    - port: metrics
+      path: /metrics
+      interval: 30s
+      scrapeTimeout: 10s
+{{- end }}
+{{- if gt (int .Values.celery_worker_docfetching.replicaCount) 0 }}
+---
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: {{ include "onyx.fullname" . }}-celery-worker-docfetching
+  labels:
+    {{- include "onyx.labels" . | nindent 4 }}
+    {{- with .Values.monitoring.serviceMonitors.labels }}
+    {{- toYaml . | nindent 4 }}
+    {{- end }}
+spec:
+  namespaceSelector:
+    matchNames:
+      - {{ .Release.Namespace | quote }}
+  selector:
+    matchLabels:
+      app: {{ .Values.celery_worker_docfetching.deploymentLabels.app | quote }}
+      metrics: "true"
+  endpoints:
+    - port: metrics
+      path: /metrics
+      interval: 30s
+      scrapeTimeout: 10s
+{{- end }}
+{{- if gt (int .Values.celery_worker_docprocessing.replicaCount) 0 }}
+---
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: {{ include "onyx.fullname" . }}-celery-worker-docprocessing
+  labels:
+    {{- include "onyx.labels" . | nindent 4 }}
+    {{- with .Values.monitoring.serviceMonitors.labels }}
+    {{- toYaml . | nindent 4 }}
+    {{- end }}
+spec:
+  namespaceSelector:
+    matchNames:
+      - {{ .Release.Namespace | quote }}
+  selector:
+    matchLabels:
+      app: {{ .Values.celery_worker_docprocessing.deploymentLabels.app | quote }}
+      metrics: "true"
+  endpoints:
+    - port: metrics
+      path: /metrics
+      interval: 30s
+      scrapeTimeout: 10s
+{{- end }}
+{{- end }}
--- a/deployment/helm/charts/onyx/templates/grafana-dashboards.yaml
+++ b/deployment/helm/charts/onyx/templates/grafana-dashboards.yaml
@@ -0,0 +1,15 @@
+{{- if .Values.monitoring.grafana.dashboards.enabled }}
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "onyx.fullname" . }}-indexing-pipeline-dashboard
+  labels:
+    {{- include "onyx.labels" . | nindent 4 }}
+    grafana_dashboard: "1"
+  annotations:
+    grafana_folder: "Onyx"
+data:
+  onyx-indexing-pipeline.json: |
+    {{- .Files.Get "dashboards/indexing-pipeline.json" | nindent 4 }}
+{{- end }}
--- a/deployment/helm/charts/onyx/values.yaml
+++ b/deployment/helm/charts/onyx/values.yaml
@@ -256,6 +256,20 @@ tooling:
    # -- Which client binary to call; change if your image uses a non-default path.
    psqlBinary: psql

+monitoring:
+  grafana:
+    dashboards:
+      # -- Set to true to deploy Grafana dashboard ConfigMaps for the Onyx indexing pipeline.
+      # Requires kube-prometheus-stack (or equivalent) with the Grafana sidecar enabled and watching this namespace.
+      # The sidecar must be configured with label selector: grafana_dashboard=1
+      enabled: false
+  serviceMonitors:
+    # -- Set to true to deploy ServiceMonitor resources for Celery worker metrics endpoints.
+    # Requires the Prometheus Operator CRDs (included in kube-prometheus-stack).
+    # Use `labels` to match your Prometheus CR's serviceMonitorSelector (e.g. release: onyx-monitoring).
+    enabled: false
+    labels: {}
+
 serviceAccount:
  # Specifies whether a service account should be created
  create: false
--- a/deployment/terraform/modules/aws/eks/main.tf
+++ b/deployment/terraform/modules/aws/eks/main.tf
@@ -19,6 +19,10 @@ module "eks" {
  cluster_endpoint_public_access_cidrs     = var.cluster_endpoint_public_access_cidrs
  enable_cluster_creator_admin_permissions = true

+  # Control plane logging
+  cluster_enabled_log_types              = var.cluster_enabled_log_types
+  cloudwatch_log_group_retention_in_days = var.cloudwatch_log_group_retention_in_days
+
  eks_managed_node_group_defaults = {
    ami_type = "AL2023_x86_64_STANDARD"
  }
--- a/deployment/terraform/modules/aws/eks/variables.tf
+++ b/deployment/terraform/modules/aws/eks/variables.tf
@@ -161,3 +161,25 @@ variable "rds_db_connect_arn" {
  description = "Full rds-db:connect ARN to allow (required when enable_rds_iam_for_service_account is true)"
  default     = null
 }
+
+variable "cluster_enabled_log_types" {
+  type        = list(string)
+  description = "EKS control plane log types to enable (valid: api, audit, authenticator, controllerManager, scheduler)"
+  default     = ["api", "audit", "authenticator", "controllerManager", "scheduler"]
+
+  validation {
+    condition     = alltrue([for t in var.cluster_enabled_log_types : contains(["api", "audit", "authenticator", "controllerManager", "scheduler"], t)])
+    error_message = "Each entry must be one of: api, audit, authenticator, controllerManager, scheduler."
+  }
+}
+
+variable "cloudwatch_log_group_retention_in_days" {
+  type        = number
+  description = "Number of days to retain EKS control plane logs in CloudWatch (0 = never expire)"
+  default     = 30
+
+  validation {
+    condition     = contains([0, 1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365, 400, 545, 731, 1096, 1827, 2192, 2557, 2922, 3288, 3653], var.cloudwatch_log_group_retention_in_days)
+    error_message = "Must be a valid CloudWatch retention value (0, 1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365, 400, 545, 731, 1096, 1827, 2192, 2557, 2922, 3288, 3653)."
+  }
+}
--- a/deployment/terraform/modules/aws/onyx/main.tf
+++ b/deployment/terraform/modules/aws/onyx/main.tf
@@ -54,6 +54,9 @@ module "postgres" {
  password            = var.postgres_password
  tags                = local.merged_tags
  enable_rds_iam_auth = var.enable_iam_auth
+
+  backup_retention_period = var.postgres_backup_retention_period
+  backup_window           = var.postgres_backup_window
 }

 module "s3" {
@@ -80,6 +83,10 @@ module "eks" {
  public_cluster_enabled               = var.public_cluster_enabled
  private_cluster_enabled              = var.private_cluster_enabled
  cluster_endpoint_public_access_cidrs = var.cluster_endpoint_public_access_cidrs
+
+  # Control plane logging
+  cluster_enabled_log_types              = var.eks_cluster_enabled_log_types
+  cloudwatch_log_group_retention_in_days = var.eks_cloudwatch_log_group_retention_in_days
 }

 module "waf" {
--- a/deployment/terraform/modules/aws/onyx/variables.tf
+++ b/deployment/terraform/modules/aws/onyx/variables.tf
@@ -250,3 +250,52 @@ variable "opensearch_subnet_ids" {
   description = "Subnet IDs for OpenSearch. If empty, uses first 3 private subnets."
   default     = []
 }
+
+# RDS Backup Configuration
+# Validations below mirror the postgres module's own checks so that an invalid
+# value is reported against this module's variable name.
+variable "postgres_backup_retention_period" {
+  type        = number
+  description = "Number of days to retain automated RDS backups (0 to disable)"
+  default     = 7
+
+  validation {
+    condition     = var.postgres_backup_retention_period >= 0 && var.postgres_backup_retention_period <= 35
+    error_message = "postgres_backup_retention_period must be between 0 and 35 (AWS RDS limit)."
+  }
+}
+
+variable "postgres_backup_window" {
+  type        = string
+  description = "Preferred UTC time window for automated RDS backups (hh24:mi-hh24:mi)"
+  default     = "03:00-04:00"
+
+  validation {
+    condition     = can(regex("^([01]\\d|2[0-3]):[0-5]\\d-([01]\\d|2[0-3]):[0-5]\\d$", var.postgres_backup_window))
+    error_message = "postgres_backup_window must be in hh24:mi-hh24:mi format (e.g. \"03:00-04:00\")."
+  }
+}
+
+# EKS Control Plane Logging
+# Validations below mirror the eks module's own checks.
+variable "eks_cluster_enabled_log_types" {
+  type        = list(string)
+  description = "EKS control plane log types to enable (valid: api, audit, authenticator, controllerManager, scheduler)"
+  default     = ["api", "audit", "authenticator", "controllerManager", "scheduler"]
+
+  validation {
+    condition     = alltrue([for t in var.eks_cluster_enabled_log_types : contains(["api", "audit", "authenticator", "controllerManager", "scheduler"], t)])
+    error_message = "Each entry must be one of: api, audit, authenticator, controllerManager, scheduler."
+  }
+}
+
+variable "eks_cloudwatch_log_group_retention_in_days" {
+  type        = number
+  description = "Number of days to retain EKS control plane logs in CloudWatch (0 = never expire)"
+  default     = 30
+
+  validation {
+    condition     = contains([0, 1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365, 400, 545, 731, 1096, 1827, 2192, 2557, 2922, 3288, 3653], var.eks_cloudwatch_log_group_retention_in_days)
+    error_message = "Must be a valid CloudWatch retention value (0, 1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365, 400, 545, 731, 1096, 1827, 2192, 2557, 2922, 3288, 3653)."
+  }
+}
--- a/deployment/terraform/modules/aws/postgres/main.tf
+++ b/deployment/terraform/modules/aws/postgres/main.tf
@@ -44,5 +44,79 @@ resource "aws_db_instance" "this" {
  publicly_accessible    = false
  deletion_protection    = true
  storage_encrypted      = true
-  tags                   = var.tags
+
+  # Automated backups
+  backup_retention_period = var.backup_retention_period
+  backup_window           = var.backup_window
+
+  tags = var.tags
+}
+
+# CloudWatch alarm for CPU utilization monitoring
+resource "aws_cloudwatch_metric_alarm" "cpu_utilization" {
+  alarm_name          = "${var.identifier}-cpu-utilization"
+  alarm_description   = "RDS CPU utilization for ${var.identifier}"
+  comparison_operator = "GreaterThanThreshold"
+  evaluation_periods  = var.cpu_alarm_evaluation_periods
+  metric_name         = "CPUUtilization"
+  namespace           = "AWS/RDS"
+  period              = var.cpu_alarm_period
+  statistic           = "Average"
+  threshold           = var.cpu_alarm_threshold
+  treat_missing_data  = "missing"
+
+  alarm_actions = var.alarm_actions
+  ok_actions    = var.alarm_actions
+
+  dimensions = {
+    DBInstanceIdentifier = aws_db_instance.this.identifier
+  }
+
+  tags = var.tags
+}
+
+# CloudWatch alarm for read IOPS monitoring (ReadIOPS metric)
+resource "aws_cloudwatch_metric_alarm" "read_iops" {
+  alarm_name          = "${var.identifier}-read-iops"
+  alarm_description   = "RDS ReadIOPS for ${var.identifier}"
+  comparison_operator = "GreaterThanThreshold"
+  evaluation_periods  = var.iops_alarm_evaluation_periods
+  metric_name         = "ReadIOPS"
+  namespace           = "AWS/RDS"
+  period              = var.iops_alarm_period
+  statistic           = "Average"
+  threshold           = var.read_iops_alarm_threshold
+  treat_missing_data  = "missing"
+
+  alarm_actions = var.alarm_actions
+  ok_actions    = var.alarm_actions
+
+  dimensions = {
+    DBInstanceIdentifier = aws_db_instance.this.identifier
+  }
+
+  tags = var.tags
+}
+
+# CloudWatch alarm for freeable memory monitoring
+resource "aws_cloudwatch_metric_alarm" "freeable_memory" {
+  alarm_name          = "${var.identifier}-freeable-memory"
+  alarm_description   = "RDS freeable memory for ${var.identifier}"
+  comparison_operator = "LessThanThreshold"
+  evaluation_periods  = var.memory_alarm_evaluation_periods
+  metric_name         = "FreeableMemory"
+  namespace           = "AWS/RDS"
+  period              = var.memory_alarm_period
+  statistic           = "Average"
+  threshold           = var.memory_alarm_threshold
+  treat_missing_data  = "missing"
+
+  alarm_actions = var.alarm_actions
+  ok_actions    = var.alarm_actions
+
+  dimensions = {
+    DBInstanceIdentifier = aws_db_instance.this.identifier
+  }
+
+  tags = var.tags
 }
--- a/deployment/terraform/modules/aws/postgres/variables.tf
+++ b/deployment/terraform/modules/aws/postgres/variables.tf
@@ -67,3 +67,131 @@ variable "enable_rds_iam_auth" {
  description = "Enable AWS IAM database authentication for this RDS instance"
  default     = false
 }
+
+variable "backup_retention_period" {
+  type        = number
+  description = "Number of days to retain automated backups (0 to disable)"
+  default     = 7
+
+  validation {
+    condition     = var.backup_retention_period >= 0 && var.backup_retention_period <= 35
+    error_message = "backup_retention_period must be between 0 and 35 (AWS RDS limit)."
+  }
+}
+
+variable "backup_window" {
+  type        = string
+  description = "Preferred UTC time window for automated backups (hh24:mi-hh24:mi)"
+  default     = "03:00-04:00"
+
+  validation {
+    condition     = can(regex("^([01]\\d|2[0-3]):[0-5]\\d-([01]\\d|2[0-3]):[0-5]\\d$", var.backup_window))
+    error_message = "backup_window must be in hh24:mi-hh24:mi format (e.g. \"03:00-04:00\")."
+  }
+}
+
+# CloudWatch CPU alarm configuration
+variable "cpu_alarm_threshold" {
+  type        = number
+  description = "CPU utilization percentage threshold for the CloudWatch alarm"
+  default     = 80
+
+  validation {
+    condition     = var.cpu_alarm_threshold >= 0 && var.cpu_alarm_threshold <= 100
+    error_message = "cpu_alarm_threshold must be between 0 and 100 (percentage)."
+  }
+}
+
+variable "cpu_alarm_evaluation_periods" {
+  type        = number
+  description = "Number of consecutive periods the threshold must be breached before alarming"
+  default     = 3
+
+  validation {
+    condition     = var.cpu_alarm_evaluation_periods >= 1
+    error_message = "cpu_alarm_evaluation_periods must be at least 1."
+  }
+}
+
+variable "cpu_alarm_period" {
+  type        = number
+  description = "Period in seconds over which the CPU metric is evaluated"
+  default     = 300
+
+  validation {
+    condition     = var.cpu_alarm_period >= 60 && var.cpu_alarm_period % 60 == 0
+    error_message = "cpu_alarm_period must be a multiple of 60 seconds and at least 60 (CloudWatch requirement)."
+  }
+}
+
+variable "memory_alarm_threshold" {
+  type        = number
+  description = "Freeable memory threshold in bytes. Alarm fires when memory drops below this value."
+  default     = 256000000 # 256 MB
+
+  validation {
+    condition     = var.memory_alarm_threshold > 0
+    error_message = "memory_alarm_threshold must be greater than 0."
+  }
+}
+
+variable "memory_alarm_evaluation_periods" {
+  type        = number
+  description = "Number of consecutive periods the threshold must be breached before alarming"
+  default     = 3
+
+  validation {
+    condition     = var.memory_alarm_evaluation_periods >= 1
+    error_message = "memory_alarm_evaluation_periods must be at least 1."
+  }
+}
+
+variable "memory_alarm_period" {
+  type        = number
+  description = "Period in seconds over which the freeable memory metric is evaluated"
+  default     = 300
+
+  validation {
+    condition     = var.memory_alarm_period >= 60 && var.memory_alarm_period % 60 == 0
+    error_message = "memory_alarm_period must be a multiple of 60 seconds and at least 60 (CloudWatch requirement)."
+  }
+}
+
+variable "read_iops_alarm_threshold" {
+  type        = number
+  description = "ReadIOPS threshold. Alarm fires when IOPS exceeds this value."
+  default     = 3000
+
+  validation {
+    condition     = var.read_iops_alarm_threshold > 0
+    error_message = "read_iops_alarm_threshold must be greater than 0."
+  }
+}
+
+variable "iops_alarm_evaluation_periods" {
+  type        = number
+  description = "Number of consecutive periods the IOPS threshold must be breached before alarming"
+  default     = 3
+
+  validation {
+    condition     = var.iops_alarm_evaluation_periods >= 1
+    error_message = "iops_alarm_evaluation_periods must be at least 1."
+  }
+}
+
+variable "iops_alarm_period" {
+  type        = number
+  description = "Period in seconds over which the IOPS metric is evaluated"
+  default     = 300
+
+  validation {
+    condition     = var.iops_alarm_period >= 60 && var.iops_alarm_period % 60 == 0
+    error_message = "iops_alarm_period must be a multiple of 60 seconds and at least 60 (CloudWatch requirement)."
+  }
+}
+
+variable "alarm_actions" {
+  type        = list(string)
+  description = "List of ARNs to notify when the alarm transitions state (e.g. SNS topic ARNs)"
+  default     = []
+}
--- a/profiling/grafana/dashboards/onyx/opensearch-search-latency.json
+++ b/profiling/grafana/dashboards/onyx/opensearch-search-latency.json
@@ -0,0 +1,349 @@
+{
+  "annotations": {
+    "list": [
+      {
+        "builtIn": 1,
+        "datasource": { "type": "grafana", "uid": "-- Grafana --" },
+        "enable": true,
+        "hide": true,
+        "iconColor": "rgba(0, 211, 255, 1)",
+        "name": "Annotations & Alerts",
+        "type": "dashboard"
+      }
+    ]
+  },
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 1,
+  "id": null,
+  "links": [],
+  "liveNow": true,
+  "panels": [
+    {
+      "title": "Client-Side Search Latency (P50 / P95 / P99)",
+      "description": "End-to-end latency as measured by the Python client, including network round-trip and serialization overhead.",
+      "type": "timeseries",
+      "gridPos": { "h": 10, "w": 12, "x": 0, "y": 0 },
+      "id": 1,
+      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "palette-classic" },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisLabel": "seconds",
+            "axisPlacement": "auto",
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "lineInterpolation": "smooth",
+            "lineWidth": 2,
+            "pointSize": 5,
+            "scaleDistribution": { "type": "linear" },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": { "group": "A", "mode": "none" },
+            "thresholdsStyle": { "mode": "dashed" }
+          },
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              { "color": "green", "value": null },
+              { "color": "yellow", "value": 0.5 },
+              { "color": "red", "value": 2.0 }
+            ]
+          },
+          "unit": "s",
+          "min": 0
+        },
+        "overrides": []
+      },
+      "targets": [
+        {
+          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+          "expr": "histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m])))",
+          "legendFormat": "P50",
+          "refId": "A"
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+          "expr": "histogram_quantile(0.95, sum by (le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m])))",
+          "legendFormat": "P95",
+          "refId": "B"
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+          "expr": "histogram_quantile(0.99, sum by (le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m])))",
+          "legendFormat": "P99",
+          "refId": "C"
+        }
+      ]
+    },
+    {
+      "title": "Server-Side Search Latency (P50 / P95 / P99)",
+      "description": "OpenSearch server-side execution time from the 'took' field in the response. Does not include network or client-side overhead.",
+      "type": "timeseries",
+      "gridPos": { "h": 10, "w": 12, "x": 12, "y": 0 },
+      "id": 2,
+      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "palette-classic" },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisLabel": "seconds",
+            "axisPlacement": "auto",
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "lineInterpolation": "smooth",
+            "lineWidth": 2,
+            "pointSize": 5,
+            "scaleDistribution": { "type": "linear" },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": { "group": "A", "mode": "none" },
+            "thresholdsStyle": { "mode": "dashed" }
+          },
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              { "color": "green", "value": null },
+              { "color": "yellow", "value": 0.5 },
+              { "color": "red", "value": 2.0 }
+            ]
+          },
+          "unit": "s",
+          "min": 0
+        },
+        "overrides": []
+      },
+      "targets": [
+        {
+          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+          "expr": "histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_server_duration_seconds_bucket[5m])))",
+          "legendFormat": "P50",
+          "refId": "A"
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+          "expr": "histogram_quantile(0.95, sum by (le) (rate(onyx_opensearch_search_server_duration_seconds_bucket[5m])))",
+          "legendFormat": "P95",
+          "refId": "B"
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+          "expr": "histogram_quantile(0.99, sum by (le) (rate(onyx_opensearch_search_server_duration_seconds_bucket[5m])))",
+          "legendFormat": "P99",
+          "refId": "C"
+        }
+      ]
+    },
+    {
+      "title": "Client-Side Latency by Search Type (P95)",
+      "description": "P95 client-side latency broken down by search type (hybrid, keyword, semantic, random, doc_id_retrieval).",
+      "type": "timeseries",
+      "gridPos": { "h": 10, "w": 12, "x": 0, "y": 10 },
+      "id": 3,
+      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "palette-classic" },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisLabel": "seconds",
+            "axisPlacement": "auto",
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "lineInterpolation": "smooth",
+            "lineWidth": 2,
+            "pointSize": 5,
+            "scaleDistribution": { "type": "linear" },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": { "group": "A", "mode": "none" },
+            "thresholdsStyle": { "mode": "off" }
+          },
+          "unit": "s",
+          "min": 0
+        },
+        "overrides": []
+      },
+      "targets": [
+        {
+          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+          "expr": "histogram_quantile(0.95, sum by (search_type, le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m])))",
+          "legendFormat": "{{ search_type }}",
+          "refId": "A"
+        }
+      ]
+    },
+    {
+      "title": "Search Throughput by Type",
+      "description": "Searches per second broken down by search type.",
+      "type": "timeseries",
+      "gridPos": { "h": 10, "w": 12, "x": 12, "y": 10 },
+      "id": 4,
+      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "palette-classic" },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisLabel": "searches/s",
+            "axisPlacement": "auto",
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "lineInterpolation": "smooth",
+            "lineWidth": 2,
+            "pointSize": 5,
+            "scaleDistribution": { "type": "linear" },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": { "group": "A", "mode": "normal" },
+            "thresholdsStyle": { "mode": "off" }
+          },
+          "unit": "ops",
+          "min": 0
+        },
+        "overrides": []
+      },
+      "targets": [
+        {
+          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+          "expr": "sum by (search_type) (rate(onyx_opensearch_search_total[5m]))",
+          "legendFormat": "{{ search_type }}",
+          "refId": "A"
+        }
+      ]
+    },
+    {
+      "title": "Concurrent Searches In Progress",
+      "description": "Number of OpenSearch searches currently in flight, broken down by search type. Summed across all instances.",
+      "type": "timeseries",
+      "gridPos": { "h": 10, "w": 12, "x": 0, "y": 20 },
+      "id": 5,
+      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "palette-classic" },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisLabel": "searches",
+            "axisPlacement": "auto",
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "lineInterpolation": "smooth",
+            "lineWidth": 2,
+            "pointSize": 5,
+            "scaleDistribution": { "type": "linear" },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": { "group": "A", "mode": "normal" },
+            "thresholdsStyle": { "mode": "off" }
+          },
+          "min": 0
+        },
+        "overrides": []
+      },
+      "targets": [
+        {
+          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+          "expr": "sum by (search_type) (onyx_opensearch_searches_in_progress)",
+          "legendFormat": "{{ search_type }}",
+          "refId": "A"
+        }
+      ]
+    },
+    {
+      "title": "Client vs Server Latency Overhead (P50)",
+      "description": "Client-side P50 minus server-side P50. This is a difference of two medians, not the median of per-request overhead, so treat it as an approximation. Indicates network, serialization, and untracked OpenSearch overhead.",
+      "type": "timeseries",
+      "gridPos": { "h": 10, "w": 12, "x": 12, "y": 20 },
+      "id": 6,
+      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+      "fieldConfig": {
+        "defaults": {
+          "color": { "mode": "palette-classic" },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisLabel": "seconds",
+            "axisPlacement": "auto",
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "lineInterpolation": "smooth",
+            "lineWidth": 2,
+            "pointSize": 5,
+            "scaleDistribution": { "type": "linear" },
+            "showPoints": "never",
+            "spanNulls": false,
+            "stacking": { "group": "A", "mode": "none" },
+            "thresholdsStyle": { "mode": "off" }
+          },
+          "unit": "s",
+          "min": 0
+        },
+        "overrides": []
+      },
+      "targets": [
+        {
+          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+          "expr": "histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m]))) - histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_server_duration_seconds_bucket[5m])))",
+          "legendFormat": "Client - Server overhead (P50)",
+          "refId": "A"
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+          "expr": "histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_client_duration_seconds_bucket[5m])))",
+          "legendFormat": "Client P50",
+          "refId": "B"
+        },
+        {
+          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
+          "expr": "histogram_quantile(0.5, sum by (le) (rate(onyx_opensearch_search_server_duration_seconds_bucket[5m])))",
+          "legendFormat": "Server P50",
+          "refId": "C"
+        }
+      ]
+    }
+  ],
+  "refresh": "5s",
+  "schemaVersion": 37,
+  "style": "dark",
+  "tags": ["onyx", "opensearch", "search", "latency"],
+  "templating": {
+    "list": [
+      {
+        "current": {
+          "text": "Prometheus",
+          "value": "prometheus"
+        },
+        "includeAll": false,
+        "name": "DS_PROMETHEUS",
+        "options": [],
+        "query": "prometheus",
+        "refresh": 1,
+        "type": "datasource"
+      }
+    ]
+  },
+  "time": { "from": "now-60m", "to": "now" },
+  "timepicker": {
+    "refresh_intervals": ["5s", "10s", "30s", "1m"]
+  },
+  "timezone": "",
+  "title": "Onyx OpenSearch Search Latency",
+  "uid": "onyx-opensearch-search-latency",
+  "version": 0,
+  "weekStart": ""
+}
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -66,7 +66,7 @@ backend = [
    "jsonref==1.1.0",
    "kubernetes==31.0.0",
    "trafilatura==1.12.2",
-    "langchain-core==1.2.11",
+    "langchain-core==1.2.22",
    "lazy_imports==1.0.1",
    "lxml==5.3.0",
    "Mako==1.2.4",
--- a/uv.lock
+++ b/uv.lock
@@ -1255,61 +1255,61 @@ wheels = [

 [[package]]
 name = "cryptography"
-version = "46.0.5"
+version = "46.0.6"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "cffi", marker = "platform_python_implementation != 'PyPy'" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/60/04/ee2a9e8542e4fa2773b81771ff8349ff19cdd56b7258a0cc442639052edb/cryptography-46.0.5.tar.gz", hash = "sha256:abace499247268e3757271b2f1e244b36b06f8515cf27c4d49468fc9eb16e93d", size = 750064, upload-time = "2026-02-10T19:18:38.255Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/a4/ba/04b1bd4218cbc58dc90ce967106d51582371b898690f3ae0402876cc4f34/cryptography-46.0.6.tar.gz", hash = "sha256:27550628a518c5c6c903d84f637fbecf287f6cb9ced3804838a1295dc1fd0759", size = 750542, upload-time = "2026-03-25T23:34:53.396Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/f7/81/b0bb27f2ba931a65409c6b8a8b358a7f03c0e46eceacddff55f7c84b1f3b/cryptography-46.0.5-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:351695ada9ea9618b3500b490ad54c739860883df6c1f555e088eaf25b1bbaad", size = 7176289, upload-time = "2026-02-10T19:17:08.274Z" },
-    { url = "https://files.pythonhosted.org/packages/ff/9e/6b4397a3e3d15123de3b1806ef342522393d50736c13b20ec4c9ea6693a6/cryptography-46.0.5-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c18ff11e86df2e28854939acde2d003f7984f721eba450b56a200ad90eeb0e6b", size = 4275637, upload-time = "2026-02-10T19:17:10.53Z" },
-    { url = "https://files.pythonhosted.org/packages/63/e7/471ab61099a3920b0c77852ea3f0ea611c9702f651600397ac567848b897/cryptography-46.0.5-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d7e3d356b8cd4ea5aff04f129d5f66ebdc7b6f8eae802b93739ed520c47c79b", size = 4424742, upload-time = "2026-02-10T19:17:12.388Z" },
-    { url = "https://files.pythonhosted.org/packages/37/53/a18500f270342d66bf7e4d9f091114e31e5ee9e7375a5aba2e85a91e0044/cryptography-46.0.5-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:50bfb6925eff619c9c023b967d5b77a54e04256c4281b0e21336a130cd7fc263", size = 4277528, upload-time = "2026-02-10T19:17:13.853Z" },
-    { url = "https://files.pythonhosted.org/packages/22/29/c2e812ebc38c57b40e7c583895e73c8c5adb4d1e4a0cc4c5a4fdab2b1acc/cryptography-46.0.5-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:803812e111e75d1aa73690d2facc295eaefd4439be1023fefc4995eaea2af90d", size = 4947993, upload-time = "2026-02-10T19:17:15.618Z" },
-    { url = "https://files.pythonhosted.org/packages/6b/e7/237155ae19a9023de7e30ec64e5d99a9431a567407ac21170a046d22a5a3/cryptography-46.0.5-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3ee190460e2fbe447175cda91b88b84ae8322a104fc27766ad09428754a618ed", size = 4456855, upload-time = "2026-02-10T19:17:17.221Z" },
-    { url = "https://files.pythonhosted.org/packages/2d/87/fc628a7ad85b81206738abbd213b07702bcbdada1dd43f72236ef3cffbb5/cryptography-46.0.5-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:f145bba11b878005c496e93e257c1e88f154d278d2638e6450d17e0f31e558d2", size = 3984635, upload-time = "2026-02-10T19:17:18.792Z" },
-    { url = "https://files.pythonhosted.org/packages/84/29/65b55622bde135aedf4565dc509d99b560ee4095e56989e815f8fd2aa910/cryptography-46.0.5-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:e9251e3be159d1020c4030bd2e5f84d6a43fe54b6c19c12f51cde9542a2817b2", size = 4277038, upload-time = "2026-02-10T19:17:20.256Z" },
-    { url = "https://files.pythonhosted.org/packages/bc/36/45e76c68d7311432741faf1fbf7fac8a196a0a735ca21f504c75d37e2558/cryptography-46.0.5-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:47fb8a66058b80e509c47118ef8a75d14c455e81ac369050f20ba0d23e77fee0", size = 4912181, upload-time = "2026-02-10T19:17:21.825Z" },
-    { url = "https://files.pythonhosted.org/packages/6d/1a/c1ba8fead184d6e3d5afcf03d569acac5ad063f3ac9fb7258af158f7e378/cryptography-46.0.5-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:4c3341037c136030cb46e4b1e17b7418ea4cbd9dd207e4a6f3b2b24e0d4ac731", size = 4456482, upload-time = "2026-02-10T19:17:25.133Z" },
-    { url = "https://files.pythonhosted.org/packages/f9/e5/3fb22e37f66827ced3b902cf895e6a6bc1d095b5b26be26bd13c441fdf19/cryptography-46.0.5-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:890bcb4abd5a2d3f852196437129eb3667d62630333aacc13dfd470fad3aaa82", size = 4405497, upload-time = "2026-02-10T19:17:26.66Z" },
-    { url = "https://files.pythonhosted.org/packages/1a/df/9d58bb32b1121a8a2f27383fabae4d63080c7ca60b9b5c88be742be04ee7/cryptography-46.0.5-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:80a8d7bfdf38f87ca30a5391c0c9ce4ed2926918e017c29ddf643d0ed2778ea1", size = 4667819, upload-time = "2026-02-10T19:17:28.569Z" },
-    { url = "https://files.pythonhosted.org/packages/ea/ed/325d2a490c5e94038cdb0117da9397ece1f11201f425c4e9c57fe5b9f08b/cryptography-46.0.5-cp311-abi3-win32.whl", hash = "sha256:60ee7e19e95104d4c03871d7d7dfb3d22ef8a9b9c6778c94e1c8fcc8365afd48", size = 3028230, upload-time = "2026-02-10T19:17:30.518Z" },
-    { url = "https://files.pythonhosted.org/packages/e9/5a/ac0f49e48063ab4255d9e3b79f5def51697fce1a95ea1370f03dc9db76f6/cryptography-46.0.5-cp311-abi3-win_amd64.whl", hash = "sha256:38946c54b16c885c72c4f59846be9743d699eee2b69b6988e0a00a01f46a61a4", size = 3480909, upload-time = "2026-02-10T19:17:32.083Z" },
-    { url = "https://files.pythonhosted.org/packages/00/13/3d278bfa7a15a96b9dc22db5a12ad1e48a9eb3d40e1827ef66a5df75d0d0/cryptography-46.0.5-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:94a76daa32eb78d61339aff7952ea819b1734b46f73646a07decb40e5b3448e2", size = 7119287, upload-time = "2026-02-10T19:17:33.801Z" },
-    { url = "https://files.pythonhosted.org/packages/67/c8/581a6702e14f0898a0848105cbefd20c058099e2c2d22ef4e476dfec75d7/cryptography-46.0.5-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5be7bf2fb40769e05739dd0046e7b26f9d4670badc7b032d6ce4db64dddc0678", size = 4265728, upload-time = "2026-02-10T19:17:35.569Z" },
-    { url = "https://files.pythonhosted.org/packages/dd/4a/ba1a65ce8fc65435e5a849558379896c957870dd64fecea97b1ad5f46a37/cryptography-46.0.5-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fe346b143ff9685e40192a4960938545c699054ba11d4f9029f94751e3f71d87", size = 4408287, upload-time = "2026-02-10T19:17:36.938Z" },
-    { url = "https://files.pythonhosted.org/packages/f8/67/8ffdbf7b65ed1ac224d1c2df3943553766914a8ca718747ee3871da6107e/cryptography-46.0.5-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:c69fd885df7d089548a42d5ec05be26050ebcd2283d89b3d30676eb32ff87dee", size = 4270291, upload-time = "2026-02-10T19:17:38.748Z" },
-    { url = "https://files.pythonhosted.org/packages/f8/e5/f52377ee93bc2f2bba55a41a886fd208c15276ffbd2569f2ddc89d50e2c5/cryptography-46.0.5-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:8293f3dea7fc929ef7240796ba231413afa7b68ce38fd21da2995549f5961981", size = 4927539, upload-time = "2026-02-10T19:17:40.241Z" },
-    { url = "https://files.pythonhosted.org/packages/3b/02/cfe39181b02419bbbbcf3abdd16c1c5c8541f03ca8bda240debc467d5a12/cryptography-46.0.5-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:1abfdb89b41c3be0365328a410baa9df3ff8a9110fb75e7b52e66803ddabc9a9", size = 4442199, upload-time = "2026-02-10T19:17:41.789Z" },
-    { url = "https://files.pythonhosted.org/packages/c0/96/2fcaeb4873e536cf71421a388a6c11b5bc846e986b2b069c79363dc1648e/cryptography-46.0.5-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:d66e421495fdb797610a08f43b05269e0a5ea7f5e652a89bfd5a7d3c1dee3648", size = 3960131, upload-time = "2026-02-10T19:17:43.379Z" },
-    { url = "https://files.pythonhosted.org/packages/d8/d2/b27631f401ddd644e94c5cf33c9a4069f72011821cf3dc7309546b0642a0/cryptography-46.0.5-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:4e817a8920bfbcff8940ecfd60f23d01836408242b30f1a708d93198393a80b4", size = 4270072, upload-time = "2026-02-10T19:17:45.481Z" },
-    { url = "https://files.pythonhosted.org/packages/f4/a7/60d32b0370dae0b4ebe55ffa10e8599a2a59935b5ece1b9f06edb73abdeb/cryptography-46.0.5-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:68f68d13f2e1cb95163fa3b4db4bf9a159a418f5f6e7242564fc75fcae667fd0", size = 4892170, upload-time = "2026-02-10T19:17:46.997Z" },
-    { url = "https://files.pythonhosted.org/packages/d2/b9/cf73ddf8ef1164330eb0b199a589103c363afa0cf794218c24d524a58eab/cryptography-46.0.5-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:a3d1fae9863299076f05cb8a778c467578262fae09f9dc0ee9b12eb4268ce663", size = 4441741, upload-time = "2026-02-10T19:17:48.661Z" },
-    { url = "https://files.pythonhosted.org/packages/5f/eb/eee00b28c84c726fe8fa0158c65afe312d9c3b78d9d01daf700f1f6e37ff/cryptography-46.0.5-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c4143987a42a2397f2fc3b4d7e3a7d313fbe684f67ff443999e803dd75a76826", size = 4396728, upload-time = "2026-02-10T19:17:50.058Z" },
-    { url = "https://files.pythonhosted.org/packages/65/f4/6bc1a9ed5aef7145045114b75b77c2a8261b4d38717bd8dea111a63c3442/cryptography-46.0.5-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:7d731d4b107030987fd61a7f8ab512b25b53cef8f233a97379ede116f30eb67d", size = 4652001, upload-time = "2026-02-10T19:17:51.54Z" },
-    { url = "https://files.pythonhosted.org/packages/86/ef/5d00ef966ddd71ac2e6951d278884a84a40ffbd88948ef0e294b214ae9e4/cryptography-46.0.5-cp314-cp314t-win32.whl", hash = "sha256:c3bcce8521d785d510b2aad26ae2c966092b7daa8f45dd8f44734a104dc0bc1a", size = 3003637, upload-time = "2026-02-10T19:17:52.997Z" },
-    { url = "https://files.pythonhosted.org/packages/b7/57/f3f4160123da6d098db78350fdfd9705057aad21de7388eacb2401dceab9/cryptography-46.0.5-cp314-cp314t-win_amd64.whl", hash = "sha256:4d8ae8659ab18c65ced284993c2265910f6c9e650189d4e3f68445ef82a810e4", size = 3469487, upload-time = "2026-02-10T19:17:54.549Z" },
-    { url = "https://files.pythonhosted.org/packages/e2/fa/a66aa722105ad6a458bebd64086ca2b72cdd361fed31763d20390f6f1389/cryptography-46.0.5-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:4108d4c09fbbf2789d0c926eb4152ae1760d5a2d97612b92d508d96c861e4d31", size = 7170514, upload-time = "2026-02-10T19:17:56.267Z" },
-    { url = "https://files.pythonhosted.org/packages/0f/04/c85bdeab78c8bc77b701bf0d9bdcf514c044e18a46dcff330df5448631b0/cryptography-46.0.5-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7d1f30a86d2757199cb2d56e48cce14deddf1f9c95f1ef1b64ee91ea43fe2e18", size = 4275349, upload-time = "2026-02-10T19:17:58.419Z" },
-    { url = "https://files.pythonhosted.org/packages/5c/32/9b87132a2f91ee7f5223b091dc963055503e9b442c98fc0b8a5ca765fab0/cryptography-46.0.5-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:039917b0dc418bb9f6edce8a906572d69e74bd330b0b3fea4f79dab7f8ddd235", size = 4420667, upload-time = "2026-02-10T19:18:00.619Z" },
-    { url = "https://files.pythonhosted.org/packages/a1/a6/a7cb7010bec4b7c5692ca6f024150371b295ee1c108bdc1c400e4c44562b/cryptography-46.0.5-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:ba2a27ff02f48193fc4daeadf8ad2590516fa3d0adeeb34336b96f7fa64c1e3a", size = 4276980, upload-time = "2026-02-10T19:18:02.379Z" },
-    { url = "https://files.pythonhosted.org/packages/8e/7c/c4f45e0eeff9b91e3f12dbd0e165fcf2a38847288fcfd889deea99fb7b6d/cryptography-46.0.5-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:61aa400dce22cb001a98014f647dc21cda08f7915ceb95df0c9eaf84b4b6af76", size = 4939143, upload-time = "2026-02-10T19:18:03.964Z" },
-    { url = "https://files.pythonhosted.org/packages/37/19/e1b8f964a834eddb44fa1b9a9976f4e414cbb7aa62809b6760c8803d22d1/cryptography-46.0.5-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3ce58ba46e1bc2aac4f7d9290223cead56743fa6ab94a5d53292ffaac6a91614", size = 4453674, upload-time = "2026-02-10T19:18:05.588Z" },
-    { url = "https://files.pythonhosted.org/packages/db/ed/db15d3956f65264ca204625597c410d420e26530c4e2943e05a0d2f24d51/cryptography-46.0.5-cp38-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:420d0e909050490d04359e7fdb5ed7e667ca5c3c402b809ae2563d7e66a92229", size = 3978801, upload-time = "2026-02-10T19:18:07.167Z" },
-    { url = "https://files.pythonhosted.org/packages/41/e2/df40a31d82df0a70a0daf69791f91dbb70e47644c58581d654879b382d11/cryptography-46.0.5-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:582f5fcd2afa31622f317f80426a027f30dc792e9c80ffee87b993200ea115f1", size = 4276755, upload-time = "2026-02-10T19:18:09.813Z" },
-    { url = "https://files.pythonhosted.org/packages/33/45/726809d1176959f4a896b86907b98ff4391a8aa29c0aaaf9450a8a10630e/cryptography-46.0.5-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:bfd56bb4b37ed4f330b82402f6f435845a5f5648edf1ad497da51a8452d5d62d", size = 4901539, upload-time = "2026-02-10T19:18:11.263Z" },
-    { url = "https://files.pythonhosted.org/packages/99/0f/a3076874e9c88ecb2ecc31382f6e7c21b428ede6f55aafa1aa272613e3cd/cryptography-46.0.5-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:a3d507bb6a513ca96ba84443226af944b0f7f47dcc9a399d110cd6146481d24c", size = 4452794, upload-time = "2026-02-10T19:18:12.914Z" },
-    { url = "https://files.pythonhosted.org/packages/02/ef/ffeb542d3683d24194a38f66ca17c0a4b8bf10631feef44a7ef64e631b1a/cryptography-46.0.5-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9f16fbdf4da055efb21c22d81b89f155f02ba420558db21288b3d0035bafd5f4", size = 4404160, upload-time = "2026-02-10T19:18:14.375Z" },
-    { url = "https://files.pythonhosted.org/packages/96/93/682d2b43c1d5f1406ed048f377c0fc9fc8f7b0447a478d5c65ab3d3a66eb/cryptography-46.0.5-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:ced80795227d70549a411a4ab66e8ce307899fad2220ce5ab2f296e687eacde9", size = 4667123, upload-time = "2026-02-10T19:18:15.886Z" },
-    { url = "https://files.pythonhosted.org/packages/45/2d/9c5f2926cb5300a8eefc3f4f0b3f3df39db7f7ce40c8365444c49363cbda/cryptography-46.0.5-cp38-abi3-win32.whl", hash = "sha256:02f547fce831f5096c9a567fd41bc12ca8f11df260959ecc7c3202555cc47a72", size = 3010220, upload-time = "2026-02-10T19:18:17.361Z" },
-    { url = "https://files.pythonhosted.org/packages/48/ef/0c2f4a8e31018a986949d34a01115dd057bf536905dca38897bacd21fac3/cryptography-46.0.5-cp38-abi3-win_amd64.whl", hash = "sha256:556e106ee01aa13484ce9b0239bca667be5004efb0aabbed28d353df86445595", size = 3467050, upload-time = "2026-02-10T19:18:18.899Z" },
-    { url = "https://files.pythonhosted.org/packages/eb/dd/2d9fdb07cebdf3d51179730afb7d5e576153c6744c3ff8fded23030c204e/cryptography-46.0.5-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:3b4995dc971c9fb83c25aa44cf45f02ba86f71ee600d81091c2f0cbae116b06c", size = 3476964, upload-time = "2026-02-10T19:18:20.687Z" },
-    { url = "https://files.pythonhosted.org/packages/e9/6f/6cc6cc9955caa6eaf83660b0da2b077c7fe8ff9950a3c5e45d605038d439/cryptography-46.0.5-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:bc84e875994c3b445871ea7181d424588171efec3e185dced958dad9e001950a", size = 4218321, upload-time = "2026-02-10T19:18:22.349Z" },
-    { url = "https://files.pythonhosted.org/packages/3e/5d/c4da701939eeee699566a6c1367427ab91a8b7088cc2328c09dbee940415/cryptography-46.0.5-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:2ae6971afd6246710480e3f15824ed3029a60fc16991db250034efd0b9fb4356", size = 4381786, upload-time = "2026-02-10T19:18:24.529Z" },
-    { url = "https://files.pythonhosted.org/packages/ac/97/a538654732974a94ff96c1db621fa464f455c02d4bb7d2652f4edc21d600/cryptography-46.0.5-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:d861ee9e76ace6cf36a6a89b959ec08e7bc2493ee39d07ffe5acb23ef46d27da", size = 4217990, upload-time = "2026-02-10T19:18:25.957Z" },
-    { url = "https://files.pythonhosted.org/packages/ae/11/7e500d2dd3ba891197b9efd2da5454b74336d64a7cc419aa7327ab74e5f6/cryptography-46.0.5-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:2b7a67c9cd56372f3249b39699f2ad479f6991e62ea15800973b956f4b73e257", size = 4381252, upload-time = "2026-02-10T19:18:27.496Z" },
-    { url = "https://files.pythonhosted.org/packages/bc/58/6b3d24e6b9bc474a2dcdee65dfd1f008867015408a271562e4b690561a4d/cryptography-46.0.5-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:8456928655f856c6e1533ff59d5be76578a7157224dbd9ce6872f25055ab9ab7", size = 3407605, upload-time = "2026-02-10T19:18:29.233Z" },
+    { url = "https://files.pythonhosted.org/packages/47/23/9285e15e3bc57325b0a72e592921983a701efc1ee8f91c06c5f0235d86d9/cryptography-46.0.6-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:64235194bad039a10bb6d2d930ab3323baaec67e2ce36215fd0952fad0930ca8", size = 7176401, upload-time = "2026-03-25T23:33:22.096Z" },
+    { url = "https://files.pythonhosted.org/packages/60/f8/e61f8f13950ab6195b31913b42d39f0f9afc7d93f76710f299b5ec286ae6/cryptography-46.0.6-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:26031f1e5ca62fcb9d1fcb34b2b60b390d1aacaa15dc8b895a9ed00968b97b30", size = 4275275, upload-time = "2026-03-25T23:33:23.844Z" },
+    { url = "https://files.pythonhosted.org/packages/19/69/732a736d12c2631e140be2348b4ad3d226302df63ef64d30dfdb8db7ad1c/cryptography-46.0.6-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9a693028b9cbe51b5a1136232ee8f2bc242e4e19d456ded3fa7c86e43c713b4a", size = 4425320, upload-time = "2026-03-25T23:33:25.703Z" },
+    { url = "https://files.pythonhosted.org/packages/d4/12/123be7292674abf76b21ac1fc0e1af50661f0e5b8f0ec8285faac18eb99e/cryptography-46.0.6-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:67177e8a9f421aa2d3a170c3e56eca4e0128883cf52a071a7cbf53297f18b175", size = 4278082, upload-time = "2026-03-25T23:33:27.423Z" },
+    { url = "https://files.pythonhosted.org/packages/5b/ba/d5e27f8d68c24951b0a484924a84c7cdaed7502bac9f18601cd357f8b1d2/cryptography-46.0.6-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:d9528b535a6c4f8ff37847144b8986a9a143585f0540fbcb1a98115b543aa463", size = 4926514, upload-time = "2026-03-25T23:33:29.206Z" },
+    { url = "https://files.pythonhosted.org/packages/34/71/1ea5a7352ae516d5512d17babe7e1b87d9db5150b21f794b1377eac1edc0/cryptography-46.0.6-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:22259338084d6ae497a19bae5d4c66b7ca1387d3264d1c2c0e72d9e9b6a77b97", size = 4457766, upload-time = "2026-03-25T23:33:30.834Z" },
+    { url = "https://files.pythonhosted.org/packages/01/59/562be1e653accee4fdad92c7a2e88fced26b3fdfce144047519bbebc299e/cryptography-46.0.6-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:760997a4b950ff00d418398ad73fbc91aa2894b5c1db7ccb45b4f68b42a63b3c", size = 3986535, upload-time = "2026-03-25T23:33:33.02Z" },
+    { url = "https://files.pythonhosted.org/packages/d6/8b/b1ebfeb788bf4624d36e45ed2662b8bd43a05ff62157093c1539c1288a18/cryptography-46.0.6-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:3dfa6567f2e9e4c5dceb8ccb5a708158a2a871052fa75c8b78cb0977063f1507", size = 4277618, upload-time = "2026-03-25T23:33:34.567Z" },
+    { url = "https://files.pythonhosted.org/packages/dd/52/a005f8eabdb28df57c20f84c44d397a755782d6ff6d455f05baa2785bd91/cryptography-46.0.6-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:cdcd3edcbc5d55757e5f5f3d330dd00007ae463a7e7aa5bf132d1f22a4b62b19", size = 4890802, upload-time = "2026-03-25T23:33:37.034Z" },
+    { url = "https://files.pythonhosted.org/packages/ec/4d/8e7d7245c79c617d08724e2efa397737715ca0ec830ecb3c91e547302555/cryptography-46.0.6-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:d4e4aadb7fc1f88687f47ca20bb7227981b03afaae69287029da08096853b738", size = 4457425, upload-time = "2026-03-25T23:33:38.904Z" },
+    { url = "https://files.pythonhosted.org/packages/1d/5c/f6c3596a1430cec6f949085f0e1a970638d76f81c3ea56d93d564d04c340/cryptography-46.0.6-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:2b417edbe8877cda9022dde3a008e2deb50be9c407eef034aeeb3a8b11d9db3c", size = 4405530, upload-time = "2026-03-25T23:33:40.842Z" },
+    { url = "https://files.pythonhosted.org/packages/7e/c9/9f9cea13ee2dbde070424e0c4f621c091a91ffcc504ffea5e74f0e1daeff/cryptography-46.0.6-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:380343e0653b1c9d7e1f55b52aaa2dbb2fdf2730088d48c43ca1c7c0abb7cc2f", size = 4667896, upload-time = "2026-03-25T23:33:42.781Z" },
+    { url = "https://files.pythonhosted.org/packages/ad/b5/1895bc0821226f129bc74d00eccfc6a5969e2028f8617c09790bf89c185e/cryptography-46.0.6-cp311-abi3-win32.whl", hash = "sha256:bcb87663e1f7b075e48c3be3ecb5f0b46c8fc50b50a97cf264e7f60242dca3f2", size = 3026348, upload-time = "2026-03-25T23:33:45.021Z" },
+    { url = "https://files.pythonhosted.org/packages/c3/f8/c9bcbf0d3e6ad288b9d9aa0b1dee04b063d19e8c4f871855a03ab3a297ab/cryptography-46.0.6-cp311-abi3-win_amd64.whl", hash = "sha256:6739d56300662c468fddb0e5e291f9b4d084bead381667b9e654c7dd81705124", size = 3483896, upload-time = "2026-03-25T23:33:46.649Z" },
+    { url = "https://files.pythonhosted.org/packages/01/41/3a578f7fd5c70611c0aacba52cd13cb364a5dee895a5c1d467208a9380b0/cryptography-46.0.6-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:2ef9e69886cbb137c2aef9772c2e7138dc581fad4fcbcf13cc181eb5a3ab6275", size = 7117147, upload-time = "2026-03-25T23:33:48.249Z" },
+    { url = "https://files.pythonhosted.org/packages/fa/87/887f35a6fca9dde90cad08e0de0c89263a8e59b2d2ff904fd9fcd8025b6f/cryptography-46.0.6-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7f417f034f91dcec1cb6c5c35b07cdbb2ef262557f701b4ecd803ee8cefed4f4", size = 4266221, upload-time = "2026-03-25T23:33:49.874Z" },
+    { url = "https://files.pythonhosted.org/packages/aa/a8/0a90c4f0b0871e0e3d1ed126aed101328a8a57fd9fd17f00fb67e82a51ca/cryptography-46.0.6-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d24c13369e856b94892a89ddf70b332e0b70ad4a5c43cf3e9cb71d6d7ffa1f7b", size = 4408952, upload-time = "2026-03-25T23:33:52.128Z" },
+    { url = "https://files.pythonhosted.org/packages/16/0b/b239701eb946523e4e9f329336e4ff32b1247e109cbab32d1a7b61da8ed7/cryptography-46.0.6-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:aad75154a7ac9039936d50cf431719a2f8d4ed3d3c277ac03f3339ded1a5e707", size = 4270141, upload-time = "2026-03-25T23:33:54.11Z" },
+    { url = "https://files.pythonhosted.org/packages/0f/a8/976acdd4f0f30df7b25605f4b9d3d89295351665c2091d18224f7ad5cdbf/cryptography-46.0.6-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:3c21d92ed15e9cfc6eb64c1f5a0326db22ca9c2566ca46d845119b45b4400361", size = 4904178, upload-time = "2026-03-25T23:33:55.725Z" },
+    { url = "https://files.pythonhosted.org/packages/b1/1b/bf0e01a88efd0e59679b69f42d4afd5bced8700bb5e80617b2d63a3741af/cryptography-46.0.6-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:4668298aef7cddeaf5c6ecc244c2302a2b8e40f384255505c22875eebb47888b", size = 4441812, upload-time = "2026-03-25T23:33:57.364Z" },
+    { url = "https://files.pythonhosted.org/packages/bb/8b/11df86de2ea389c65aa1806f331cae145f2ed18011f30234cc10ca253de8/cryptography-46.0.6-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:8ce35b77aaf02f3b59c90b2c8a05c73bac12cea5b4e8f3fbece1f5fddea5f0ca", size = 3963923, upload-time = "2026-03-25T23:33:59.361Z" },
+    { url = "https://files.pythonhosted.org/packages/91/e0/207fb177c3a9ef6a8108f234208c3e9e76a6aa8cf20d51932916bd43bda0/cryptography-46.0.6-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:c89eb37fae9216985d8734c1afd172ba4927f5a05cfd9bf0e4863c6d5465b013", size = 4269695, upload-time = "2026-03-25T23:34:00.909Z" },
+    { url = "https://files.pythonhosted.org/packages/21/5e/19f3260ed1e95bced52ace7501fabcd266df67077eeb382b79c81729d2d3/cryptography-46.0.6-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:ed418c37d095aeddf5336898a132fba01091f0ac5844e3e8018506f014b6d2c4", size = 4869785, upload-time = "2026-03-25T23:34:02.796Z" },
+    { url = "https://files.pythonhosted.org/packages/10/38/cd7864d79aa1d92ef6f1a584281433419b955ad5a5ba8d1eb6c872165bcb/cryptography-46.0.6-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:69cf0056d6947edc6e6760e5f17afe4bea06b56a9ac8a06de9d2bd6b532d4f3a", size = 4441404, upload-time = "2026-03-25T23:34:04.35Z" },
+    { url = "https://files.pythonhosted.org/packages/09/0a/4fe7a8d25fed74419f91835cf5829ade6408fd1963c9eae9c4bce390ecbb/cryptography-46.0.6-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:8e7304c4f4e9490e11efe56af6713983460ee0780f16c63f219984dab3af9d2d", size = 4397549, upload-time = "2026-03-25T23:34:06.342Z" },
+    { url = "https://files.pythonhosted.org/packages/5f/a0/7d738944eac6513cd60a8da98b65951f4a3b279b93479a7e8926d9cd730b/cryptography-46.0.6-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:b928a3ca837c77a10e81a814a693f2295200adb3352395fad024559b7be7a736", size = 4651874, upload-time = "2026-03-25T23:34:07.916Z" },
+    { url = "https://files.pythonhosted.org/packages/cb/f1/c2326781ca05208845efca38bf714f76939ae446cd492d7613808badedf1/cryptography-46.0.6-cp314-cp314t-win32.whl", hash = "sha256:97c8115b27e19e592a05c45d0dd89c57f81f841cc9880e353e0d3bf25b2139ed", size = 3001511, upload-time = "2026-03-25T23:34:09.892Z" },
+    { url = "https://files.pythonhosted.org/packages/c9/57/fe4a23eb549ac9d903bd4698ffda13383808ef0876cc912bcb2838799ece/cryptography-46.0.6-cp314-cp314t-win_amd64.whl", hash = "sha256:c797e2517cb7880f8297e2c0f43bb910e91381339336f75d2c1c2cbf811b70b4", size = 3471692, upload-time = "2026-03-25T23:34:11.613Z" },
+    { url = "https://files.pythonhosted.org/packages/c4/cc/f330e982852403da79008552de9906804568ae9230da8432f7496ce02b71/cryptography-46.0.6-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:12cae594e9473bca1a7aceb90536060643128bb274fcea0fc459ab90f7d1ae7a", size = 7162776, upload-time = "2026-03-25T23:34:13.308Z" },
+    { url = "https://files.pythonhosted.org/packages/49/b3/dc27efd8dcc4bff583b3f01d4a3943cd8b5821777a58b3a6a5f054d61b79/cryptography-46.0.6-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:639301950939d844a9e1c4464d7e07f902fe9a7f6b215bb0d4f28584729935d8", size = 4270529, upload-time = "2026-03-25T23:34:15.019Z" },
+    { url = "https://files.pythonhosted.org/packages/e6/05/e8d0e6eb4f0d83365b3cb0e00eb3c484f7348db0266652ccd84632a3d58d/cryptography-46.0.6-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ed3775295fb91f70b4027aeba878d79b3e55c0b3e97eaa4de71f8f23a9f2eb77", size = 4414827, upload-time = "2026-03-25T23:34:16.604Z" },
+    { url = "https://files.pythonhosted.org/packages/2f/97/daba0f5d2dc6d855e2dcb70733c812558a7977a55dd4a6722756628c44d1/cryptography-46.0.6-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:8927ccfbe967c7df312ade694f987e7e9e22b2425976ddbf28271d7e58845290", size = 4271265, upload-time = "2026-03-25T23:34:18.586Z" },
+    { url = "https://files.pythonhosted.org/packages/89/06/fe1fce39a37ac452e58d04b43b0855261dac320a2ebf8f5260dd55b201a9/cryptography-46.0.6-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:b12c6b1e1651e42ab5de8b1e00dc3b6354fdfd778e7fa60541ddacc27cd21410", size = 4916800, upload-time = "2026-03-25T23:34:20.561Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/8a/b14f3101fe9c3592603339eb5d94046c3ce5f7fc76d6512a2d40efd9724e/cryptography-46.0.6-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:063b67749f338ca9c5a0b7fe438a52c25f9526b851e24e6c9310e7195aad3b4d", size = 4448771, upload-time = "2026-03-25T23:34:22.406Z" },
+    { url = "https://files.pythonhosted.org/packages/01/b3/0796998056a66d1973fd52ee89dc1bb3b6581960a91ad4ac705f182d398f/cryptography-46.0.6-cp38-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:02fad249cb0e090b574e30b276a3da6a149e04ee2f049725b1f69e7b8351ec70", size = 3978333, upload-time = "2026-03-25T23:34:24.281Z" },
+    { url = "https://files.pythonhosted.org/packages/c5/3d/db200af5a4ffd08918cd55c08399dc6c9c50b0bc72c00a3246e099d3a849/cryptography-46.0.6-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:7e6142674f2a9291463e5e150090b95a8519b2fb6e6aaec8917dd8d094ce750d", size = 4271069, upload-time = "2026-03-25T23:34:25.895Z" },
+    { url = "https://files.pythonhosted.org/packages/d7/18/61acfd5b414309d74ee838be321c636fe71815436f53c9f0334bf19064fa/cryptography-46.0.6-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:456b3215172aeefb9284550b162801d62f5f264a081049a3e94307fe20792cfa", size = 4878358, upload-time = "2026-03-25T23:34:27.67Z" },
+    { url = "https://files.pythonhosted.org/packages/8b/65/5bf43286d566f8171917cae23ac6add941654ccf085d739195a4eacf1674/cryptography-46.0.6-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:341359d6c9e68834e204ceaf25936dffeafea3829ab80e9503860dcc4f4dac58", size = 4448061, upload-time = "2026-03-25T23:34:29.375Z" },
+    { url = "https://files.pythonhosted.org/packages/e0/25/7e49c0fa7205cf3597e525d156a6bce5b5c9de1fd7e8cb01120e459f205a/cryptography-46.0.6-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9a9c42a2723999a710445bc0d974e345c32adfd8d2fac6d8a251fa829ad31cfb", size = 4399103, upload-time = "2026-03-25T23:34:32.036Z" },
+    { url = "https://files.pythonhosted.org/packages/44/46/466269e833f1c4718d6cd496ffe20c56c9c8d013486ff66b4f69c302a68d/cryptography-46.0.6-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6617f67b1606dfd9fe4dbfa354a9508d4a6d37afe30306fe6c101b7ce3274b72", size = 4659255, upload-time = "2026-03-25T23:34:33.679Z" },
+    { url = "https://files.pythonhosted.org/packages/0a/09/ddc5f630cc32287d2c953fc5d32705e63ec73e37308e5120955316f53827/cryptography-46.0.6-cp38-abi3-win32.whl", hash = "sha256:7f6690b6c55e9c5332c0b59b9c8a3fb232ebf059094c17f9019a51e9827df91c", size = 3010660, upload-time = "2026-03-25T23:34:35.418Z" },
+    { url = "https://files.pythonhosted.org/packages/1b/82/ca4893968aeb2709aacfb57a30dec6fa2ab25b10fa9f064b8882ce33f599/cryptography-46.0.6-cp38-abi3-win_amd64.whl", hash = "sha256:79e865c642cfc5c0b3eb12af83c35c5aeff4fa5c672dc28c43721c2c9fdd2f0f", size = 3471160, upload-time = "2026-03-25T23:34:37.191Z" },
+    { url = "https://files.pythonhosted.org/packages/2e/84/7ccff00ced5bac74b775ce0beb7d1be4e8637536b522b5df9b73ada42da2/cryptography-46.0.6-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:2ea0f37e9a9cf0df2952893ad145fd9627d326a59daec9b0802480fa3bcd2ead", size = 3475444, upload-time = "2026-03-25T23:34:38.944Z" },
+    { url = "https://files.pythonhosted.org/packages/bc/1f/4c926f50df7749f000f20eede0c896769509895e2648db5da0ed55db711d/cryptography-46.0.6-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:a3e84d5ec9ba01f8fd03802b2147ba77f0c8f2617b2aff254cedd551844209c8", size = 4218227, upload-time = "2026-03-25T23:34:40.871Z" },
+    { url = "https://files.pythonhosted.org/packages/c6/65/707be3ffbd5f786028665c3223e86e11c4cda86023adbc56bd72b1b6bab5/cryptography-46.0.6-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:12f0fa16cc247b13c43d56d7b35287ff1569b5b1f4c5e87e92cc4fcc00cd10c0", size = 4381399, upload-time = "2026-03-25T23:34:42.609Z" },
+    { url = "https://files.pythonhosted.org/packages/f3/6d/73557ed0ef7d73d04d9aba745d2c8e95218213687ee5e76b7d236a5030fc/cryptography-46.0.6-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:50575a76e2951fe7dbd1f56d181f8c5ceeeb075e9ff88e7ad997d2f42af06e7b", size = 4217595, upload-time = "2026-03-25T23:34:44.205Z" },
+    { url = "https://files.pythonhosted.org/packages/9e/c5/e1594c4eec66a567c3ac4400008108a415808be2ce13dcb9a9045c92f1a0/cryptography-46.0.6-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:90e5f0a7b3be5f40c3a0a0eafb32c681d8d2c181fc2a1bdabe9b3f611d9f6b1a", size = 4380912, upload-time = "2026-03-25T23:34:46.328Z" },
+    { url = "https://files.pythonhosted.org/packages/1a/89/843b53614b47f97fe1abc13f9a86efa5ec9e275292c457af1d4a60dc80e0/cryptography-46.0.6-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:6728c49e3b2c180ef26f8e9f0a883a2c585638db64cf265b49c9ba10652d430e", size = 3409955, upload-time = "2026-03-25T23:34:48.465Z" },
 ]

 [[package]]
@@ -3048,7 +3048,7 @@ wheels = [

 [[package]]
 name = "langchain-core"
-version = "1.2.11"
+version = "1.2.22"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "jsonpatch" },
@@ -3060,9 +3060,9 @@ dependencies = [
    { name = "typing-extensions" },
    { name = "uuid-utils" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/12/17/1943cedfc118e04b8128e4c3e1dbf0fa0ea58eefddbb6198cfd699d19f01/langchain_core-1.2.11.tar.gz", hash = "sha256:f164bb36602dd74a3a50c1334fca75309ad5ed95767acdfdbb9fa95ce28a1e01", size = 831211, upload-time = "2026-02-10T20:35:28.35Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/b1/a3/c4cd6827a1df46c821e7214b7f7b7a28b189e6c9b84ef15c6d629c5e3179/langchain_core-1.2.22.tar.gz", hash = "sha256:8d8f726d03d3652d403da915126626bb6250747e8ba406537d849e68b9f5d058", size = 842487, upload-time = "2026-03-24T18:48:44.9Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/10/30/1f80e3fc674353cad975ed5294353d42512535d2094ef032c06454c2c873/langchain_core-1.2.11-py3-none-any.whl", hash = "sha256:ae11ceb8dda60d0b9d09e763116e592f1683327c17be5b715f350fd29aee65d3", size = 500062, upload-time = "2026-02-10T20:35:26.698Z" },
+    { url = "https://files.pythonhosted.org/packages/c7/a6/2ffacf0f1a3788f250e75d0b52a24896c413be11be3a6d42bcdf46fbea48/langchain_core-1.2.22-py3-none-any.whl", hash = "sha256:7e30d586b75918e828833b9ec1efc25465723566845dd652c277baf751e9c04b", size = 506829, upload-time = "2026-03-24T18:48:43.286Z" },
 ]

 [[package]]
@@ -4439,7 +4439,7 @@ requires-dist = [
    { name = "jsonref", marker = "extra == 'backend'", specifier = "==1.1.0" },
    { name = "kubernetes", specifier = ">=31.0.0" },
    { name = "kubernetes", marker = "extra == 'backend'", specifier = "==31.0.0" },
-    { name = "langchain-core", marker = "extra == 'backend'", specifier = "==1.2.11" },
+    { name = "langchain-core", marker = "extra == 'backend'", specifier = "==1.2.22" },
    { name = "langfuse", marker = "extra == 'backend'", specifier = "==3.10.0" },
    { name = "lazy-imports", marker = "extra == 'backend'", specifier = "==1.0.1" },
    { name = "litellm", specifier = "==1.81.6" },
@@ -5634,11 +5634,11 @@ wheels = [

 [[package]]
 name = "pygments"
-version = "2.19.2"
+version = "2.20.0"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/c3/b2/bc9c9196916376152d655522fdcebac55e66de6603a76a02bca1b6414f6c/pygments-2.20.0.tar.gz", hash = "sha256:6757cd03768053ff99f3039c1a36d6c0aa0b263438fcab17520b30a303a82b5f", size = 4955991, upload-time = "2026-03-29T13:29:33.898Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" },
+    { url = "https://files.pythonhosted.org/packages/f4/7e/a72dd26f3b0f4f2bf1dd8923c85f7ceb43172af56d63c7383eb62b332364/pygments-2.20.0-py3-none-any.whl", hash = "sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176", size = 1231151, upload-time = "2026-03-29T13:29:30.038Z" },
 ]

 [[package]]
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Bo-Onyx	39a3ee1a0a	feat(hook) frontend ee	2026-03-31 18:54:14 -07:00
Justin Tahara	a1c3a68ba4	fix(perf): optimize chat sessions query to prevent DB cascading failures (#9802 )	2026-04-01 01:28:37 +00:00
Evan Lohn	4fb175ae65	fix: install early exit (#9818 )	2026-04-01 01:09:05 +00:00
Evan Lohn	800ad326df	fix: discord token validation (#9817 )	2026-04-01 01:08:38 +00:00
Bo-Onyx	6b920e8a3e	feat(hook): refactor under ee (#9776 )	2026-04-01 01:07:55 +00:00
Justin Tahara	ef3760796d	feat(rds): Adding IO Metrics Alarms (#9789 )	2026-04-01 01:07:45 +00:00
Jessica Singh	fa5b90df92	fix(connectors): fix reindex on paused file connectors (#9812 )	2026-03-31 23:10:09 +00:00
Evan Lohn	53953ac4fa	chore: fix indexing log2 (#9811 )	2026-03-31 21:02:54 +00:00
Yuhong Sun	26bb5c990c	chore: Rag script for benchmark/regression (#9781 )	2026-03-31 20:46:17 +00:00
Evan Lohn	27b4ed301f	chore: fix batch logging (#9808 )	2026-03-31 20:10:33 +00:00
Jessica Singh	93ec270ccc	feat(voice): VAD auto-stop only when auto-send is enabled (#9809 )	2026-03-31 19:31:31 +00:00
Raunak Bhagat	9e2d6c8a1d	refactor(admin): code-interpreter (#9790 )	2026-03-31 19:08:55 +00:00
Nikolas Garza	fc934214d0	perf(swr): add SWR_KEYS registry and skip revalidation for stable hooks (#9695 )	2026-03-31 19:07:42 +00:00
Raunak Bhagat	48fc45a0cd	refactor(admin): web-search (#9761 )	2026-03-31 19:04:18 +00:00
Jessica Singh	009266e53e	fix(llm): when multiple providers are same type ensure name is prioritized when default (#9777 )	2026-03-31 19:03:38 +00:00
Raunak Bhagat	ffb9df7308	refactor(admin): LLM Config (#9806 )	2026-03-31 19:03:17 +00:00
Raunak Bhagat	b0f5e0b8d9	refactor(admin): image-generation (#9769 )	2026-03-31 18:13:23 +00:00
acaprau	43aea5d614	chore(opensearch): Add Grafana dashboard for retrieval (#9657 ) Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>	2026-03-31 16:56:40 +00:00
Bo-Onyx	593d82f431	feat(hook): hook status and logs (#9770 )	2026-03-31 16:10:12 +00:00
Ben Wu	adf5691b5f	feat(canvas 2/4): Canvas Connector data fetching (#9386 )	2026-03-31 03:07:05 +00:00
Nikolas Garza	c1a8a5bd83	fix(tenants): run migrations on pool tenants before assigning to new users (#9788 )	2026-03-31 01:24:01 +00:00
Justin Tahara	8fd486da99	feat(rds): Add Freeable Memory alert (#9787 )	2026-03-31 00:59:30 +00:00
Raunak Bhagat	4bda4d3637	refactor: migrate away from `cards/Select` (#9771 )	2026-03-31 00:27:01 +00:00
Justin Tahara	13c25eadad	feat(rds): Adding CPU Alerts (#9784 )	2026-03-31 00:22:15 +00:00
Justin Tahara	1f244e6388	feat(eks): Adding Cloudwatch logging (#9783 )	2026-03-30 23:52:44 +00:00
Nikolas Garza	18b0416d30	feat(sentry): enable frontend source map uploads in cloud CI (#9775 )	2026-03-30 23:42:57 +00:00
Nikolas Garza	4bc0bc1efb	feat(helm): add Grafana dashboard provisioning (#9725 )	2026-03-30 23:42:32 +00:00
Justin Tahara	1555217061	feat(rds): Adding RDS Snapshosts (#9779 )	2026-03-30 23:17:08 +00:00
Nikolas Garza	d177a833f0	feat(sentry): add release tracking to backend and frontend (#9773 )	2026-03-30 22:35:38 +00:00
Jamison Lahman	086997d3c5	chore(types): fix IconButton size props (#9772 )	2026-03-30 21:40:25 +00:00
dependabot[bot]	dccec78397	chore(deps): bump helm/chart-testing-action from b5eebdd9998021f29756c53432f48dab66394810 to 2e2940618cb426dce2999631d543b53cdcfc8527 (#9764 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2026-03-30 14:41:01 -07:00
Jamison Lahman	0123133621	chore(fe): polish Query History table (#9767 )	2026-03-30 21:30:13 +00:00
dependabot[bot]	0b9d154a73	chore(deps): bump runs-on/cache from 50350ad4242587b6c8c2baa2e740b1bc11285ff4 to a5f51d6f3fece787d03b7b4e981c82538a0654ed (#9763 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2026-03-30 13:54:43 -07:00
dependabot[bot]	6e65e55bf5	chore(deps): bump actions/cache from 5.0.3 to 5.0.4 (#9765 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2026-03-30 13:46:53 -07:00
Raunak Bhagat	3f9e208759	feat(opal): `SelectCard` + `CardHeaderLayout` (#9760 )	2026-03-30 19:54:54 +00:00
dependabot[bot]	fb8edda14a	chore(deps): bump pygments from 2.19.2 to 2.20.0 (#9757 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Jamison Lahman <jamison@lahman.dev>	2026-03-30 18:30:18 +00:00
Jamison Lahman	58decd8a6b	chore(gha): prefer `ci-protected` env (#9728 )	2026-03-30 17:20:54 +00:00
Danelegend	e97204d9cc	feat(indexing): Batch chunks during doc processing (#9468 )	2026-03-30 11:49:36 +00:00
Danelegend	44ab02c94f	refactor(indexing): Refactor indexing vector db abstraction (#9653 )	2026-03-30 09:57:16 +00:00
Danelegend	a98cc30f25	refactor(indexing): Change adapters to support iterables (#9469 )	2026-03-30 01:43:10 +00:00
Danelegend	a709dcb8fa	feat(indexing): Max chunk processing (#9400 )	2026-03-30 00:10:24 +00:00
Raunak Bhagat	a3dfe6aa1b	refactor(opal): unify Interactive color system (#9717 )	2026-03-28 00:40:23 +00:00
Nikolas Garza	23e4d55fb1	perf(swr): convert raw-fetch hooks to SWR to eliminate duplicate requests (#9694 )	2026-03-28 00:26:20 +00:00
Jamison Lahman	470cc85f83	feat(cli): `onyx-cli serve` over SSH (#9726 )	2026-03-27 23:46:14 +00:00
Justin Tahara	64d9be5a41	fix(openpyxl): Colors must be aRGB hex values (#9727 )	2026-03-27 23:14:36 +00:00
roshan	71a5b469b0	feat(widget): add citation badges to chat widget (#9714 )	2026-03-27 22:39:46 +00:00
Evan Lohn	462eb0697f	fix: Anthropic litellm thinking workaround (#9713 )	2026-03-27 21:03:05 +00:00
dependabot[bot]	b708dc8796	chore(deps): bump langchain-core from 1.2.11 to 1.2.22 (#9720 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Jamison Lahman <jamison@lahman.dev>	2026-03-27 20:50:19 +00:00
dependabot[bot]	c9e2c32f55	chore(deps): bump cryptography from 46.0.5 to 46.0.6 (#9721 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Jamison Lahman <jamison@lahman.dev>	2026-03-27 20:48:59 +00:00