.

Add db migration
2026-03-31 20:42:41 +00:00 · 2026-03-31 22:52:59 +11:00 · 2026-03-31 22:52:09 +11:00 · 2026-03-31 19:25:23 +11:00 · 2026-03-31 18:54:48 +11:00 · 2026-03-31 17:07:06 +11:00
260 changed files with 4020 additions and 12603 deletions
--- a/.github/workflows/deployment.yml
+++ b/.github/workflows/deployment.yml
@@ -704,9 +704,6 @@ jobs:
            NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=true
            NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=true
            NODE_OPTIONS=--max-old-space-size=8192
-            SENTRY_RELEASE=${{ github.sha }}
-          secrets: |
-            sentry_auth_token=${{ secrets.SENTRY_AUTH_TOKEN }}
          cache-from: |
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:cloudweb-cache-amd64
            type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
@@ -789,9 +786,6 @@ jobs:
            NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=true
            NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=true
            NODE_OPTIONS=--max-old-space-size=8192
-            SENTRY_RELEASE=${{ github.sha }}
-          secrets: |
-            sentry_auth_token=${{ secrets.SENTRY_AUTH_TOKEN }}
          cache-from: |
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:cloudweb-cache-arm64
            type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
--- a/.github/workflows/helm-chart-releases.yml
+++ b/.github/workflows/helm-chart-releases.yml
@@ -47,8 +47,7 @@ jobs:
          done

      - name: Publish Helm charts to gh-pages
-        # NOTE: HEAD of https://github.com/stefanprodan/helm-gh-pages/pull/43
-        uses: stefanprodan/helm-gh-pages@ad32ad3b8720abfeaac83532fd1e9bdfca5bbe27 # zizmor: ignore[impostor-commit]
+        uses: stefanprodan/helm-gh-pages@0ad2bb377311d61ac04ad9eb6f252fb68e207260 # ratchet:stefanprodan/helm-gh-pages@v1.7.0
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          charts_dir: deployment/helm/charts
--- a/.github/workflows/nightly-llm-provider-chat.yml
+++ b/.github/workflows/nightly-llm-provider-chat.yml
@@ -35,7 +35,6 @@ jobs:
    needs: [provider-chat-test]
    if: failure() && github.event_name == 'schedule'
    runs-on: ubuntu-slim
-    environment: ci-protected
    timeout-minutes: 5
    steps:
      - name: Checkout
--- a/.github/workflows/post-merge-beta-cherry-pick.yml
+++ b/.github/workflows/post-merge-beta-cherry-pick.yml
@@ -183,7 +183,6 @@ jobs:
      - cherry-pick-to-latest-release
    if: needs.resolve-cherry-pick-request.outputs.should_cherrypick == 'true' && needs.resolve-cherry-pick-request.result == 'success' && needs.cherry-pick-to-latest-release.result == 'success'
    runs-on: ubuntu-slim
-    environment: ci-protected
    timeout-minutes: 10
    steps:
      - name: Checkout
@@ -233,7 +232,6 @@ jobs:
      - cherry-pick-to-latest-release
    if: always() && needs.resolve-cherry-pick-request.outputs.should_cherrypick == 'true' && (needs.resolve-cherry-pick-request.result == 'failure' || needs.cherry-pick-to-latest-release.result == 'failure')
    runs-on: ubuntu-slim
-    environment: ci-protected
    timeout-minutes: 10
    steps:
      - name: Checkout
--- a/.github/workflows/pr-desktop-build.yml
+++ b/.github/workflows/pr-desktop-build.yml
@@ -63,7 +63,7 @@ jobs:
          targets: ${{ matrix.target }}

      - name: Cache Cargo registry and build
-        uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # zizmor: ignore[cache-poisoning]
+        uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # zizmor: ignore[cache-poisoning]
        with:
          path: |
            ~/.cargo/bin/
--- a/.github/workflows/pr-helm-chart-testing.yml
+++ b/.github/workflows/pr-helm-chart-testing.yml
@@ -41,7 +41,7 @@ jobs:
          version: v3.19.0

      - name: Set up chart-testing
-        uses: helm/chart-testing-action@2e2940618cb426dce2999631d543b53cdcfc8527
+        uses: helm/chart-testing-action@b5eebdd9998021f29756c53432f48dab66394810
        with:
          uv_version: "0.9.9"

--- a/.github/workflows/pr-playwright-tests.yml
+++ b/.github/workflows/pr-playwright-tests.yml
@@ -284,7 +284,7 @@ jobs:

      - name: Cache playwright cache
        # zizmor: ignore[cache-poisoning] ephemeral runners; no release artifacts
-        uses: runs-on/cache@a5f51d6f3fece787d03b7b4e981c82538a0654ed # ratchet:runs-on/cache@v4
+        uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
        with:
          path: ~/.cache/ms-playwright
          key: ${{ runner.os }}-playwright-npm-${{ hashFiles('web/package-lock.json') }}
@@ -626,7 +626,7 @@ jobs:

      - name: Cache playwright cache
        # zizmor: ignore[cache-poisoning] ephemeral runners; no release artifacts
-        uses: runs-on/cache@a5f51d6f3fece787d03b7b4e981c82538a0654ed # ratchet:runs-on/cache@v4
+        uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
        with:
          path: ~/.cache/ms-playwright
          key: ${{ runner.os }}-playwright-npm-${{ hashFiles('web/package-lock.json') }}
--- a/.github/workflows/pr-python-checks.yml
+++ b/.github/workflows/pr-python-checks.yml
@@ -56,7 +56,7 @@ jobs:

      - name: Cache mypy cache
        if: ${{ vars.DISABLE_MYPY_CACHE != 'true' }}
-        uses: runs-on/cache@a5f51d6f3fece787d03b7b4e981c82538a0654ed # ratchet:runs-on/cache@v4
+        uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
        with:
          path: .mypy_cache
          key: mypy-${{ runner.os }}-${{ github.base_ref || github.event.merge_group.base_ref || 'main' }}-${{ hashFiles('**/*.py', '**/*.pyi', 'pyproject.toml') }}
--- a/.github/workflows/pr-python-model-tests.yml
+++ b/.github/workflows/pr-python-model-tests.yml
@@ -31,7 +31,6 @@ jobs:
      - runner=4cpu-linux-arm64
      - "run-id=${{ github.run_id }}-model-check"
      - "extras=ecr-cache"
-    environment: ci-protected
    timeout-minutes: 45

    env:
--- a/.github/workflows/preview.yml
+++ b/.github/workflows/preview.yml
@@ -15,7 +15,6 @@ permissions:
 jobs:
  Deploy-Preview:
    runs-on: ubuntu-latest
-    environment: ci-protected
    timeout-minutes: 30
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
--- a/.github/workflows/release-cli.yml
+++ b/.github/workflows/release-cli.yml
@@ -13,6 +13,15 @@ jobs:
    permissions:
      id-token: write
    timeout-minutes: 10
+    strategy:
+      matrix:
+        os-arch:
+          - { goos: "linux", goarch: "amd64" }
+          - { goos: "linux", goarch: "arm64" }
+          - { goos: "windows", goarch: "amd64" }
+          - { goos: "windows", goarch: "arm64" }
+          - { goos: "darwin", goarch: "amd64" }
+          - { goos: "darwin", goarch: "arm64" }
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
@@ -22,11 +31,9 @@ jobs:
          enable-cache: false
          version: "0.9.9"
      - run: |
-          for goos in linux windows darwin; do
-            for goarch in amd64 arm64; do
-              GOOS="$goos" GOARCH="$goarch" uv build --wheel
-            done
-          done
+          GOOS="${{ matrix.os-arch.goos }}" \
+          GOARCH="${{ matrix.os-arch.goarch }}" \
+          uv build --wheel
        working-directory: cli
      - run: uv publish
        working-directory: cli
--- a/.github/workflows/storybook-deploy.yml
+++ b/.github/workflows/storybook-deploy.yml
@@ -25,7 +25,6 @@ permissions:
 jobs:
  Deploy-Storybook:
    runs-on: ubuntu-latest
-    environment: ci-protected
    timeout-minutes: 30
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v4
@@ -55,7 +54,6 @@ jobs:
    needs: Deploy-Storybook
    if: always() && needs.Deploy-Storybook.result == 'failure'
    runs-on: ubuntu-latest
-    environment: ci-protected
    timeout-minutes: 10
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v4
--- a/.github/workflows/sync_foss.yml
+++ b/.github/workflows/sync_foss.yml
@@ -9,7 +9,6 @@ on:
 jobs:
  sync-foss:
    runs-on: ubuntu-latest
-    environment: ci-protected
    timeout-minutes: 45
    permissions:
      contents: read
--- a/.github/workflows/tag-nightly.yml
+++ b/.github/workflows/tag-nightly.yml
@@ -11,7 +11,6 @@ permissions:
 jobs:
  create-and-push-tag:
    runs-on: ubuntu-slim
-    environment: ci-protected
    timeout-minutes: 45

    steps:
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -122,7 +122,7 @@ repos:
    rev: 5d1e709b7be35cb2025444e19de266b056b7b7ee # frozen: v2.10.1
    hooks:
      - id: golangci-lint
-        language_version: "1.26.1"
+        language_version: "1.26.0"
        entry: bash -c "find . -name go.mod -not -path './.venv/*' -print0 | xargs -0 -I{} bash -c 'cd \"$(dirname {})\" && golangci-lint run ./...'"

  - repo: https://github.com/astral-sh/ruff-pre-commit
--- a/README.md
+++ b/README.md
@@ -35,7 +35,7 @@ Onyx comes loaded with advanced features like Agents, Web Search, RAG, MCP, Deep
 > [!TIP]
 > Run Onyx with one command (or see deployment section below):
 > ```
-> curl -fsSL https://onyx.app/install_onyx.sh | bash
+> curl -fsSL https://raw.githubusercontent.com/onyx-dot-app/onyx/main/deployment/docker_compose/install.sh > install.sh && chmod +x install.sh && ./install.sh
 > ```

 ****
--- a/backend/alembic/versions/8188861f4e92_csv_to_tabular_chat_file_type.py
+++ b/backend/alembic/versions/8188861f4e92_csv_to_tabular_chat_file_type.py
@@ -0,0 +1,69 @@
+"""csv to tabular chat file type
+
+Revision ID: 8188861f4e92
+Revises: 1d78c0ca7853
+Create Date: 2026-03-31 19:23:05.753184
+
+"""
+
+from alembic import op
+
+
+# revision identifiers, used by Alembic.
+revision = "8188861f4e92"
+down_revision = "1d78c0ca7853"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Retype every "csv" entry in chat_message.files as "tabular".
+
+    Rows are picked with jsonb containment (@>), which matches only arrays
+    that hold a top-level object whose "type" is "csv". Other values never
+    reach jsonb_array_elements, which raises on non-array input.
+    WITH ORDINALITY plus ORDER BY keeps the rebuilt array in its original order.
+    """
+    op.execute(
+        """
+        UPDATE chat_message
+        SET files = (
+            SELECT jsonb_agg(
+                CASE
+                    WHEN elem->>'type' = 'csv'
+                    THEN jsonb_set(elem, '{type}', '"tabular"')
+                    ELSE elem
+                END
+                ORDER BY idx
+            )
+            FROM jsonb_array_elements(files) WITH ORDINALITY AS arr(elem, idx)
+        )
+        WHERE files @> '[{"type": "csv"}]'::jsonb
+        """
+    )
+
+
+def downgrade() -> None:
+    """Retype every "tabular" entry in chat_message.files back to "csv".
+
+    Mirrors upgrade(). NOTE(review): entries stored as "tabular" after the
+    upgrade ran are converted as well, because nothing in the row
+    distinguishes them from migrated ones.
+    """
+    op.execute(
+        """
+        UPDATE chat_message
+        SET files = (
+            SELECT jsonb_agg(
+                CASE
+                    WHEN elem->>'type' = 'tabular'
+                    THEN jsonb_set(elem, '{type}', '"csv"')
+                    ELSE elem
+                END
+                ORDER BY idx
+            )
+            FROM jsonb_array_elements(files) WITH ORDINALITY AS arr(elem, idx)
+        )
+        WHERE files @> '[{"type": "tabular"}]'::jsonb
+        """
+    )
--- a/backend/ee/onyx/background/celery/tasks/doc_permission_syncing/tasks.py
+++ b/backend/ee/onyx/background/celery/tasks/doc_permission_syncing/tasks.py
@@ -28,7 +28,6 @@ from onyx.access.models import DocExternalAccess
 from onyx.access.models import ElementExternalAccess
 from onyx.background.celery.apps.app_base import task_logger
 from onyx.background.celery.celery_redis import celery_find_task
-from onyx.background.celery.celery_redis import celery_get_broker_client
 from onyx.background.celery.celery_redis import celery_get_queue_length
 from onyx.background.celery.celery_redis import celery_get_queued_task_ids
 from onyx.background.celery.celery_redis import celery_get_unacked_task_ids
@@ -188,6 +187,7 @@ def check_for_doc_permissions_sync(self: Task, *, tenant_id: str) -> bool | None
    # (which lives on a different db number)
    r = get_redis_client()
    r_replica = get_redis_replica_client()
+    r_celery: Redis = self.app.broker_connection().channel().client  # type: ignore

    lock_beat: RedisLock = r.lock(
        OnyxRedisLocks.CHECK_CONNECTOR_DOC_PERMISSIONS_SYNC_BEAT_LOCK,
@@ -227,7 +227,6 @@ def check_for_doc_permissions_sync(self: Task, *, tenant_id: str) -> bool | None
            # tasks can be in the queue in redis, in reserved tasks (prefetched by the worker),
            # or be currently executing
            try:
-                r_celery = celery_get_broker_client(self.app)
                validate_permission_sync_fences(
                    tenant_id, r, r_replica, r_celery, lock_beat
                )
--- a/backend/ee/onyx/background/celery/tasks/external_group_syncing/tasks.py
+++ b/backend/ee/onyx/background/celery/tasks/external_group_syncing/tasks.py
@@ -29,7 +29,6 @@ from ee.onyx.external_permissions.sync_params import (
 from ee.onyx.external_permissions.sync_params import get_source_perm_sync_config
 from onyx.background.celery.apps.app_base import task_logger
 from onyx.background.celery.celery_redis import celery_find_task
-from onyx.background.celery.celery_redis import celery_get_broker_client
 from onyx.background.celery.celery_redis import celery_get_unacked_task_ids
 from onyx.background.celery.tasks.beat_schedule import CLOUD_BEAT_MULTIPLIER_DEFAULT
 from onyx.background.error_logging import emit_background_error
@@ -163,6 +162,7 @@ def check_for_external_group_sync(self: Task, *, tenant_id: str) -> bool | None:
    # (which lives on a different db number)
    r = get_redis_client()
    r_replica = get_redis_replica_client()
+    r_celery: Redis = self.app.broker_connection().channel().client  # type: ignore

    lock_beat: RedisLock = r.lock(
        OnyxRedisLocks.CHECK_CONNECTOR_EXTERNAL_GROUP_SYNC_BEAT_LOCK,
@@ -221,7 +221,6 @@ def check_for_external_group_sync(self: Task, *, tenant_id: str) -> bool | None:
            # tasks can be in the queue in redis, in reserved tasks (prefetched by the worker),
            # or be currently executing
            try:
-                r_celery = celery_get_broker_client(self.app)
                validate_external_group_sync_fences(
                    tenant_id, self.app, r, r_replica, r_celery, lock_beat
                )
--- a/backend/ee/onyx/background/celery/tasks/tenant_provisioning/tasks.py
+++ b/backend/ee/onyx/background/celery/tasks/tenant_provisioning/tasks.py
@@ -13,7 +13,6 @@ from redis.lock import Lock as RedisLock
 from ee.onyx.server.tenants.provisioning import setup_tenant
 from ee.onyx.server.tenants.schema_management import create_schema_if_not_exists
 from ee.onyx.server.tenants.schema_management import get_current_alembic_version
-from ee.onyx.server.tenants.schema_management import run_alembic_migrations
 from onyx.background.celery.apps.app_base import task_logger
 from onyx.configs.app_configs import TARGET_AVAILABLE_TENANTS
 from onyx.configs.constants import ONYX_CLOUD_TENANT_ID
@@ -30,10 +29,9 @@ from shared_configs.configs import TENANT_ID_PREFIX
 # Each tenant takes ~80s (alembic migrations), so 5 tenants ≈ 7 minutes.
 _MAX_TENANTS_PER_RUN = 5

-# Time limits sized for worst-case: provisioning up to _MAX_TENANTS_PER_RUN new tenants
-# (~90s each) plus migrating up to TARGET_AVAILABLE_TENANTS pool tenants (~90s each).
-_TENANT_PROVISIONING_SOFT_TIME_LIMIT = 60 * 20  # 20 minutes
-_TENANT_PROVISIONING_TIME_LIMIT = 60 * 25  # 25 minutes
+# Time limits sized for worst-case batch: _MAX_TENANTS_PER_RUN × ~90s + buffer.
+_TENANT_PROVISIONING_SOFT_TIME_LIMIT = 60 * 10  # 10 minutes
+_TENANT_PROVISIONING_TIME_LIMIT = 60 * 15  # 15 minutes


@shared_task(
@@ -93,7 +91,8 @@ def check_available_tenants(self: Task) -> None:  # noqa: ARG001
        batch_size = min(tenants_to_provision, _MAX_TENANTS_PER_RUN)
        if batch_size < tenants_to_provision:
            task_logger.info(
-                f"Capping batch to {batch_size} (need {tenants_to_provision}, will catch up next cycle)"
+                f"Capping batch to {batch_size} "
+                f"(need {tenants_to_provision}, will catch up next cycle)"
            )

        provisioned = 0
@@ -104,14 +103,12 @@ def check_available_tenants(self: Task) -> None:  # noqa: ARG001
                    provisioned += 1
            except Exception:
                task_logger.exception(
-                    f"Failed to provision tenant {i + 1}/{batch_size}, continuing with remaining tenants"
+                    f"Failed to provision tenant {i + 1}/{batch_size}, "
+                    "continuing with remaining tenants"
                )

        task_logger.info(f"Provisioning complete: {provisioned}/{batch_size} succeeded")

-        # Migrate any pool tenants that were provisioned before a new migration was deployed
-        _migrate_stale_pool_tenants()
-
    except Exception:
        task_logger.exception("Error in check_available_tenants task")

@@ -124,46 +121,6 @@ def check_available_tenants(self: Task) -> None:  # noqa: ARG001
            )


-def _migrate_stale_pool_tenants() -> None:
-    """
-    Run alembic upgrade head on all pool tenants. Since alembic upgrade head is
-    idempotent, tenants already at head are a fast no-op. This ensures pool
-    tenants are always current so that signup doesn't hit schema mismatches
-    (e.g. missing columns added after the tenant was pre-provisioned).
-    """
-    with get_session_with_shared_schema() as db_session:
-        pool_tenants = db_session.query(AvailableTenant).all()
-        tenant_ids = [t.tenant_id for t in pool_tenants]
-
-    if not tenant_ids:
-        return
-
-    task_logger.info(
-        f"Checking {len(tenant_ids)} pool tenant(s) for pending migrations"
-    )
-
-    for tenant_id in tenant_ids:
-        try:
-            run_alembic_migrations(tenant_id)
-            new_version = get_current_alembic_version(tenant_id)
-            with get_session_with_shared_schema() as db_session:
-                tenant = (
-                    db_session.query(AvailableTenant)
-                    .filter_by(tenant_id=tenant_id)
-                    .first()
-                )
-                if tenant and tenant.alembic_version != new_version:
-                    task_logger.info(
-                        f"Migrated pool tenant {tenant_id}: {tenant.alembic_version} -> {new_version}"
-                    )
-                    tenant.alembic_version = new_version
-                    db_session.commit()
-        except Exception:
-            task_logger.exception(
-                f"Failed to migrate pool tenant {tenant_id}, skipping"
-            )
-
-
 def pre_provision_tenant() -> bool:
    """
    Pre-provision a new tenant and store it in the NewAvailableTenant table.
--- a/backend/ee/onyx/server/tenants/provisioning.py
+++ b/backend/ee/onyx/server/tenants/provisioning.py
@@ -99,26 +99,6 @@ async def get_or_provision_tenant(
        tenant_id = await get_available_tenant()

        if tenant_id:
-            # Run migrations to ensure the pre-provisioned tenant schema is current.
-            # Pool tenants may have been created before a new migration was deployed.
-            # Capture as a non-optional local so mypy can type the lambda correctly.
-            _tenant_id: str = tenant_id
-            loop = asyncio.get_running_loop()
-            try:
-                await loop.run_in_executor(
-                    None, lambda: run_alembic_migrations(_tenant_id)
-                )
-            except Exception:
-                # The tenant was already dequeued from the pool — roll it back so
-                # it doesn't end up orphaned (schema exists, but not assigned to anyone).
-                logger.exception(
-                    f"Migration failed for pre-provisioned tenant {_tenant_id}; rolling back"
-                )
-                try:
-                    await rollback_tenant_provisioning(_tenant_id)
-                except Exception:
-                    logger.exception(f"Failed to rollback orphaned tenant {_tenant_id}")
-                raise
            # If we have a pre-provisioned tenant, assign it to the user
            await assign_tenant_to_user(tenant_id, email, referral_source)
            logger.info(f"Assigned pre-provisioned tenant {tenant_id} to user {email}")
--- a/backend/model_server/main.py
+++ b/backend/model_server/main.py
@@ -100,7 +100,6 @@ def get_model_app() -> FastAPI:
            dsn=SENTRY_DSN,
            integrations=[StarletteIntegration(), FastApiIntegration()],
            traces_sample_rate=0.1,
-            release=__version__,
        )
        logger.info("Sentry initialized")
    else:
--- a/backend/onyx/background/celery/apps/app_base.py
+++ b/backend/onyx/background/celery/apps/app_base.py
@@ -20,7 +20,6 @@ from sentry_sdk.integrations.celery import CeleryIntegration
 from sqlalchemy import text
 from sqlalchemy.orm import Session

-from onyx import __version__
 from onyx.background.celery.apps.task_formatters import CeleryTaskColoredFormatter
 from onyx.background.celery.apps.task_formatters import CeleryTaskPlainFormatter
 from onyx.background.celery.celery_utils import celery_is_worker_primary
@@ -66,7 +65,6 @@ if SENTRY_DSN:
        dsn=SENTRY_DSN,
        integrations=[CeleryIntegration()],
        traces_sample_rate=0.1,
-        release=__version__,
    )
    logger.info("Sentry initialized")
 else:
@@ -517,8 +515,7 @@ def reset_tenant_id(


 def wait_for_vespa_or_shutdown(
-    sender: Any,  # noqa: ARG001
-    **kwargs: Any,  # noqa: ARG001
+    sender: Any, **kwargs: Any  # noqa: ARG001
 ) -> None:  # noqa: ARG001
    """Waits for Vespa to become ready subject to a timeout.
    Raises WorkerShutdown if the timeout is reached."""
--- a/backend/onyx/background/celery/celery_redis.py
+++ b/backend/onyx/background/celery/celery_redis.py
@@ -1,6 +1,5 @@
 # These are helper objects for tracking the keys we need to write in redis
 import json
-import threading
 from typing import Any
 from typing import cast

@@ -8,59 +7,7 @@ from celery import Celery
 from redis import Redis

 from onyx.background.celery.configs.base import CELERY_SEPARATOR
-from onyx.configs.app_configs import REDIS_HEALTH_CHECK_INTERVAL
 from onyx.configs.constants import OnyxCeleryPriority
-from onyx.configs.constants import REDIS_SOCKET_KEEPALIVE_OPTIONS
-
-
-_broker_client: Redis | None = None
-_broker_url: str | None = None
-_broker_client_lock = threading.Lock()
-
-
-def celery_get_broker_client(app: Celery) -> Redis:
-    """Return a shared Redis client connected to the Celery broker DB.
-
-    Uses a module-level singleton so all tasks on a worker share one
-    connection instead of creating a new one per call. The client
-    connects directly to the broker Redis DB (parsed from the broker URL).
-
-    Thread-safe via lock — safe for use in Celery thread-pool workers.
-
-    Usage:
-        r_celery = celery_get_broker_client(self.app)
-        length = celery_get_queue_length(queue, r_celery)
-    """
-    global _broker_client, _broker_url
-    with _broker_client_lock:
-        url = app.conf.broker_url
-        if _broker_client is not None and _broker_url == url:
-            try:
-                _broker_client.ping()
-                return _broker_client
-            except Exception:
-                try:
-                    _broker_client.close()
-                except Exception:
-                    pass
-                _broker_client = None
-        elif _broker_client is not None:
-            try:
-                _broker_client.close()
-            except Exception:
-                pass
-            _broker_client = None
-
-        _broker_url = url
-        _broker_client = Redis.from_url(
-            url,
-            decode_responses=False,
-            health_check_interval=REDIS_HEALTH_CHECK_INTERVAL,
-            socket_keepalive=True,
-            socket_keepalive_options=REDIS_SOCKET_KEEPALIVE_OPTIONS,
-            retry_on_timeout=True,
-        )
-        return _broker_client


 def celery_get_unacked_length(r: Redis) -> int:
--- a/backend/onyx/background/celery/tasks/connector_deletion/tasks.py
+++ b/backend/onyx/background/celery/tasks/connector_deletion/tasks.py
@@ -14,7 +14,6 @@ from redis.lock import Lock as RedisLock
 from sqlalchemy.orm import Session

 from onyx.background.celery.apps.app_base import task_logger
-from onyx.background.celery.celery_redis import celery_get_broker_client
 from onyx.background.celery.celery_redis import celery_get_queue_length
 from onyx.background.celery.celery_redis import celery_get_queued_task_ids
 from onyx.configs.app_configs import JOB_TIMEOUT
@@ -133,6 +132,7 @@ def revoke_tasks_blocking_deletion(
 def check_for_connector_deletion_task(self: Task, *, tenant_id: str) -> bool | None:
    r = get_redis_client()
    r_replica = get_redis_replica_client()
+    r_celery: Redis = self.app.broker_connection().channel().client  # type: ignore

    lock_beat: RedisLock = r.lock(
        OnyxRedisLocks.CHECK_CONNECTOR_DELETION_BEAT_LOCK,
@@ -149,7 +149,6 @@ def check_for_connector_deletion_task(self: Task, *, tenant_id: str) -> bool | N
        if not r.exists(OnyxRedisSignals.BLOCK_VALIDATE_CONNECTOR_DELETION_FENCES):
            # clear fences that don't have associated celery tasks in progress
            try:
-                r_celery = celery_get_broker_client(self.app)
                validate_connector_deletion_fences(
                    tenant_id, r, r_replica, r_celery, lock_beat
                )
--- a/backend/onyx/background/celery/tasks/docfetching/tasks.py
+++ b/backend/onyx/background/celery/tasks/docfetching/tasks.py
@@ -9,7 +9,6 @@ from celery import Celery
 from celery import shared_task
 from celery import Task

-from onyx import __version__
 from onyx.background.celery.apps.app_base import task_logger
 from onyx.background.celery.memory_monitoring import emit_process_memory
 from onyx.background.celery.tasks.docprocessing.heartbeat import start_heartbeat
@@ -138,7 +137,6 @@ def _docfetching_task(
        sentry_sdk.init(
            dsn=SENTRY_DSN,
            traces_sample_rate=0.1,
-            release=__version__,
        )
        logger.info("Sentry initialized")
    else:
--- a/backend/onyx/background/celery/tasks/docprocessing/tasks.py
+++ b/backend/onyx/background/celery/tasks/docprocessing/tasks.py
@@ -22,7 +22,6 @@ from sqlalchemy.orm import Session

 from onyx.background.celery.apps.app_base import task_logger
 from onyx.background.celery.celery_redis import celery_find_task
-from onyx.background.celery.celery_redis import celery_get_broker_client
 from onyx.background.celery.celery_redis import celery_get_unacked_task_ids
 from onyx.background.celery.celery_utils import httpx_init_vespa_pool
 from onyx.background.celery.memory_monitoring import emit_process_memory
@@ -319,11 +318,6 @@ def monitor_indexing_attempt_progress(
    )

    current_db_time = get_db_current_time(db_session)
-    total_batches: int | str = (
-        coordination_status.total_batches
-        if coordination_status.total_batches is not None
-        else "?"
-    )
    if coordination_status.found:
        task_logger.info(
            f"Indexing attempt progress: "
@@ -331,7 +325,7 @@ def monitor_indexing_attempt_progress(
            f"cc_pair={attempt.connector_credential_pair_id} "
            f"search_settings={attempt.search_settings_id} "
            f"completed_batches={coordination_status.completed_batches} "
-            f"total_batches={total_batches} "
+            f"total_batches={'?' if coordination_status.total_batches is None else coordination_status.total_batches} "
            f"total_docs={coordination_status.total_docs} "
            f"total_failures={coordination_status.total_failures}"
            f"elapsed={(current_db_time - attempt.time_created).seconds}"
@@ -455,7 +449,7 @@ def check_indexing_completion(
            ):
                # Check if the task exists in the celery queue
                # This handles the case where Redis dies after task creation but before task execution
-                redis_celery = celery_get_broker_client(task.app)
+                redis_celery = task.app.broker_connection().channel().client  # type: ignore
                task_exists = celery_find_task(
                    attempt.celery_task_id,
                    OnyxCeleryQueues.CONNECTOR_DOC_FETCHING,
--- a/backend/onyx/background/celery/tasks/monitoring/tasks.py
+++ b/backend/onyx/background/celery/tasks/monitoring/tasks.py
@@ -1,5 +1,6 @@
 import json
 import time
+from collections.abc import Callable
 from datetime import timedelta
 from itertools import islice
 from typing import Any
@@ -18,7 +19,6 @@ from sqlalchemy import text
 from sqlalchemy.orm import Session

 from onyx.background.celery.apps.app_base import task_logger
-from onyx.background.celery.celery_redis import celery_get_broker_client
 from onyx.background.celery.celery_redis import celery_get_queue_length
 from onyx.background.celery.celery_redis import celery_get_unacked_task_ids
 from onyx.background.celery.memory_monitoring import emit_process_memory
@@ -698,27 +698,31 @@ def monitor_background_processes(self: Task, *, tenant_id: str) -> None:
        return None

    try:
+        # Get Redis client for Celery broker
+        redis_celery = self.app.broker_connection().channel().client  # type: ignore
        redis_std = get_redis_client()

-        # Collect queue metrics with broker connection
-        r_celery = celery_get_broker_client(self.app)
-        queue_metrics = _collect_queue_metrics(r_celery)
+        # Define metric collection functions and their dependencies
+        metric_functions: list[Callable[[], list[Metric]]] = [
+            lambda: _collect_queue_metrics(redis_celery),
+            lambda: _collect_connector_metrics(db_session, redis_std),
+            lambda: _collect_sync_metrics(db_session, redis_std),
+        ]

-        # Collect remaining metrics (no broker connection needed)
+        # Collect and log each metric
        with get_session_with_current_tenant() as db_session:
-            all_metrics: list[Metric] = queue_metrics
-            all_metrics.extend(_collect_connector_metrics(db_session, redis_std))
-            all_metrics.extend(_collect_sync_metrics(db_session, redis_std))
+            for metric_fn in metric_functions:
+                metrics = metric_fn()
+                for metric in metrics:
+                    # double check to make sure we aren't double-emitting metrics
+                    if metric.key is None or not _has_metric_been_emitted(
+                        redis_std, metric.key
+                    ):
+                        metric.log()
+                        metric.emit(tenant_id)

-            for metric in all_metrics:
-                if metric.key is None or not _has_metric_been_emitted(
-                    redis_std, metric.key
-                ):
-                    metric.log()
-                    metric.emit(tenant_id)
-
-                if metric.key is not None:
-                    _mark_metric_as_emitted(redis_std, metric.key)
+                    if metric.key is not None:
+                        _mark_metric_as_emitted(redis_std, metric.key)

        task_logger.info("Successfully collected background metrics")
    except SoftTimeLimitExceeded:
@@ -886,7 +890,7 @@ def monitor_celery_queues_helper(
 ) -> None:
    """A task to monitor all celery queue lengths."""

-    r_celery = celery_get_broker_client(task.app)
+    r_celery = task.app.broker_connection().channel().client  # type: ignore
    n_celery = celery_get_queue_length(OnyxCeleryQueues.PRIMARY, r_celery)
    n_docfetching = celery_get_queue_length(
        OnyxCeleryQueues.CONNECTOR_DOC_FETCHING, r_celery
@@ -1076,7 +1080,7 @@ def cloud_monitor_celery_pidbox(
    num_deleted = 0

    MAX_PIDBOX_IDLE = 24 * 3600  # 1 day in seconds
-    r_celery = celery_get_broker_client(self.app)
+    r_celery: Redis = self.app.broker_connection().channel().client  # type: ignore
    for key in r_celery.scan_iter("*.reply.celery.pidbox"):
        key_bytes = cast(bytes, key)
        key_str = key_bytes.decode("utf-8")
--- a/backend/onyx/background/celery/tasks/pruning/tasks.py
+++ b/backend/onyx/background/celery/tasks/pruning/tasks.py
@@ -17,7 +17,6 @@ from sqlalchemy.orm import Session

 from onyx.background.celery.apps.app_base import task_logger
 from onyx.background.celery.celery_redis import celery_find_task
-from onyx.background.celery.celery_redis import celery_get_broker_client
 from onyx.background.celery.celery_redis import celery_get_queue_length
 from onyx.background.celery.celery_redis import celery_get_queued_task_ids
 from onyx.background.celery.celery_redis import celery_get_unacked_task_ids
@@ -204,6 +203,7 @@ def _is_pruning_due(cc_pair: ConnectorCredentialPair) -> bool:
 def check_for_pruning(self: Task, *, tenant_id: str) -> bool | None:
    r = get_redis_client()
    r_replica = get_redis_replica_client()
+    r_celery: Redis = self.app.broker_connection().channel().client  # type: ignore

    lock_beat: RedisLock = r.lock(
        OnyxRedisLocks.CHECK_PRUNE_BEAT_LOCK,
@@ -261,7 +261,6 @@ def check_for_pruning(self: Task, *, tenant_id: str) -> bool | None:
            # tasks can be in the queue in redis, in reserved tasks (prefetched by the worker),
            # or be currently executing
            try:
-                r_celery = celery_get_broker_client(self.app)
                validate_pruning_fences(tenant_id, r, r_replica, r_celery, lock_beat)
            except Exception:
                task_logger.exception("Exception while validating pruning fences")
--- a/backend/onyx/background/celery/tasks/user_file_processing/tasks.py
+++ b/backend/onyx/background/celery/tasks/user_file_processing/tasks.py
@@ -16,7 +16,6 @@ from sqlalchemy.orm import Session

 from onyx.access.access import build_access_for_user_files
 from onyx.background.celery.apps.app_base import task_logger
-from onyx.background.celery.celery_redis import celery_get_broker_client
 from onyx.background.celery.celery_redis import celery_get_queue_length
 from onyx.background.celery.celery_utils import httpx_init_vespa_pool
 from onyx.background.celery.tasks.shared.RetryDocumentIndex import RetryDocumentIndex
@@ -106,7 +105,7 @@ def _user_file_delete_queued_key(user_file_id: str | UUID) -> str:


 def get_user_file_project_sync_queue_depth(celery_app: Celery) -> int:
-    redis_celery = celery_get_broker_client(celery_app)
+    redis_celery: Redis = celery_app.broker_connection().channel().client  # type: ignore
    return celery_get_queue_length(
        OnyxCeleryQueues.USER_FILE_PROJECT_SYNC, redis_celery
    )
@@ -239,7 +238,7 @@ def check_user_file_processing(self: Task, *, tenant_id: str) -> None:
    skipped_guard = 0
    try:
        # --- Protection 1: queue depth backpressure ---
-        r_celery = celery_get_broker_client(self.app)
+        r_celery = self.app.broker_connection().channel().client  # type: ignore
        queue_len = celery_get_queue_length(
            OnyxCeleryQueues.USER_FILE_PROCESSING, r_celery
        )
@@ -592,7 +591,7 @@ def check_for_user_file_delete(self: Task, *, tenant_id: str) -> None:
        # --- Protection 1: queue depth backpressure ---
        # NOTE: must use the broker's Redis client (not redis_client) because
        # Celery queues live on a separate Redis DB with CELERY_SEPARATOR keys.
-        r_celery = celery_get_broker_client(self.app)
+        r_celery: Redis = self.app.broker_connection().channel().client  # type: ignore
        queue_len = celery_get_queue_length(OnyxCeleryQueues.USER_FILE_DELETE, r_celery)
        if queue_len > USER_FILE_DELETE_MAX_QUEUE_DEPTH:
            task_logger.warning(
--- a/backend/onyx/configs/app_configs.py
+++ b/backend/onyx/configs/app_configs.py
@@ -805,10 +805,6 @@ MINI_CHUNK_SIZE = 150
 # This is the number of regular chunks per large chunk
 LARGE_CHUNK_RATIO = 4

-# The maximum number of chunks that can be held for 1 document processing batch
-# The purpose of this is to set an upper bound on memory usage
-MAX_CHUNKS_PER_DOC_BATCH = int(os.environ.get("MAX_CHUNKS_PER_DOC_BATCH") or 1000)
-
 # Include the document level metadata in each chunk. If the metadata is too long, then it is thrown out
 # We don't want the metadata to overwhelm the actual contents of the chunk
 SKIP_METADATA_IN_CHUNK = os.environ.get("SKIP_METADATA_IN_CHUNK", "").lower() == "true"
--- a/backend/onyx/configs/constants.py
+++ b/backend/onyx/configs/constants.py
@@ -212,7 +212,6 @@ class DocumentSource(str, Enum):
    PRODUCTBOARD = "productboard"
    FILE = "file"
    CODA = "coda"
-    CANVAS = "canvas"
    NOTION = "notion"
    ZULIP = "zulip"
    LINEAR = "linear"
@@ -673,7 +672,6 @@ DocumentSourceDescription: dict[DocumentSource, str] = {
    DocumentSource.SLAB: "slab data",
    DocumentSource.PRODUCTBOARD: "productboard data (boards, etc.)",
    DocumentSource.FILE: "files",
-    DocumentSource.CANVAS: "canvas lms - courses, pages, assignments, and announcements",
    DocumentSource.CODA: "coda - team workspace with docs, tables, and pages",
    DocumentSource.NOTION: "notion data - a workspace that combines note-taking, \
 project management, and collaboration tools into a single, customizable platform",
--- a/backend/onyx/connectors/canvas/access.py
+++ b/backend/onyx/connectors/canvas/access.py
@@ -1,32 +0,0 @@
-"""
-Permissioning / AccessControl logic for Canvas courses.
-
-CE stub — returns None (no permissions). The EE implementation is loaded
-at runtime via ``fetch_versioned_implementation``.
-"""
-
-from collections.abc import Callable
-from typing import cast
-
-from onyx.access.models import ExternalAccess
-from onyx.connectors.canvas.client import CanvasApiClient
-from onyx.utils.variable_functionality import fetch_versioned_implementation
-from onyx.utils.variable_functionality import global_version
-
-
-def get_course_permissions(
-    canvas_client: CanvasApiClient,
-    course_id: int,
-) -> ExternalAccess | None:
-    if not global_version.is_ee_version():
-        return None
-
-    ee_get_course_permissions = cast(
-        Callable[[CanvasApiClient, int], ExternalAccess | None],
-        fetch_versioned_implementation(
-            "onyx.external_permissions.canvas.access",
-            "get_course_permissions",
-        ),
-    )
-
-    return ee_get_course_permissions(canvas_client, course_id)
--- a/backend/onyx/connectors/canvas/client.py
+++ b/backend/onyx/connectors/canvas/client.py
@@ -2,7 +2,6 @@ from __future__ import annotations

 import logging
 import re
-from collections.abc import Iterator
 from typing import Any
 from urllib.parse import urlparse

@@ -191,22 +190,3 @@ class CanvasApiClient:
        if clean_endpoint:
            final_url += "/" + clean_endpoint
        return final_url
-
-    def paginate(
-        self,
-        endpoint: str,
-        params: dict[str, Any] | None = None,
-    ) -> Iterator[list[Any]]:
-        """Yield each page of results, following Link-header pagination.
-
-        Makes the first request with endpoint + params, then follows
-        next_url from Link headers for subsequent pages.
-        """
-        response, next_url = self.get(endpoint, params=params)
-        while True:
-            if not response:
-                break
-            yield response
-            if not next_url:
-                break
-            response, next_url = self.get(full_url=next_url)
--- a/backend/onyx/connectors/canvas/connector.py
+++ b/backend/onyx/connectors/canvas/connector.py
@@ -1,82 +1,17 @@
-from datetime import datetime
-from datetime import timezone
-from typing import Any
-from typing import cast
 from typing import Literal
-from typing import NoReturn
 from typing import TypeAlias

 from pydantic import BaseModel
-from retry import retry
-from typing_extensions import override

-from onyx.access.models import ExternalAccess
-from onyx.configs.app_configs import INDEX_BATCH_SIZE
-from onyx.configs.constants import DocumentSource
-from onyx.connectors.canvas.access import get_course_permissions
-from onyx.connectors.canvas.client import CanvasApiClient
-from onyx.connectors.exceptions import ConnectorValidationError
-from onyx.connectors.exceptions import CredentialExpiredError
-from onyx.connectors.exceptions import InsufficientPermissionsError
-from onyx.connectors.exceptions import UnexpectedValidationError
-from onyx.connectors.interfaces import CheckpointedConnectorWithPermSync
-from onyx.connectors.interfaces import CheckpointOutput
-from onyx.connectors.interfaces import GenerateSlimDocumentOutput
-from onyx.connectors.interfaces import SecondsSinceUnixEpoch
-from onyx.connectors.interfaces import SlimConnectorWithPermSync
 from onyx.connectors.models import ConnectorCheckpoint
-from onyx.connectors.models import ConnectorMissingCredentialError
-from onyx.connectors.models import Document
-from onyx.connectors.models import ImageSection
-from onyx.connectors.models import TextSection
-from onyx.error_handling.exceptions import OnyxError
-from onyx.file_processing.html_utils import parse_html_page_basic
-from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
-from onyx.utils.logger import setup_logger
-
-logger = setup_logger()
-
-
-def _handle_canvas_api_error(e: OnyxError) -> NoReturn:
-    """Map Canvas API errors to connector framework exceptions."""
-    if e.status_code == 401:
-        raise CredentialExpiredError(
-            "Canvas API token is invalid or expired (HTTP 401)."
-        )
-    elif e.status_code == 403:
-        raise InsufficientPermissionsError(
-            "Canvas API token does not have sufficient permissions (HTTP 403)."
-        )
-    elif e.status_code == 429:
-        raise ConnectorValidationError(
-            "Canvas rate-limit exceeded (HTTP 429). Please try again later."
-        )
-    elif e.status_code >= 500:
-        raise UnexpectedValidationError(
-            f"Unexpected Canvas HTTP error (status={e.status_code}): {e}"
-        )
-    else:
-        raise ConnectorValidationError(
-            f"Canvas API error (status={e.status_code}): {e}"
-        )


 class CanvasCourse(BaseModel):
    id: int
-    name: str | None = None
-    course_code: str | None = None
-    created_at: str | None = None
-    workflow_state: str | None = None
-
-    @classmethod
-    def from_api(cls, payload: dict[str, Any]) -> "CanvasCourse":
-        return cls(
-            id=payload["id"],
-            name=payload.get("name"),
-            course_code=payload.get("course_code"),
-            created_at=payload.get("created_at"),
-            workflow_state=payload.get("workflow_state"),
-        )
+    name: str
+    course_code: str
+    created_at: str
+    workflow_state: str


 class CanvasPage(BaseModel):
@@ -84,22 +19,10 @@ class CanvasPage(BaseModel):
    url: str
    title: str
    body: str | None = None
-    created_at: str | None = None
-    updated_at: str | None = None
+    created_at: str
+    updated_at: str
    course_id: int

-    @classmethod
-    def from_api(cls, payload: dict[str, Any], course_id: int) -> "CanvasPage":
-        return cls(
-            page_id=payload["page_id"],
-            url=payload["url"],
-            title=payload["title"],
-            body=payload.get("body"),
-            created_at=payload.get("created_at"),
-            updated_at=payload.get("updated_at"),
-            course_id=course_id,
-        )
-

 class CanvasAssignment(BaseModel):
    id: int
@@ -107,23 +30,10 @@ class CanvasAssignment(BaseModel):
    description: str | None = None
    html_url: str
    course_id: int
-    created_at: str | None = None
-    updated_at: str | None = None
+    created_at: str
+    updated_at: str
    due_at: str | None = None

-    @classmethod
-    def from_api(cls, payload: dict[str, Any], course_id: int) -> "CanvasAssignment":
-        return cls(
-            id=payload["id"],
-            name=payload["name"],
-            description=payload.get("description"),
-            html_url=payload["html_url"],
-            course_id=course_id,
-            created_at=payload.get("created_at"),
-            updated_at=payload.get("updated_at"),
-            due_at=payload.get("due_at"),
-        )
-

 class CanvasAnnouncement(BaseModel):
    id: int
@@ -133,17 +43,6 @@ class CanvasAnnouncement(BaseModel):
    posted_at: str | None = None
    course_id: int

-    @classmethod
-    def from_api(cls, payload: dict[str, Any], course_id: int) -> "CanvasAnnouncement":
-        return cls(
-            id=payload["id"],
-            title=payload["title"],
-            message=payload.get("message"),
-            html_url=payload["html_url"],
-            posted_at=payload.get("posted_at"),
-            course_id=course_id,
-        )
-

 CanvasStage: TypeAlias = Literal["pages", "assignments", "announcements"]

@@ -173,286 +72,3 @@ class CanvasConnectorCheckpoint(ConnectorCheckpoint):
        self.current_course_index += 1
        self.stage = "pages"
        self.next_url = None
-
-
-class CanvasConnector(
-    CheckpointedConnectorWithPermSync[CanvasConnectorCheckpoint],
-    SlimConnectorWithPermSync,
-):
-    def __init__(
-        self,
-        canvas_base_url: str,
-        batch_size: int = INDEX_BATCH_SIZE,
-    ) -> None:
-        self.canvas_base_url = canvas_base_url.rstrip("/").removesuffix("/api/v1")
-        self.batch_size = batch_size
-        self._canvas_client: CanvasApiClient | None = None
-        self._course_permissions_cache: dict[int, ExternalAccess | None] = {}
-
-    @property
-    def canvas_client(self) -> CanvasApiClient:
-        if self._canvas_client is None:
-            raise ConnectorMissingCredentialError("Canvas")
-        return self._canvas_client
-
-    def _get_course_permissions(self, course_id: int) -> ExternalAccess | None:
-        """Get course permissions with caching."""
-        if course_id not in self._course_permissions_cache:
-            self._course_permissions_cache[course_id] = get_course_permissions(
-                canvas_client=self.canvas_client,
-                course_id=course_id,
-            )
-        return self._course_permissions_cache[course_id]
-
-    @retry(tries=3, delay=1, backoff=2)
-    def _list_courses(self) -> list[CanvasCourse]:
-        """Fetch all courses accessible to the authenticated user."""
-        logger.debug("Fetching Canvas courses")
-
-        courses: list[CanvasCourse] = []
-        for page in self.canvas_client.paginate(
-            "courses", params={"per_page": "100", "state[]": "available"}
-        ):
-            courses.extend(CanvasCourse.from_api(c) for c in page)
-        return courses
-
-    @retry(tries=3, delay=1, backoff=2)
-    def _list_pages(self, course_id: int) -> list[CanvasPage]:
-        """Fetch all pages for a given course."""
-        logger.debug(f"Fetching pages for course {course_id}")
-
-        pages: list[CanvasPage] = []
-        for page in self.canvas_client.paginate(
-            f"courses/{course_id}/pages",
-            params={"per_page": "100", "include[]": "body", "published": "true"},
-        ):
-            pages.extend(CanvasPage.from_api(p, course_id=course_id) for p in page)
-        return pages
-
-    @retry(tries=3, delay=1, backoff=2)
-    def _list_assignments(self, course_id: int) -> list[CanvasAssignment]:
-        """Fetch all assignments for a given course."""
-        logger.debug(f"Fetching assignments for course {course_id}")
-
-        assignments: list[CanvasAssignment] = []
-        for page in self.canvas_client.paginate(
-            f"courses/{course_id}/assignments",
-            params={"per_page": "100", "published": "true"},
-        ):
-            assignments.extend(
-                CanvasAssignment.from_api(a, course_id=course_id) for a in page
-            )
-        return assignments
-
-    @retry(tries=3, delay=1, backoff=2)
-    def _list_announcements(self, course_id: int) -> list[CanvasAnnouncement]:
-        """Fetch all announcements for a given course."""
-        logger.debug(f"Fetching announcements for course {course_id}")
-
-        announcements: list[CanvasAnnouncement] = []
-        for page in self.canvas_client.paginate(
-            "announcements",
-            params={
-                "per_page": "100",
-                "context_codes[]": f"course_{course_id}",
-                "active_only": "true",
-            },
-        ):
-            announcements.extend(
-                CanvasAnnouncement.from_api(a, course_id=course_id) for a in page
-            )
-        return announcements
-
-    def _build_document(
-        self,
-        doc_id: str,
-        link: str,
-        text: str,
-        semantic_identifier: str,
-        doc_updated_at: datetime | None,
-        course_id: int,
-        doc_type: str,
-    ) -> Document:
-        """Build a Document with standard Canvas fields."""
-        return Document(
-            id=doc_id,
-            sections=cast(
-                list[TextSection | ImageSection],
-                [TextSection(link=link, text=text)],
-            ),
-            source=DocumentSource.CANVAS,
-            semantic_identifier=semantic_identifier,
-            doc_updated_at=doc_updated_at,
-            metadata={"course_id": str(course_id), "type": doc_type},
-        )
-
-    def _convert_page_to_document(self, page: CanvasPage) -> Document:
-        """Convert a Canvas page to a Document."""
-        link = f"{self.canvas_base_url}/courses/{page.course_id}/pages/{page.url}"
-
-        text_parts = [page.title]
-        body_text = parse_html_page_basic(page.body) if page.body else ""
-        if body_text:
-            text_parts.append(body_text)
-
-        doc_updated_at = (
-            datetime.fromisoformat(page.updated_at.replace("Z", "+00:00")).astimezone(
-                timezone.utc
-            )
-            if page.updated_at
-            else None
-        )
-
-        document = self._build_document(
-            doc_id=f"canvas-page-{page.course_id}-{page.page_id}",
-            link=link,
-            text="\n\n".join(text_parts),
-            semantic_identifier=page.title or f"Page {page.page_id}",
-            doc_updated_at=doc_updated_at,
-            course_id=page.course_id,
-            doc_type="page",
-        )
-        return document
-
-    def _convert_assignment_to_document(self, assignment: CanvasAssignment) -> Document:
-        """Convert a Canvas assignment to a Document."""
-        text_parts = [assignment.name]
-        desc_text = (
-            parse_html_page_basic(assignment.description)
-            if assignment.description
-            else ""
-        )
-        if desc_text:
-            text_parts.append(desc_text)
-        if assignment.due_at:
-            due_dt = datetime.fromisoformat(
-                assignment.due_at.replace("Z", "+00:00")
-            ).astimezone(timezone.utc)
-            text_parts.append(f"Due: {due_dt.strftime('%B %d, %Y %H:%M UTC')}")
-
-        doc_updated_at = (
-            datetime.fromisoformat(
-                assignment.updated_at.replace("Z", "+00:00")
-            ).astimezone(timezone.utc)
-            if assignment.updated_at
-            else None
-        )
-
-        document = self._build_document(
-            doc_id=f"canvas-assignment-{assignment.course_id}-{assignment.id}",
-            link=assignment.html_url,
-            text="\n\n".join(text_parts),
-            semantic_identifier=assignment.name or f"Assignment {assignment.id}",
-            doc_updated_at=doc_updated_at,
-            course_id=assignment.course_id,
-            doc_type="assignment",
-        )
-        return document
-
-    def _convert_announcement_to_document(
-        self, announcement: CanvasAnnouncement
-    ) -> Document:
-        """Convert a Canvas announcement to a Document."""
-        text_parts = [announcement.title]
-        msg_text = (
-            parse_html_page_basic(announcement.message) if announcement.message else ""
-        )
-        if msg_text:
-            text_parts.append(msg_text)
-
-        doc_updated_at = (
-            datetime.fromisoformat(
-                announcement.posted_at.replace("Z", "+00:00")
-            ).astimezone(timezone.utc)
-            if announcement.posted_at
-            else None
-        )
-
-        document = self._build_document(
-            doc_id=f"canvas-announcement-{announcement.course_id}-{announcement.id}",
-            link=announcement.html_url,
-            text="\n\n".join(text_parts),
-            semantic_identifier=announcement.title or f"Announcement {announcement.id}",
-            doc_updated_at=doc_updated_at,
-            course_id=announcement.course_id,
-            doc_type="announcement",
-        )
-        return document
-
-    @override
-    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
-        """Load and validate Canvas credentials."""
-        access_token = credentials.get("canvas_access_token")
-        if not access_token:
-            raise ConnectorMissingCredentialError("Canvas")
-
-        try:
-            client = CanvasApiClient(
-                bearer_token=access_token,
-                canvas_base_url=self.canvas_base_url,
-            )
-            client.get("courses", params={"per_page": "1"})
-        except ValueError as e:
-            raise ConnectorValidationError(f"Invalid Canvas base URL: {e}")
-        except OnyxError as e:
-            _handle_canvas_api_error(e)
-
-        self._canvas_client = client
-        return None
-
-    @override
-    def validate_connector_settings(self) -> None:
-        """Validate Canvas connector settings by testing API access."""
-        try:
-            self.canvas_client.get("courses", params={"per_page": "1"})
-            logger.info("Canvas connector settings validated successfully")
-        except OnyxError as e:
-            _handle_canvas_api_error(e)
-        except ConnectorMissingCredentialError:
-            raise
-        except Exception as exc:
-            raise UnexpectedValidationError(
-                f"Unexpected error during Canvas settings validation: {exc}"
-            )
-
-    @override
-    def load_from_checkpoint(
-        self,
-        start: SecondsSinceUnixEpoch,
-        end: SecondsSinceUnixEpoch,
-        checkpoint: CanvasConnectorCheckpoint,
-    ) -> CheckpointOutput[CanvasConnectorCheckpoint]:
-        # TODO(benwu408): implemented in PR3 (checkpoint)
-        raise NotImplementedError
-
-    @override
-    def load_from_checkpoint_with_perm_sync(
-        self,
-        start: SecondsSinceUnixEpoch,
-        end: SecondsSinceUnixEpoch,
-        checkpoint: CanvasConnectorCheckpoint,
-    ) -> CheckpointOutput[CanvasConnectorCheckpoint]:
-        # TODO(benwu408): implemented in PR3 (checkpoint)
-        raise NotImplementedError
-
-    @override
-    def build_dummy_checkpoint(self) -> CanvasConnectorCheckpoint:
-        # TODO(benwu408): implemented in PR3 (checkpoint)
-        raise NotImplementedError
-
-    @override
-    def validate_checkpoint_json(
-        self, checkpoint_json: str
-    ) -> CanvasConnectorCheckpoint:
-        # TODO(benwu408): implemented in PR3 (checkpoint)
-        raise NotImplementedError
-
-    @override
-    def retrieve_all_slim_docs_perm_sync(
-        self,
-        start: SecondsSinceUnixEpoch | None = None,
-        end: SecondsSinceUnixEpoch | None = None,
-        callback: IndexingHeartbeatInterface | None = None,
-    ) -> GenerateSlimDocumentOutput:
-        # TODO(benwu408): implemented in PR4 (perm sync)
-        raise NotImplementedError
--- a/backend/onyx/connectors/registry.py
+++ b/backend/onyx/connectors/registry.py
@@ -72,10 +72,6 @@ CONNECTOR_CLASS_MAP = {
        module_path="onyx.connectors.coda.connector",
        class_name="CodaConnector",
    ),
-    DocumentSource.CANVAS: ConnectorMapping(
-        module_path="onyx.connectors.canvas.connector",
-        class_name="CanvasConnector",
-    ),
    DocumentSource.NOTION: ConnectorMapping(
        module_path="onyx.connectors.notion.connector",
        class_name="NotionConnector",
--- a/backend/onyx/db/projects.py
+++ b/backend/onyx/db/projects.py
@@ -7,6 +7,7 @@ from fastapi import HTTPException
 from fastapi import UploadFile
 from pydantic import BaseModel
 from pydantic import ConfigDict
+from pydantic import Field
 from sqlalchemy import func
 from sqlalchemy.orm import Session
 from starlette.background import BackgroundTasks
@@ -34,9 +35,19 @@ class CategorizedFilesResult(BaseModel):
    user_files: list[UserFile]
    rejected_files: list[RejectedFile]
    id_to_temp_id: dict[str, str]
+    # Filenames that should be stored but not indexed.
+    skip_indexing_filenames: set[str] = Field(default_factory=set)
    # Allow SQLAlchemy ORM models inside this result container
    model_config = ConfigDict(arbitrary_types_allowed=True)

+    @property
+    def indexable_files(self) -> list[UserFile]:
+        return [
+            uf
+            for uf in self.user_files
+            if (uf.name or "") not in self.skip_indexing_filenames
+        ]
+

 def build_hashed_file_key(file: UploadFile) -> str:
    name_prefix = (file.filename or "")[:50]
@@ -98,6 +109,7 @@ def create_user_files(
        user_files=user_files,
        rejected_files=rejected_files,
        id_to_temp_id=id_to_temp_id,
+        skip_indexing_filenames=categorized_files.skip_indexing,
    )


@@ -123,6 +135,7 @@ def upload_files_to_user_files_with_indexing(
    user_files = categorized_files_result.user_files
    rejected_files = categorized_files_result.rejected_files
    id_to_temp_id = categorized_files_result.id_to_temp_id
+    indexable_files = categorized_files_result.indexable_files
    # Trigger per-file processing immediately for the current tenant
    tenant_id = get_current_tenant_id()
    for rejected_file in rejected_files:
@@ -134,12 +147,12 @@ def upload_files_to_user_files_with_indexing(
        from onyx.background.task_utils import drain_processing_loop

        background_tasks.add_task(drain_processing_loop, tenant_id)
-        for user_file in user_files:
+        for user_file in indexable_files:
            logger.info(f"Queued in-process processing for user_file_id={user_file.id}")
    else:
        from onyx.background.celery.versioned_apps.client import app as client_app

-        for user_file in user_files:
+        for user_file in indexable_files:
            task = client_app.send_task(
                OnyxCeleryTask.PROCESS_SINGLE_USER_FILE,
                kwargs={"user_file_id": user_file.id, "tenant_id": tenant_id},
@@ -155,6 +168,7 @@ def upload_files_to_user_files_with_indexing(
        user_files=user_files,
        rejected_files=rejected_files,
        id_to_temp_id=id_to_temp_id,
+        skip_indexing_filenames=categorized_files_result.skip_indexing_filenames,
    )


--- a/backend/onyx/document_index/disabled.py
+++ b/backend/onyx/document_index/disabled.py
@@ -5,7 +5,6 @@ accidentally reaches the vector DB layer will fail loudly instead of timing
 out against a nonexistent Vespa/OpenSearch instance.
 """

-from collections.abc import Iterable
 from typing import Any

 from onyx.context.search.models import IndexFilters
@@ -67,7 +66,7 @@ class DisabledDocumentIndex(DocumentIndex):
    # ------------------------------------------------------------------
    def index(
        self,
-        chunks: Iterable[DocMetadataAwareIndexChunk],  # noqa: ARG002
+        chunks: list[DocMetadataAwareIndexChunk],  # noqa: ARG002
        index_batch_params: IndexBatchParams,  # noqa: ARG002
    ) -> set[DocumentInsertionRecord]:
        raise RuntimeError(VECTOR_DB_DISABLED_ERROR)
--- a/backend/onyx/document_index/interfaces.py
+++ b/backend/onyx/document_index/interfaces.py
@@ -1,5 +1,4 @@
 import abc
-from collections.abc import Iterable
 from dataclasses import dataclass
 from datetime import datetime
 from typing import Any
@@ -207,7 +206,7 @@ class Indexable(abc.ABC):
    @abc.abstractmethod
    def index(
        self,
-        chunks: Iterable[DocMetadataAwareIndexChunk],
+        chunks: list[DocMetadataAwareIndexChunk],
        index_batch_params: IndexBatchParams,
    ) -> set[DocumentInsertionRecord]:
        """
@@ -227,8 +226,8 @@ class Indexable(abc.ABC):
        it is done automatically outside of this code.

        Parameters:
-        - chunks: Document chunks with all of the information needed for
-                indexing to the document index.
+        - chunks: Document chunks with all of the information needed for indexing to the document
+                index.
        - tenant_id: The tenant id of the user whose chunks are being indexed
        - large_chunks_enabled: Whether large chunks are enabled

--- a/backend/onyx/document_index/interfaces_new.py
+++ b/backend/onyx/document_index/interfaces_new.py
@@ -1,5 +1,4 @@
 import abc
-from collections.abc import Iterable
 from typing import Self

 from pydantic import BaseModel
@@ -210,10 +209,10 @@ class Indexable(abc.ABC):
    @abc.abstractmethod
    def index(
        self,
-        chunks: Iterable[DocMetadataAwareIndexChunk],
+        chunks: list[DocMetadataAwareIndexChunk],
        indexing_metadata: IndexingMetadata,
    ) -> list[DocumentInsertionRecord]:
-        """Indexes an iterable of document chunks into the document index.
+        """Indexes a list of document chunks into the document index.

        This is often a batch operation including chunks from multiple
        documents.
--- a/backend/onyx/document_index/opensearch/client.py
+++ b/backend/onyx/document_index/opensearch/client.py
@@ -932,7 +932,7 @@ class OpenSearchIndexClient(OpenSearchClient):
    def search_for_document_ids(
        self,
        body: dict[str, Any],
-        search_type: OpenSearchSearchType = OpenSearchSearchType.UNKNOWN,
+        search_type: OpenSearchSearchType = OpenSearchSearchType.DOCUMENT_IDS,
    ) -> list[str]:
        """Searches the index and returns only document chunk IDs.

--- a/backend/onyx/document_index/opensearch/constants.py
+++ b/backend/onyx/document_index/opensearch/constants.py
@@ -60,7 +60,8 @@ class OpenSearchSearchType(str, Enum):
    KEYWORD = "keyword"
    SEMANTIC = "semantic"
    RANDOM = "random"
-    DOC_ID_RETRIEVAL = "doc_id_retrieval"
+    ID_RETRIEVAL = "id_retrieval"
+    DOCUMENT_IDS = "document_ids"
    UNKNOWN = "unknown"


--- a/backend/onyx/document_index/opensearch/opensearch_document_index.py
+++ b/backend/onyx/document_index/opensearch/opensearch_document_index.py
@@ -1,12 +1,11 @@
 import json
-from collections.abc import Iterable
+from collections import defaultdict
 from typing import Any

 import httpx
 from opensearchpy import NotFoundError

 from onyx.access.models import DocumentAccess
-from onyx.configs.app_configs import MAX_CHUNKS_PER_DOC_BATCH
 from onyx.configs.app_configs import VERIFY_CREATE_OPENSEARCH_INDEX_ON_INIT_MT
 from onyx.configs.chat_configs import NUM_RETURNED_HITS
 from onyx.configs.chat_configs import TITLE_CONTENT_RATIO
@@ -352,7 +351,7 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):

    def index(
        self,
-        chunks: Iterable[DocMetadataAwareIndexChunk],
+        chunks: list[DocMetadataAwareIndexChunk],
        index_batch_params: IndexBatchParams,
    ) -> set[OldDocumentInsertionRecord]:
        """
@@ -648,10 +647,10 @@ class OpenSearchDocumentIndex(DocumentIndex):

    def index(
        self,
-        chunks: Iterable[DocMetadataAwareIndexChunk],
-        indexing_metadata: IndexingMetadata,
+        chunks: list[DocMetadataAwareIndexChunk],
+        indexing_metadata: IndexingMetadata,  # noqa: ARG002
    ) -> list[DocumentInsertionRecord]:
-        """Indexes an iterable of document chunks into the document index.
+        """Indexes a list of document chunks into the document index.

        Groups chunks by document ID and for each document, deletes existing
        chunks and indexes the new chunks in bulk.
@@ -674,34 +673,29 @@ class OpenSearchDocumentIndex(DocumentIndex):
                document is newly indexed or had already existed and was just
                updated.
        """
-        total_chunks = sum(
-            cc.new_chunk_cnt
-            for cc in indexing_metadata.doc_id_to_chunk_cnt_diff.values()
+        # Group chunks by document ID.
+        doc_id_to_chunks: dict[str, list[DocMetadataAwareIndexChunk]] = defaultdict(
+            list
        )
+        for chunk in chunks:
+            doc_id_to_chunks[chunk.source_document.id].append(chunk)
        logger.debug(
-            f"[OpenSearchDocumentIndex] Indexing {total_chunks} chunks from {len(indexing_metadata.doc_id_to_chunk_cnt_diff)} "
+            f"[OpenSearchDocumentIndex] Indexing {len(chunks)} chunks from {len(doc_id_to_chunks)} "
            f"documents for index {self._index_name}."
        )

        document_indexing_results: list[DocumentInsertionRecord] = []
-        deleted_doc_ids: set[str] = set()
-        # Buffer chunks per document as they arrive from the iterable.
-        # When the document ID changes flush the buffered chunks.
-        current_doc_id: str | None = None
-        current_chunks: list[DocMetadataAwareIndexChunk] = []
-
-        def _flush_chunks(doc_chunks: list[DocMetadataAwareIndexChunk]) -> None:
-            assert len(doc_chunks) > 0, "doc_chunks is empty"
-
+        # Try to index per-document.
+        for _, chunks in doc_id_to_chunks.items():
            # Create a batch of OpenSearch-formatted chunks for bulk insertion.
-            # Since we are doing this in batches, an error occurring midway
-            # can result in a state where chunks are deleted and not all the
-            # new chunks have been indexed.
+            # Do this before deleting existing chunks to reduce the amount of
+            # time the document index has no content for a given document, and
+            # to reduce the chance of entering a state where we delete chunks,
+            # then some error happens, and never successfully index new chunks.
            chunk_batch: list[DocumentChunk] = [
-                _convert_onyx_chunk_to_opensearch_document(chunk)
-                for chunk in doc_chunks
+                _convert_onyx_chunk_to_opensearch_document(chunk) for chunk in chunks
            ]
-            onyx_document: Document = doc_chunks[0].source_document
+            onyx_document: Document = chunks[0].source_document
            # First delete the doc's chunks from the index. This is so that
            # there are no dangling chunks in the index, in the event that the
            # new document's content contains fewer chunks than the previous
@@ -710,43 +704,22 @@ class OpenSearchDocumentIndex(DocumentIndex):
            # if the chunk count has actually decreased. This assumes that
            # overlapping chunks are perfectly overwritten. If we can't
            # guarantee that then we need the code as-is.
-            if onyx_document.id not in deleted_doc_ids:
-                num_chunks_deleted = self.delete(
-                    onyx_document.id, onyx_document.chunk_count
-                )
-                deleted_doc_ids.add(onyx_document.id)
-                # If we see that chunks were deleted we assume the doc already
-                # existed. We record the result before bulk_index_documents
-                # runs. If indexing raises, this entire result list is discarded
-                # by the caller's retry logic, so early recording is safe.
-                document_indexing_results.append(
-                    DocumentInsertionRecord(
-                        document_id=onyx_document.id,
-                        already_existed=num_chunks_deleted > 0,
-                    )
-                )
+            num_chunks_deleted = self.delete(
+                onyx_document.id, onyx_document.chunk_count
+            )
+            # If we see that chunks were deleted we assume the doc already
+            # existed.
+            document_insertion_record = DocumentInsertionRecord(
+                document_id=onyx_document.id,
+                already_existed=num_chunks_deleted > 0,
+            )
            # Now index. This will raise if a chunk of the same ID exists, which
            # we do not expect because we should have deleted all chunks.
            self._client.bulk_index_documents(
                documents=chunk_batch,
                tenant_state=self._tenant_state,
            )
-
-        for chunk in chunks:
-            doc_id = chunk.source_document.id
-            if doc_id != current_doc_id:
-                if current_chunks:
-                    _flush_chunks(current_chunks)
-                current_doc_id = doc_id
-                current_chunks = [chunk]
-            elif len(current_chunks) >= MAX_CHUNKS_PER_DOC_BATCH:
-                _flush_chunks(current_chunks)
-                current_chunks = [chunk]
-            else:
-                current_chunks.append(chunk)
-
-        if current_chunks:
-            _flush_chunks(current_chunks)
+            document_indexing_results.append(document_insertion_record)

        return document_indexing_results

@@ -928,7 +901,7 @@ class OpenSearchDocumentIndex(DocumentIndex):
            search_hits = self._client.search(
                body=query_body,
                search_pipeline_id=None,
-                search_type=OpenSearchSearchType.DOC_ID_RETRIEVAL,
+                search_type=OpenSearchSearchType.ID_RETRIEVAL,
            )
            inference_chunks_uncleaned: list[InferenceChunkUncleaned] = [
                _convert_retrieved_opensearch_chunk_to_inference_chunk_uncleaned(
--- a/backend/onyx/document_index/vespa/index.py
+++ b/backend/onyx/document_index/vespa/index.py
@@ -6,7 +6,6 @@ import re
 import time
 import urllib
 import zipfile
-from collections.abc import Iterable
 from dataclasses import dataclass
 from datetime import datetime
 from datetime import timedelta
@@ -462,7 +461,7 @@ class VespaIndex(DocumentIndex):

    def index(
        self,
-        chunks: Iterable[DocMetadataAwareIndexChunk],
+        chunks: list[DocMetadataAwareIndexChunk],
        index_batch_params: IndexBatchParams,
    ) -> set[OldDocumentInsertionRecord]:
        """
--- a/backend/onyx/document_index/vespa/vespa_document_index.py
+++ b/backend/onyx/document_index/vespa/vespa_document_index.py
@@ -1,8 +1,6 @@
 import concurrent.futures
 import logging
 import random
-from collections.abc import Generator
-from collections.abc import Iterable
 from typing import Any
 from uuid import UUID

@@ -10,7 +8,6 @@ import httpx
 from pydantic import BaseModel
 from retry import retry

-from onyx.configs.app_configs import MAX_CHUNKS_PER_DOC_BATCH
 from onyx.configs.app_configs import RECENCY_BIAS_MULTIPLIER
 from onyx.configs.app_configs import RERANK_COUNT
 from onyx.configs.chat_configs import DOC_TIME_DECAY
@@ -321,7 +318,7 @@ class VespaDocumentIndex(DocumentIndex):

    def index(
        self,
-        chunks: Iterable[DocMetadataAwareIndexChunk],
+        chunks: list[DocMetadataAwareIndexChunk],
        indexing_metadata: IndexingMetadata,
    ) -> list[DocumentInsertionRecord]:
        doc_id_to_chunk_cnt_diff = indexing_metadata.doc_id_to_chunk_cnt_diff
@@ -341,31 +338,22 @@ class VespaDocumentIndex(DocumentIndex):

        # Vespa has restrictions on valid characters, yet document IDs come from
        # external w.r.t. this class. We need to sanitize them.
-        #
-        # Instead of materializing all cleaned chunks upfront, we stream them
-        # through a generator that cleans IDs and builds the original-ID mapping
-        # incrementally as chunks flow into Vespa.
-        def _clean_and_track(
-            chunks_iter: Iterable[DocMetadataAwareIndexChunk],
-            id_map: dict[str, str],
-            seen_ids: set[str],
-        ) -> Generator[DocMetadataAwareIndexChunk, None, None]:
-            """Cleans chunk IDs and builds the original-ID mapping
-            incrementally as chunks flow through, avoiding a separate
-            materialization pass."""
-            for chunk in chunks_iter:
-                original_id = chunk.source_document.id
-                cleaned = clean_chunk_id_copy(chunk)
-                cleaned_id = cleaned.source_document.id
-                # Needed so the final DocumentInsertionRecord returned can have
-                # the original document ID. cleaned_chunks might not contain IDs
-                # exactly as callers supplied them.
-                id_map[cleaned_id] = original_id
-                seen_ids.add(cleaned_id)
-                yield cleaned
+        cleaned_chunks: list[DocMetadataAwareIndexChunk] = [
+            clean_chunk_id_copy(chunk) for chunk in chunks
+        ]
+        assert len(cleaned_chunks) == len(
+            chunks
+        ), "Bug: Cleaned chunks and input chunks have different lengths."

-        new_document_id_to_original_document_id: dict[str, str] = {}
-        all_cleaned_doc_ids: set[str] = set()
+        # Needed so the final DocumentInsertionRecord returned can have the
+        # original document ID. cleaned_chunks might not contain IDs exactly as
+        # callers supplied them.
+        new_document_id_to_original_document_id: dict[str, str] = dict()
+        for i, cleaned_chunk in enumerate(cleaned_chunks):
+            old_chunk = chunks[i]
+            new_document_id_to_original_document_id[
+                cleaned_chunk.source_document.id
+            ] = old_chunk.source_document.id

        existing_docs: set[str] = set()

@@ -421,16 +409,8 @@ class VespaDocumentIndex(DocumentIndex):
                    executor=executor,
                )

-            # Insert new Vespa documents, streaming through the cleaning
-            # pipeline so chunks are never fully materialized.
-            cleaned_chunks = _clean_and_track(
-                chunks,
-                new_document_id_to_original_document_id,
-                all_cleaned_doc_ids,
-            )
-            for chunk_batch in batch_generator(
-                cleaned_chunks, min(BATCH_SIZE, MAX_CHUNKS_PER_DOC_BATCH)
-            ):
+            # Insert new Vespa documents.
+            for chunk_batch in batch_generator(cleaned_chunks, BATCH_SIZE):
                batch_index_vespa_chunks(
                    chunks=chunk_batch,
                    index_name=self._index_name,
@@ -439,6 +419,10 @@ class VespaDocumentIndex(DocumentIndex):
                    executor=executor,
                )

+        all_cleaned_doc_ids: set[str] = {
+            chunk.source_document.id for chunk in cleaned_chunks
+        }
+
        return [
            DocumentInsertionRecord(
                document_id=new_document_id_to_original_document_id[cleaned_doc_id],
--- a/backend/onyx/file_processing/extract_file_text.py
+++ b/backend/onyx/file_processing/extract_file_text.py
@@ -44,7 +44,6 @@ KNOWN_OPENPYXL_BUGS = [
    "Value must be either numerical or a string containing a wildcard",
    "File contains no valid workbook part",
    "Unable to read workbook: could not read stylesheet from None",
-    "Colors must be aRGB hex values",
 ]


--- a/backend/onyx/file_processing/file_types.py
+++ b/backend/onyx/file_processing/file_types.py
@@ -15,6 +15,7 @@ PLAIN_TEXT_MIME_TYPE = "text/plain"
 class OnyxMimeTypes:
    IMAGE_MIME_TYPES = {"image/jpg", "image/jpeg", "image/png", "image/webp"}
    CSV_MIME_TYPES = {"text/csv"}
+    TABULAR_MIME_TYPES = CSV_MIME_TYPES | {SPREADSHEET_MIME_TYPE}
    TEXT_MIME_TYPES = {
        PLAIN_TEXT_MIME_TYPE,
        "text/markdown",
@@ -34,13 +35,12 @@ class OnyxMimeTypes:
        PDF_MIME_TYPE,
        WORD_PROCESSING_MIME_TYPE,
        PRESENTATION_MIME_TYPE,
-        SPREADSHEET_MIME_TYPE,
        "message/rfc822",
        "application/epub+zip",
    }

    ALLOWED_MIME_TYPES = IMAGE_MIME_TYPES.union(
-        TEXT_MIME_TYPES, DOCUMENT_MIME_TYPES, CSV_MIME_TYPES
+        TEXT_MIME_TYPES, DOCUMENT_MIME_TYPES, TABULAR_MIME_TYPES
    )

    EXCLUDED_IMAGE_TYPES = {
--- a/backend/onyx/file_store/models.py
+++ b/backend/onyx/file_store/models.py
@@ -13,13 +13,14 @@ class ChatFileType(str, Enum):
    DOC = "document"
    # Plain text only contain the text
    PLAIN_TEXT = "plain_text"
-    CSV = "csv"
+    # Tabular data files (CSV, XLSX)
+    TABULAR = "tabular"

    def is_text_file(self) -> bool:
        return self in (
            ChatFileType.PLAIN_TEXT,
            ChatFileType.DOC,
-            ChatFileType.CSV,
+            ChatFileType.TABULAR,
        )


--- a/backend/onyx/indexing/adapters/document_indexing_adapter.py
+++ b/backend/onyx/indexing/adapters/document_indexing_adapter.py
@@ -19,8 +19,7 @@ from onyx.db.document import update_docs_updated_at__no_commit
 from onyx.db.document_set import fetch_document_sets_for_documents
 from onyx.indexing.indexing_pipeline import DocumentBatchPrepareContext
 from onyx.indexing.indexing_pipeline import index_doc_batch_prepare
-from onyx.indexing.models import ChunkEnrichmentContext
-from onyx.indexing.models import DocAwareChunk
+from onyx.indexing.models import BuildMetadataAwareChunksResult
 from onyx.indexing.models import DocMetadataAwareIndexChunk
 from onyx.indexing.models import IndexChunk
 from onyx.indexing.models import UpdatableChunkData
@@ -86,21 +85,14 @@ class DocumentIndexingBatchAdapter:
        ) as transaction:
            yield transaction

-    def prepare_enrichment(
+    def build_metadata_aware_chunks(
        self,
-        context: DocumentBatchPrepareContext,
+        chunks_with_embeddings: list[IndexChunk],
+        chunk_content_scores: list[float],
        tenant_id: str,
-        chunks: list[DocAwareChunk],
-    ) -> "DocumentChunkEnricher":
-        """Do all DB lookups once and return a per-chunk enricher."""
-        updatable_ids = [doc.id for doc in context.updatable_docs]
-
-        doc_id_to_new_chunk_cnt: dict[str, int] = {
-            doc_id: 0 for doc_id in updatable_ids
-        }
-        for chunk in chunks:
-            if chunk.source_document.id in doc_id_to_new_chunk_cnt:
-                doc_id_to_new_chunk_cnt[chunk.source_document.id] += 1
+        context: DocumentBatchPrepareContext,
+    ) -> BuildMetadataAwareChunksResult:
+        """Enrich chunks with access, document sets, boosts, token counts, and hierarchy."""

        no_access = DocumentAccess.build(
            user_emails=[],
@@ -110,30 +102,67 @@ class DocumentIndexingBatchAdapter:
            is_public=False,
        )

-        return DocumentChunkEnricher(
-            doc_id_to_access_info=get_access_for_documents(
+        updatable_ids = [doc.id for doc in context.updatable_docs]
+
+        doc_id_to_access_info = get_access_for_documents(
+            document_ids=updatable_ids, db_session=self.db_session
+        )
+        doc_id_to_document_set = {
+            document_id: document_sets
+            for document_id, document_sets in fetch_document_sets_for_documents(
                document_ids=updatable_ids, db_session=self.db_session
-            ),
-            doc_id_to_document_set={
-                document_id: document_sets
-                for document_id, document_sets in fetch_document_sets_for_documents(
-                    document_ids=updatable_ids, db_session=self.db_session
-                )
-            },
-            doc_id_to_ancestor_ids=self._get_ancestor_ids_for_documents(
-                context.updatable_docs, tenant_id
-            ),
-            id_to_boost_map=context.id_to_boost_map,
-            doc_id_to_previous_chunk_cnt={
-                document_id: chunk_count
-                for document_id, chunk_count in fetch_chunk_counts_for_documents(
-                    document_ids=updatable_ids,
-                    db_session=self.db_session,
-                )
-            },
-            doc_id_to_new_chunk_cnt=dict(doc_id_to_new_chunk_cnt),
-            no_access=no_access,
-            tenant_id=tenant_id,
+            )
+        }
+
+        doc_id_to_previous_chunk_cnt: dict[str, int] = {
+            document_id: chunk_count
+            for document_id, chunk_count in fetch_chunk_counts_for_documents(
+                document_ids=updatable_ids,
+                db_session=self.db_session,
+            )
+        }
+
+        doc_id_to_new_chunk_cnt: dict[str, int] = {
+            doc_id: 0 for doc_id in updatable_ids
+        }
+        for chunk in chunks_with_embeddings:
+            if chunk.source_document.id in doc_id_to_new_chunk_cnt:
+                doc_id_to_new_chunk_cnt[chunk.source_document.id] += 1
+
+        # Get ancestor hierarchy node IDs for each document
+        doc_id_to_ancestor_ids = self._get_ancestor_ids_for_documents(
+            context.updatable_docs, tenant_id
+        )
+
+        access_aware_chunks = [
+            DocMetadataAwareIndexChunk.from_index_chunk(
+                index_chunk=chunk,
+                access=doc_id_to_access_info.get(chunk.source_document.id, no_access),
+                document_sets=set(
+                    doc_id_to_document_set.get(chunk.source_document.id, [])
+                ),
+                user_project=[],
+                personas=[],
+                boost=(
+                    context.id_to_boost_map[chunk.source_document.id]
+                    if chunk.source_document.id in context.id_to_boost_map
+                    else DEFAULT_BOOST
+                ),
+                tenant_id=tenant_id,
+                aggregated_chunk_boost_factor=chunk_content_scores[chunk_num],
+                ancestor_hierarchy_node_ids=doc_id_to_ancestor_ids[
+                    chunk.source_document.id
+                ],
+            )
+            for chunk_num, chunk in enumerate(chunks_with_embeddings)
+        ]
+
+        return BuildMetadataAwareChunksResult(
+            chunks=access_aware_chunks,
+            doc_id_to_previous_chunk_cnt=doc_id_to_previous_chunk_cnt,
+            doc_id_to_new_chunk_cnt=doc_id_to_new_chunk_cnt,
+            user_file_id_to_raw_text={},
+            user_file_id_to_token_count={},
        )

    def _get_ancestor_ids_for_documents(
@@ -174,7 +203,7 @@ class DocumentIndexingBatchAdapter:
        context: DocumentBatchPrepareContext,
        updatable_chunk_data: list[UpdatableChunkData],
        filtered_documents: list[Document],
-        enrichment: ChunkEnrichmentContext,
+        result: BuildMetadataAwareChunksResult,
    ) -> None:
        """Finalize DB updates, store plaintext, and mark docs as indexed."""
        updatable_ids = [doc.id for doc in context.updatable_docs]
@@ -198,7 +227,7 @@ class DocumentIndexingBatchAdapter:

        update_docs_chunk_count__no_commit(
            document_ids=updatable_ids,
-            doc_id_to_chunk_count=enrichment.doc_id_to_new_chunk_cnt,
+            doc_id_to_chunk_count=result.doc_id_to_new_chunk_cnt,
            db_session=self.db_session,
        )

@@ -220,52 +249,3 @@ class DocumentIndexingBatchAdapter:
        )

        self.db_session.commit()
-
-
-class DocumentChunkEnricher:
-    """Pre-computed metadata for per-chunk enrichment of connector documents."""
-
-    def __init__(
-        self,
-        doc_id_to_access_info: dict[str, DocumentAccess],
-        doc_id_to_document_set: dict[str, list[str]],
-        doc_id_to_ancestor_ids: dict[str, list[int]],
-        id_to_boost_map: dict[str, int],
-        doc_id_to_previous_chunk_cnt: dict[str, int],
-        doc_id_to_new_chunk_cnt: dict[str, int],
-        no_access: DocumentAccess,
-        tenant_id: str,
-    ) -> None:
-        self._doc_id_to_access_info = doc_id_to_access_info
-        self._doc_id_to_document_set = doc_id_to_document_set
-        self._doc_id_to_ancestor_ids = doc_id_to_ancestor_ids
-        self._id_to_boost_map = id_to_boost_map
-        self._no_access = no_access
-        self._tenant_id = tenant_id
-        self.doc_id_to_previous_chunk_cnt = doc_id_to_previous_chunk_cnt
-        self.doc_id_to_new_chunk_cnt = doc_id_to_new_chunk_cnt
-
-    def enrich_chunk(
-        self, chunk: IndexChunk, score: float
-    ) -> DocMetadataAwareIndexChunk:
-        return DocMetadataAwareIndexChunk.from_index_chunk(
-            index_chunk=chunk,
-            access=self._doc_id_to_access_info.get(
-                chunk.source_document.id, self._no_access
-            ),
-            document_sets=set(
-                self._doc_id_to_document_set.get(chunk.source_document.id, [])
-            ),
-            user_project=[],
-            personas=[],
-            boost=(
-                self._id_to_boost_map[chunk.source_document.id]
-                if chunk.source_document.id in self._id_to_boost_map
-                else DEFAULT_BOOST
-            ),
-            tenant_id=self._tenant_id,
-            aggregated_chunk_boost_factor=score,
-            ancestor_hierarchy_node_ids=self._doc_id_to_ancestor_ids[
-                chunk.source_document.id
-            ],
-        )
--- a/backend/onyx/indexing/adapters/user_file_indexing_adapter.py
+++ b/backend/onyx/indexing/adapters/user_file_indexing_adapter.py
@@ -1,9 +1,6 @@
-from __future__ import annotations
-
 import contextlib
 import datetime
 import time
-from collections import defaultdict
 from collections.abc import Generator
 from uuid import UUID

@@ -27,8 +24,7 @@ from onyx.db.user_file import fetch_persona_ids_for_user_files
 from onyx.db.user_file import fetch_user_project_ids_for_user_files
 from onyx.file_store.utils import store_user_file_plaintext
 from onyx.indexing.indexing_pipeline import DocumentBatchPrepareContext
-from onyx.indexing.models import ChunkEnrichmentContext
-from onyx.indexing.models import DocAwareChunk
+from onyx.indexing.models import BuildMetadataAwareChunksResult
 from onyx.indexing.models import DocMetadataAwareIndexChunk
 from onyx.indexing.models import IndexChunk
 from onyx.indexing.models import UpdatableChunkData
@@ -106,20 +102,13 @@ class UserFileIndexingAdapter:
                f"Failed to acquire locks after {_NUM_LOCK_ATTEMPTS} attempts for user files: {[doc.id for doc in documents]}"
            )

-    def prepare_enrichment(
+    def build_metadata_aware_chunks(
        self,
-        context: DocumentBatchPrepareContext,
+        chunks_with_embeddings: list[IndexChunk],
+        chunk_content_scores: list[float],
        tenant_id: str,
-        chunks: list[DocAwareChunk],
-    ) -> UserFileChunkEnricher:
-        """Do all DB lookups and pre-compute file metadata from chunks."""
-        updatable_ids = [doc.id for doc in context.updatable_docs]
-
-        doc_id_to_new_chunk_cnt: dict[str, int] = defaultdict(int)
-        content_by_file: dict[str, list[str]] = defaultdict(list)
-        for chunk in chunks:
-            doc_id_to_new_chunk_cnt[chunk.source_document.id] += 1
-            content_by_file[chunk.source_document.id].append(chunk.content)
+        context: DocumentBatchPrepareContext,
+    ) -> BuildMetadataAwareChunksResult:

        no_access = DocumentAccess.build(
            user_emails=[],
@@ -129,6 +118,7 @@ class UserFileIndexingAdapter:
            is_public=False,
        )

+        updatable_ids = [doc.id for doc in context.updatable_docs]
        user_file_id_to_project_ids = fetch_user_project_ids_for_user_files(
            user_file_ids=updatable_ids,
            db_session=self.db_session,
@@ -149,6 +139,17 @@ class UserFileIndexingAdapter:
            )
        }

+        user_file_id_to_new_chunk_cnt: dict[str, int] = {
+            user_file_id: len(
+                [
+                    chunk
+                    for chunk in chunks_with_embeddings
+                    if chunk.source_document.id == user_file_id
+                ]
+            )
+            for user_file_id in updatable_ids
+        }
+
        # Initialize tokenizer used for token count calculation
        try:
            llm = get_default_llm()
@@ -163,9 +164,15 @@ class UserFileIndexingAdapter:
        user_file_id_to_raw_text: dict[str, str] = {}
        user_file_id_to_token_count: dict[str, int | None] = {}
        for user_file_id in updatable_ids:
-            contents = content_by_file.get(user_file_id)
-            if contents:
-                combined_content = " ".join(contents)
+            user_file_chunks = [
+                chunk
+                for chunk in chunks_with_embeddings
+                if chunk.source_document.id == user_file_id
+            ]
+            if user_file_chunks:
+                combined_content = " ".join(
+                    [chunk.content for chunk in user_file_chunks]
+                )
                user_file_id_to_raw_text[str(user_file_id)] = combined_content
                token_count: int = (
                    count_tokens(combined_content, llm_tokenizer)
@@ -177,16 +184,28 @@ class UserFileIndexingAdapter:
                user_file_id_to_raw_text[str(user_file_id)] = ""
                user_file_id_to_token_count[str(user_file_id)] = None

-        return UserFileChunkEnricher(
-            user_file_id_to_access=user_file_id_to_access,
-            user_file_id_to_project_ids=user_file_id_to_project_ids,
-            user_file_id_to_persona_ids=user_file_id_to_persona_ids,
+        access_aware_chunks = [
+            DocMetadataAwareIndexChunk.from_index_chunk(
+                index_chunk=chunk,
+                access=user_file_id_to_access.get(chunk.source_document.id, no_access),
+                document_sets=set(),
+                user_project=user_file_id_to_project_ids.get(
+                    chunk.source_document.id, []
+                ),
+                personas=user_file_id_to_persona_ids.get(chunk.source_document.id, []),
+                boost=DEFAULT_BOOST,
+                tenant_id=tenant_id,
+                aggregated_chunk_boost_factor=chunk_content_scores[chunk_num],
+            )
+            for chunk_num, chunk in enumerate(chunks_with_embeddings)
+        ]
+
+        return BuildMetadataAwareChunksResult(
+            chunks=access_aware_chunks,
            doc_id_to_previous_chunk_cnt=user_file_id_to_previous_chunk_cnt,
-            doc_id_to_new_chunk_cnt=dict(doc_id_to_new_chunk_cnt),
+            doc_id_to_new_chunk_cnt=user_file_id_to_new_chunk_cnt,
            user_file_id_to_raw_text=user_file_id_to_raw_text,
            user_file_id_to_token_count=user_file_id_to_token_count,
-            no_access=no_access,
-            tenant_id=tenant_id,
        )

    def _notify_assistant_owners_if_files_ready(
@@ -230,9 +249,8 @@ class UserFileIndexingAdapter:
        context: DocumentBatchPrepareContext,
        updatable_chunk_data: list[UpdatableChunkData],  # noqa: ARG002
        filtered_documents: list[Document],  # noqa: ARG002
-        enrichment: ChunkEnrichmentContext,
+        result: BuildMetadataAwareChunksResult,
    ) -> None:
-        assert isinstance(enrichment, UserFileChunkEnricher)
        user_file_ids = [doc.id for doc in context.updatable_docs]

        user_files = (
@@ -248,10 +266,8 @@ class UserFileIndexingAdapter:
            user_file.last_project_sync_at = datetime.datetime.now(
                datetime.timezone.utc
            )
-            user_file.chunk_count = enrichment.doc_id_to_new_chunk_cnt.get(
-                str(user_file.id), 0
-            )
-            user_file.token_count = enrichment.user_file_id_to_token_count[
+            user_file.chunk_count = result.doc_id_to_new_chunk_cnt[str(user_file.id)]
+            user_file.token_count = result.user_file_id_to_token_count[
                str(user_file.id)
            ]

@@ -263,54 +279,8 @@ class UserFileIndexingAdapter:
        # Store the plaintext in the file store for faster retrieval
        # NOTE: this creates its own session to avoid committing the overall
        # transaction.
-        for user_file_id, raw_text in enrichment.user_file_id_to_raw_text.items():
+        for user_file_id, raw_text in result.user_file_id_to_raw_text.items():
            store_user_file_plaintext(
                user_file_id=UUID(user_file_id),
                plaintext_content=raw_text,
            )
-
-
-class UserFileChunkEnricher:
-    """Pre-computed metadata for per-chunk enrichment of user-uploaded files."""
-
-    def __init__(
-        self,
-        user_file_id_to_access: dict[str, DocumentAccess],
-        user_file_id_to_project_ids: dict[str, list[int]],
-        user_file_id_to_persona_ids: dict[str, list[int]],
-        doc_id_to_previous_chunk_cnt: dict[str, int],
-        doc_id_to_new_chunk_cnt: dict[str, int],
-        user_file_id_to_raw_text: dict[str, str],
-        user_file_id_to_token_count: dict[str, int | None],
-        no_access: DocumentAccess,
-        tenant_id: str,
-    ) -> None:
-        self._user_file_id_to_access = user_file_id_to_access
-        self._user_file_id_to_project_ids = user_file_id_to_project_ids
-        self._user_file_id_to_persona_ids = user_file_id_to_persona_ids
-        self._no_access = no_access
-        self._tenant_id = tenant_id
-        self.doc_id_to_previous_chunk_cnt = doc_id_to_previous_chunk_cnt
-        self.doc_id_to_new_chunk_cnt = doc_id_to_new_chunk_cnt
-        self.user_file_id_to_raw_text = user_file_id_to_raw_text
-        self.user_file_id_to_token_count = user_file_id_to_token_count
-
-    def enrich_chunk(
-        self, chunk: IndexChunk, score: float
-    ) -> DocMetadataAwareIndexChunk:
-        return DocMetadataAwareIndexChunk.from_index_chunk(
-            index_chunk=chunk,
-            access=self._user_file_id_to_access.get(
-                chunk.source_document.id, self._no_access
-            ),
-            document_sets=set(),
-            user_project=self._user_file_id_to_project_ids.get(
-                chunk.source_document.id, []
-            ),
-            personas=self._user_file_id_to_persona_ids.get(
-                chunk.source_document.id, []
-            ),
-            boost=DEFAULT_BOOST,
-            tenant_id=self._tenant_id,
-            aggregated_chunk_boost_factor=score,
-        )
--- a/backend/onyx/indexing/chunk_batch_store.py
+++ b/backend/onyx/indexing/chunk_batch_store.py
@@ -1,89 +0,0 @@
-import pickle
-import shutil
-import tempfile
-from collections.abc import Iterator
-from pathlib import Path
-
-from onyx.indexing.models import IndexChunk
-
-
-class ChunkBatchStore:
-    """Manages serialization of embedded chunks to a temporary directory.
-
-    Owns the temp directory lifetime and provides save/load/stream/scrub
-    operations.
-
-    Use as a context manager to ensure cleanup::
-
-        with ChunkBatchStore() as store:
-            store.save(chunks, batch_idx=0)
-            for chunk in store.stream():
-                ...
-    """
-
-    _EXT = ".pkl"
-
-    def __init__(self) -> None:
-        self._tmpdir: Path | None = None
-
-    # -- context manager -----------------------------------------------------
-
-    def __enter__(self) -> "ChunkBatchStore":
-        self._tmpdir = Path(tempfile.mkdtemp(prefix="onyx_embeddings_"))
-        return self
-
-    def __exit__(self, *_exc: object) -> None:
-        if self._tmpdir is not None:
-            shutil.rmtree(self._tmpdir, ignore_errors=True)
-            self._tmpdir = None
-
-    @property
-    def _dir(self) -> Path:
-        assert self._tmpdir is not None, "ChunkBatchStore used outside context manager"
-        return self._tmpdir
-
-    # -- storage primitives --------------------------------------------------
-
-    def save(self, chunks: list[IndexChunk], batch_idx: int) -> None:
-        """Serialize a batch of embedded chunks to disk."""
-        with open(self._dir / f"batch_{batch_idx}{self._EXT}", "wb") as f:
-            pickle.dump(chunks, f)
-
-    def _load(self, batch_file: Path) -> list[IndexChunk]:
-        """Deserialize a batch of embedded chunks from a file."""
-        with open(batch_file, "rb") as f:
-            return pickle.load(f)
-
-    def _batch_files(self) -> list[Path]:
-        """Return batch files sorted by numeric index."""
-        return sorted(
-            self._dir.glob(f"batch_*{self._EXT}"),
-            key=lambda p: int(p.stem.removeprefix("batch_")),
-        )
-
-    # -- higher-level operations ---------------------------------------------
-
-    def stream(self) -> Iterator[IndexChunk]:
-        """Yield all chunks across all batch files.
-
-        Each call returns a fresh generator, so the data can be iterated
-        multiple times (e.g. once per document index).
-        """
-        for batch_file in self._batch_files():
-            yield from self._load(batch_file)
-
-    def scrub_failed_docs(self, failed_doc_ids: set[str]) -> None:
-        """Remove chunks belonging to *failed_doc_ids* from all batch files.
-
-        When a document fails embedding in batch N, earlier batches may
-        already contain successfully embedded chunks for that document.
-        This ensures the output is all-or-nothing per document.
-        """
-        for batch_file in self._batch_files():
-            batch_chunks = self._load(batch_file)
-            cleaned = [
-                c for c in batch_chunks if c.source_document.id not in failed_doc_ids
-            ]
-            if len(cleaned) != len(batch_chunks):
-                with open(batch_file, "wb") as f:
-                    pickle.dump(cleaned, f)
--- a/backend/onyx/indexing/indexing_pipeline.py
+++ b/backend/onyx/indexing/indexing_pipeline.py
@@ -1,8 +1,5 @@
 from collections import defaultdict
 from collections.abc import Callable
-from collections.abc import Generator
-from collections.abc import Iterator
-from contextlib import contextmanager
 from typing import Protocol

 from pydantic import BaseModel
@@ -12,7 +9,6 @@ from sqlalchemy.orm import Session
 from onyx.configs.app_configs import DEFAULT_CONTEXTUAL_RAG_LLM_NAME
 from onyx.configs.app_configs import DEFAULT_CONTEXTUAL_RAG_LLM_PROVIDER
 from onyx.configs.app_configs import ENABLE_CONTEXTUAL_RAG
-from onyx.configs.app_configs import MAX_CHUNKS_PER_DOC_BATCH
 from onyx.configs.app_configs import MAX_DOCUMENT_CHARS
 from onyx.configs.app_configs import MAX_TOKENS_FOR_FULL_INCLUSION
 from onyx.configs.app_configs import USE_CHUNK_SUMMARY
@@ -47,12 +43,10 @@ from onyx.document_index.interfaces import DocumentMetadata
 from onyx.document_index.interfaces import IndexBatchParams
 from onyx.file_processing.image_summarization import summarize_image_with_error_handling
 from onyx.file_store.file_store import get_default_file_store
-from onyx.indexing.chunk_batch_store import ChunkBatchStore
 from onyx.indexing.chunker import Chunker
 from onyx.indexing.embedder import embed_chunks_with_failure_handling
 from onyx.indexing.embedder import IndexingEmbedder
 from onyx.indexing.models import DocAwareChunk
-from onyx.indexing.models import DocMetadataAwareIndexChunk
 from onyx.indexing.models import IndexingBatchAdapter
 from onyx.indexing.models import UpdatableChunkData
 from onyx.indexing.vector_db_insertion import write_chunks_to_vector_db_with_backoff
@@ -69,7 +63,6 @@ from onyx.natural_language_processing.utils import tokenizer_trim_middle
 from onyx.prompts.contextual_retrieval import CONTEXTUAL_RAG_PROMPT1
 from onyx.prompts.contextual_retrieval import CONTEXTUAL_RAG_PROMPT2
 from onyx.prompts.contextual_retrieval import DOCUMENT_SUMMARY_PROMPT
-from onyx.utils.batching import batch_generator
 from onyx.utils.logger import setup_logger
 from onyx.utils.postgres_sanitization import sanitize_documents_for_postgres
 from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
@@ -98,20 +91,6 @@ class IndexingPipelineResult(BaseModel):

    failures: list[ConnectorFailure]

-    @classmethod
-    def empty(cls, total_docs: int) -> "IndexingPipelineResult":
-        return cls(
-            new_docs=0,
-            total_docs=total_docs,
-            total_chunks=0,
-            failures=[],
-        )
-
-
-class ChunkEmbeddingResult(BaseModel):
-    successful_chunk_ids: list[tuple[int, str]]  # (chunk_id, document_id)
-    connector_failures: list[ConnectorFailure]
-

 class IndexingPipelineProtocol(Protocol):
    def __call__(
@@ -160,110 +139,6 @@ def _upsert_documents_in_db(
        )


-def _get_failed_doc_ids(failures: list[ConnectorFailure]) -> set[str]:
-    """Extract document IDs from a list of connector failures."""
-    return {f.failed_document.document_id for f in failures if f.failed_document}
-
-
-def _embed_chunks_to_store(
-    chunks: list[DocAwareChunk],
-    embedder: IndexingEmbedder,
-    tenant_id: str,
-    request_id: str | None,
-    store: ChunkBatchStore,
-) -> ChunkEmbeddingResult:
-    """Embed chunks in batches, spilling each batch to *store*.
-
-    If a document fails embedding in any batch, its chunks are excluded from
-    all batches (including earlier ones already written) so that the output
-    is all-or-nothing per document.
-    """
-    successful_chunk_ids: list[tuple[int, str]] = []
-    all_embedding_failures: list[ConnectorFailure] = []
-    # Track failed doc IDs across all batches so that a failure in batch N
-    # causes chunks for that doc to be skipped in batch N+1 and stripped
-    # from earlier batches.
-    all_failed_doc_ids: set[str] = set()
-
-    for batch_idx, chunk_batch in enumerate(
-        batch_generator(chunks, MAX_CHUNKS_PER_DOC_BATCH)
-    ):
-        # Skip chunks belonging to documents that failed in earlier batches.
-        chunk_batch = [
-            c for c in chunk_batch if c.source_document.id not in all_failed_doc_ids
-        ]
-        if not chunk_batch:
-            continue
-
-        logger.debug(f"Embedding batch {batch_idx}: {len(chunk_batch)} chunks")
-
-        chunks_with_embeddings, embedding_failures = embed_chunks_with_failure_handling(
-            chunks=chunk_batch,
-            embedder=embedder,
-            tenant_id=tenant_id,
-            request_id=request_id,
-        )
-        all_embedding_failures.extend(embedding_failures)
-        all_failed_doc_ids.update(_get_failed_doc_ids(embedding_failures))
-
-        # Only keep successfully embedded chunks for non-failed docs.
-        chunks_with_embeddings = [
-            c
-            for c in chunks_with_embeddings
-            if c.source_document.id not in all_failed_doc_ids
-        ]
-
-        successful_chunk_ids.extend(
-            (c.chunk_id, c.source_document.id) for c in chunks_with_embeddings
-        )
-
-        store.save(chunks_with_embeddings, batch_idx)
-        del chunks_with_embeddings
-
-    # Scrub earlier batches for docs that failed in later batches.
-    if all_failed_doc_ids:
-        store.scrub_failed_docs(all_failed_doc_ids)
-        successful_chunk_ids = [
-            (chunk_id, doc_id)
-            for chunk_id, doc_id in successful_chunk_ids
-            if doc_id not in all_failed_doc_ids
-        ]
-
-    return ChunkEmbeddingResult(
-        successful_chunk_ids=successful_chunk_ids,
-        connector_failures=all_embedding_failures,
-    )
-
-
-@contextmanager
-def embed_and_stream(
-    chunks: list[DocAwareChunk],
-    embedder: IndexingEmbedder,
-    tenant_id: str,
-    request_id: str | None,
-) -> Generator[tuple[ChunkEmbeddingResult, ChunkBatchStore], None, None]:
-    """Embed chunks to disk and yield a ``(result, store)`` pair.
-
-    The store owns the temp directory — files are cleaned up when the context
-    manager exits.
-
-    Usage::
-
-        with embed_and_stream(chunks, embedder, tenant_id, req_id) as (result, store):
-            for chunk in store.stream():
-                ...
-    """
-    with ChunkBatchStore() as store:
-        result = _embed_chunks_to_store(
-            chunks=chunks,
-            embedder=embedder,
-            tenant_id=tenant_id,
-            request_id=request_id,
-            store=store,
-        )
-        yield result, store
-
-
 def get_doc_ids_to_update(
    documents: list[Document], db_docs: list[DBDocument]
 ) -> list[Document]:
@@ -762,29 +637,6 @@ def add_contextual_summaries(
    return chunks


-def _verify_indexing_completeness(
-    insertion_records: list[DocumentInsertionRecord],
-    write_failures: list[ConnectorFailure],
-    embedding_failed_doc_ids: set[str],
-    updatable_ids: list[str],
-    document_index_name: str,
-) -> None:
-    """Verify that every updatable document was either indexed or reported as failed."""
-    all_returned_doc_ids = (
-        {r.document_id for r in insertion_records}
-        | {f.failed_document.document_id for f in write_failures if f.failed_document}
-        | embedding_failed_doc_ids
-    )
-    if all_returned_doc_ids != set(updatable_ids):
-        raise RuntimeError(
-            f"Some documents were not successfully indexed. "
-            f"Updatable IDs: {updatable_ids}, "
-            f"Returned IDs: {all_returned_doc_ids}. "
-            f"This should never happen. "
-            f"This occured for document index {document_index_name}"
-        )
-
-
@log_function_time(debug_only=True)
 def index_doc_batch(
    *,
@@ -820,7 +672,12 @@ def index_doc_batch(
    filtered_documents = filter_fnc(document_batch)
    context = adapter.prepare(filtered_documents, ignore_time_skip)
    if not context:
-        return IndexingPipelineResult.empty(len(filtered_documents))
+        return IndexingPipelineResult(
+            new_docs=0,
+            total_docs=len(filtered_documents),
+            total_chunks=0,
+            failures=[],
+        )

    # Convert documents to IndexingDocument objects with processed section
    # logger.debug("Processing image sections")
@@ -859,99 +716,117 @@ def index_doc_batch(
        )

    logger.debug("Starting embedding")
-    with embed_and_stream(chunks, embedder, tenant_id, request_id) as (
-        embedding_result,
-        chunk_store,
-    ):
-        updatable_ids = [doc.id for doc in context.updatable_docs]
-        updatable_chunk_data = [
-            UpdatableChunkData(
-                chunk_id=chunk_id,
-                document_id=document_id,
-                boost_score=1.0,
-            )
-            for chunk_id, document_id in embedding_result.successful_chunk_ids
-        ]
+    chunks_with_embeddings, embedding_failures = (
+        embed_chunks_with_failure_handling(
+            chunks=chunks,
+            embedder=embedder,
+            tenant_id=tenant_id,
+            request_id=request_id,
+        )
+        if chunks
+        else ([], [])
+    )

-        embedding_failed_doc_ids = _get_failed_doc_ids(
-            embedding_result.connector_failures
+    chunk_content_scores = [1.0] * len(chunks_with_embeddings)
+
+    updatable_ids = [doc.id for doc in context.updatable_docs]
+    updatable_chunk_data = [
+        UpdatableChunkData(
+            chunk_id=chunk.chunk_id,
+            document_id=chunk.source_document.id,
+            boost_score=score,
+        )
+        for chunk, score in zip(chunks_with_embeddings, chunk_content_scores)
+    ]
+
+    # Acquires a lock on the documents so that no other process can modify them
+    # NOTE: don't need to acquire till here, since this is when the actual race condition
+    # with Vespa can occur.
+    with adapter.lock_context(context.updatable_docs):
+        # we're concerned about race conditions where multiple simultaneous indexings might result
+        # in one set of metadata overwriting another one in vespa.
+        # we still write data here for the immediate and most likely correct sync, but
+        # to resolve this, an update of the last modified field at the end of this loop
+        # always triggers a final metadata sync via the celery queue
+        result = adapter.build_metadata_aware_chunks(
+            chunks_with_embeddings=chunks_with_embeddings,
+            chunk_content_scores=chunk_content_scores,
+            tenant_id=tenant_id,
+            context=context,
        )

-        # Filter to only successfully embedded chunks so
-        # doc_id_to_new_chunk_cnt reflects what's actually written to Vespa.
-        embedded_chunks = [
-            c for c in chunks if c.source_document.id not in embedding_failed_doc_ids
-        ]
+        short_descriptor_list = [chunk.to_short_descriptor() for chunk in result.chunks]
+        short_descriptor_log = str(short_descriptor_list)[:1024]
+        logger.debug(f"Indexing the following chunks: {short_descriptor_log}")

-        # Acquires a lock on the documents so that no other process can modify
-        # them.  Not needed until here, since this is when the actual race
-        # condition with vector db can occur.
-        with adapter.lock_context(context.updatable_docs):
-            enricher = adapter.prepare_enrichment(
-                context=context,
-                tenant_id=tenant_id,
-                chunks=embedded_chunks,
+        primary_doc_idx_insertion_records: list[DocumentInsertionRecord] | None = None
+        primary_doc_idx_vector_db_write_failures: list[ConnectorFailure] | None = None
+        for document_index in document_indices:
+            # A document will not be spread across different batches, so all the
+            # documents with chunks in this set, are fully represented by the chunks
+            # in this set
+            (
+                insertion_records,
+                vector_db_write_failures,
+            ) = write_chunks_to_vector_db_with_backoff(
+                document_index=document_index,
+                chunks=result.chunks,
+                index_batch_params=IndexBatchParams(
+                    doc_id_to_previous_chunk_cnt=result.doc_id_to_previous_chunk_cnt,
+                    doc_id_to_new_chunk_cnt=result.doc_id_to_new_chunk_cnt,
+                    tenant_id=tenant_id,
+                    large_chunks_enabled=chunker.enable_large_chunks,
+                ),
            )

-            index_batch_params = IndexBatchParams(
-                doc_id_to_previous_chunk_cnt=enricher.doc_id_to_previous_chunk_cnt,
-                doc_id_to_new_chunk_cnt=enricher.doc_id_to_new_chunk_cnt,
-                tenant_id=tenant_id,
-                large_chunks_enabled=chunker.enable_large_chunks,
-            )
-
-            primary_doc_idx_insertion_records: list[DocumentInsertionRecord] | None = (
-                None
-            )
-            primary_doc_idx_vector_db_write_failures: list[ConnectorFailure] | None = (
-                None
-            )
-
-            for document_index in document_indices:
-
-                def _enriched_stream() -> Iterator[DocMetadataAwareIndexChunk]:
-                    for chunk in chunk_store.stream():
-                        yield enricher.enrich_chunk(chunk, 1.0)
-
-                insertion_records, write_failures = (
-                    write_chunks_to_vector_db_with_backoff(
-                        document_index=document_index,
-                        make_chunks=_enriched_stream,
-                        index_batch_params=index_batch_params,
-                    )
+            all_returned_doc_ids: set[str] = (
+                {record.document_id for record in insertion_records}
+                .union(
+                    {
+                        record.failed_document.document_id
+                        for record in vector_db_write_failures
+                        if record.failed_document
+                    }
                )
-
-                _verify_indexing_completeness(
-                    insertion_records=insertion_records,
-                    write_failures=write_failures,
-                    embedding_failed_doc_ids=embedding_failed_doc_ids,
-                    updatable_ids=updatable_ids,
-                    document_index_name=document_index.__class__.__name__,
+                .union(
+                    {
+                        record.failed_document.document_id
+                        for record in embedding_failures
+                        if record.failed_document
+                    }
                )
-                # We treat the first document index we got as the primary one used
-                # for reporting the state of indexing.
-                if primary_doc_idx_insertion_records is None:
-                    primary_doc_idx_insertion_records = insertion_records
-                if primary_doc_idx_vector_db_write_failures is None:
-                    primary_doc_idx_vector_db_write_failures = write_failures
-
-            adapter.post_index(
-                context=context,
-                updatable_chunk_data=updatable_chunk_data,
-                filtered_documents=filtered_documents,
-                enrichment=enricher,
            )
+            if all_returned_doc_ids != set(updatable_ids):
+                raise RuntimeError(
+                    f"Some documents were not successfully indexed. "
+                    f"Updatable IDs: {updatable_ids}, "
+                    f"Returned IDs: {all_returned_doc_ids}. "
+                    "This should never happen. "
+                    f"This occured for document index {document_index.__class__.__name__}"
+                )
+            # We treat the first document index we got as the primary one used
+            # for reporting the state of indexing.
+            if primary_doc_idx_insertion_records is None:
+                primary_doc_idx_insertion_records = insertion_records
+            if primary_doc_idx_vector_db_write_failures is None:
+                primary_doc_idx_vector_db_write_failures = vector_db_write_failures
+
+        adapter.post_index(
+            context=context,
+            updatable_chunk_data=updatable_chunk_data,
+            filtered_documents=filtered_documents,
+            result=result,
+        )

    assert primary_doc_idx_insertion_records is not None
    assert primary_doc_idx_vector_db_write_failures is not None
    return IndexingPipelineResult(
-        new_docs=sum(
-            1 for r in primary_doc_idx_insertion_records if not r.already_existed
+        new_docs=len(
+            [r for r in primary_doc_idx_insertion_records if not r.already_existed]
        ),
        total_docs=len(filtered_documents),
-        total_chunks=len(embedding_result.successful_chunk_ids),
-        failures=primary_doc_idx_vector_db_write_failures
-        + embedding_result.connector_failures,
+        total_chunks=len(chunks_with_embeddings),
+        failures=primary_doc_idx_vector_db_write_failures + embedding_failures,
    )


--- a/backend/onyx/indexing/models.py
+++ b/backend/onyx/indexing/models.py
@@ -235,16 +235,12 @@ class UpdatableChunkData(BaseModel):
    boost_score: float


-class ChunkEnrichmentContext(Protocol):
-    """Returned by prepare_enrichment. Holds pre-computed metadata lookups
-    and provides per-chunk enrichment."""
-
+class BuildMetadataAwareChunksResult(BaseModel):
+    chunks: list[DocMetadataAwareIndexChunk]
    doc_id_to_previous_chunk_cnt: dict[str, int]
    doc_id_to_new_chunk_cnt: dict[str, int]
-
-    def enrich_chunk(
-        self, chunk: IndexChunk, score: float
-    ) -> DocMetadataAwareIndexChunk: ...
+    user_file_id_to_raw_text: dict[str, str]
+    user_file_id_to_token_count: dict[str, int | None]


 class IndexingBatchAdapter(Protocol):
@@ -258,24 +254,18 @@ class IndexingBatchAdapter(Protocol):
    ) -> Generator[TransactionalContext, None, None]:
        """Provide a transaction/row-lock context for critical updates."""

-    def prepare_enrichment(
+    def build_metadata_aware_chunks(
        self,
-        context: "DocumentBatchPrepareContext",
+        chunks_with_embeddings: list[IndexChunk],
+        chunk_content_scores: list[float],
        tenant_id: str,
-        chunks: list[DocAwareChunk],
-    ) -> ChunkEnrichmentContext:
-        """Prepare per-chunk enrichment data (access, document sets, boost, etc.).
-
-        Precondition: ``chunks`` have already been through the embedding step
-        (i.e. they are ``IndexChunk`` instances with populated embeddings,
-        passed here as the base ``DocAwareChunk`` type).
-        """
-        ...
+        context: "DocumentBatchPrepareContext",
+    ) -> BuildMetadataAwareChunksResult: ...

    def post_index(
        self,
        context: "DocumentBatchPrepareContext",
        updatable_chunk_data: list[UpdatableChunkData],
        filtered_documents: list[Document],
-        enrichment: ChunkEnrichmentContext,
+        result: BuildMetadataAwareChunksResult,
    ) -> None: ...
--- a/backend/onyx/indexing/vector_db_insertion.py
+++ b/backend/onyx/indexing/vector_db_insertion.py
@@ -1,9 +1,6 @@
 import time
-from collections.abc import Callable
-from collections.abc import Iterable
+from collections import defaultdict
 from http import HTTPStatus
-from itertools import chain
-from itertools import groupby

 import httpx

@@ -31,22 +28,22 @@ def _log_insufficient_storage_error(e: Exception) -> None:

 def write_chunks_to_vector_db_with_backoff(
    document_index: DocumentIndex,
-    make_chunks: Callable[[], Iterable[DocMetadataAwareIndexChunk]],
+    chunks: list[DocMetadataAwareIndexChunk],
    index_batch_params: IndexBatchParams,
 ) -> tuple[list[DocumentInsertionRecord], list[ConnectorFailure]]:
    """Tries to insert all chunks in one large batch. If that batch fails for any reason,
    goes document by document to isolate the failure(s).

    IMPORTANT: must pass in whole documents at a time not individual chunks, since the
-    vector DB interface assumes that all chunks for a single document are present. The
-    chunks must also be in contiguous batches
+    vector DB interface assumes that all chunks for a single document are present.
    """
+
    # first try to write the chunks to the vector db
    try:
        return (
            list(
                document_index.index(
-                    chunks=make_chunks(),
+                    chunks=chunks,
                    index_batch_params=index_batch_params,
                )
            ),
@@ -63,23 +60,14 @@ def write_chunks_to_vector_db_with_backoff(
        # wait a couple seconds just to give the vector db a chance to recover
        time.sleep(2)

+    # try writing each doc one by one
+    chunks_for_docs: dict[str, list[DocMetadataAwareIndexChunk]] = defaultdict(list)
+    for chunk in chunks:
+        chunks_for_docs[chunk.source_document.id].append(chunk)
+
    insertion_records: list[DocumentInsertionRecord] = []
    failures: list[ConnectorFailure] = []
-
-    def key(chunk: DocMetadataAwareIndexChunk) -> str:
-        return chunk.source_document.id
-
-    seen_doc_ids: set[str] = set()
-    for doc_id, chunks_for_doc in groupby(make_chunks(), key=key):
-        if doc_id in seen_doc_ids:
-            raise RuntimeError(
-                f"Doc chunks are not arriving in order. Current doc_id={doc_id}, seen_doc_ids={list(seen_doc_ids)}"
-            )
-        seen_doc_ids.add(doc_id)
-
-        first_chunk = next(chunks_for_doc)
-        chunks_for_doc = chain([first_chunk], chunks_for_doc)
-
+    for doc_id, chunks_for_doc in chunks_for_docs.items():
        try:
            insertion_records.extend(
                document_index.index(
@@ -99,7 +87,9 @@ def write_chunks_to_vector_db_with_backoff(
                ConnectorFailure(
                    failed_document=DocumentFailure(
                        document_id=doc_id,
-                        document_link=first_chunk.get_link(),
+                        document_link=(
+                            chunks_for_doc[0].get_link() if chunks_for_doc else None
+                        ),
                    ),
                    failure_message=str(e),
                    exception=e,
--- a/backend/onyx/llm/multi_llm.py
+++ b/backend/onyx/llm/multi_llm.py
@@ -185,21 +185,6 @@ def _messages_contain_tool_content(messages: list[dict[str, Any]]) -> bool:
    return False


-def _prompt_contains_tool_call_history(prompt: LanguageModelInput) -> bool:
-    """Check if the prompt contains any assistant messages with tool_calls.
-
-    When Anthropic's extended thinking is enabled, the API requires every
-    assistant message to start with a thinking block before any tool_use
-    blocks.  Since we don't preserve thinking_blocks (they carry
-    cryptographic signatures that can't be reconstructed), we must skip
-    the thinking param whenever history contains prior tool-calling turns.
-    """
-    from onyx.llm.models import AssistantMessage
-
-    msgs = prompt if isinstance(prompt, list) else [prompt]
-    return any(isinstance(msg, AssistantMessage) and msg.tool_calls for msg in msgs)
-
-
 def _is_vertex_model_rejecting_output_config(model_name: str) -> bool:
    normalized_model_name = model_name.lower()
    return any(
@@ -481,20 +466,7 @@ class LitellmLLM(LLM):
                    reasoning_effort
                )

-                # Anthropic requires every assistant message with tool_use
-                # blocks to start with a thinking block that carries a
-                # cryptographic signature.  We don't preserve those blocks
-                # across turns, so skip thinking when the history already
-                # contains tool-calling assistant messages.  LiteLLM's
-                # modify_params workaround doesn't cover all providers
-                # (notably Bedrock).
-                can_enable_thinking = (
-                    budget_tokens is not None
-                    and not _prompt_contains_tool_call_history(prompt)
-                )
-
-                if can_enable_thinking:
-                    assert budget_tokens is not None  # mypy
+                if budget_tokens is not None:
                    if max_tokens is not None:
                        # Anthropic has a weird rule where max token has to be at least as much as budget tokens if set
                        # and the minimum budget tokens is 1024
--- a/backend/onyx/main.py
+++ b/backend/onyx/main.py
@@ -439,7 +439,6 @@ def get_application(lifespan_override: Lifespan | None = None) -> FastAPI:
            dsn=SENTRY_DSN,
            integrations=[StarletteIntegration(), FastApiIntegration()],
            traces_sample_rate=0.1,
-            release=__version__,
        )
        logger.info("Sentry initialized")
    else:
--- a/backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/package-lock.json
+++ b/backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/package-lock.json
@@ -3844,9 +3844,9 @@
      }
    },
    "node_modules/@ts-morph/common/node_modules/brace-expansion": {
-      "version": "5.0.5",
-      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.5.tgz",
-      "integrity": "sha512-VZznLgtwhn+Mact9tfiwx64fA9erHH/MCXEUfB/0bX/6Fz6ny5EGTXYltMocqg4xFAQZtnO3DHWWXi8RiuN7cQ==",
+      "version": "5.0.3",
+      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.3.tgz",
+      "integrity": "sha512-fy6KJm2RawA5RcHkLa1z/ScpBeA762UF9KmZQxwIbDtRJrgLzM10depAiEQ+CXYcoiqW1/m96OAAoke2nE9EeA==",
      "license": "MIT",
      "dependencies": {
        "balanced-match": "^4.0.2"
@@ -4224,9 +4224,9 @@
      }
    },
    "node_modules/@typescript-eslint/typescript-estree/node_modules/brace-expansion": {
-      "version": "2.0.3",
-      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.3.tgz",
-      "integrity": "sha512-MCV/fYJEbqx68aE58kv2cA/kiky1G8vux3OR6/jbS+jIMe/6fJWa0DTzJU7dqijOWYwHi1t29FlfYI9uytqlpA==",
+      "version": "2.0.2",
+      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz",
+      "integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
@@ -5007,9 +5007,9 @@
      }
    },
    "node_modules/brace-expansion": {
-      "version": "1.1.13",
-      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.13.tgz",
-      "integrity": "sha512-9ZLprWS6EENmhEOpjCYW2c8VkmOvckIJZfkr7rBW6dObmfgJ/L1GpSYW5Hpo9lDz4D1+n0Ckz8rU7FwHDQiG/w==",
+      "version": "1.1.12",
+      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
+      "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
--- a/backend/onyx/server/features/hooks/api.py
+++ b/backend/onyx/server/features/hooks/api.py
@@ -123,8 +123,9 @@ def _validate_endpoint(
    (not reachable — indicates the api_key is invalid).

    Timeout handling:
-    - Any httpx.TimeoutException (ConnectTimeout, ReadTimeout, WriteTimeout, PoolTimeout) →
-      timeout (operator should consider increasing timeout_seconds).
+    - ConnectTimeout: TCP handshake never completed → cannot_connect.
+    - ReadTimeout / WriteTimeout: TCP was established, server responded slowly → timeout
+      (operator should consider increasing timeout_seconds).
    - All other exceptions → cannot_connect.
    """
    _check_ssrf_safety(endpoint_url)
--- a/backend/onyx/server/features/projects/projects_file_utils.py
+++ b/backend/onyx/server/features/projects/projects_file_utils.py
@@ -76,11 +76,26 @@ class CategorizedFiles(BaseModel):
    acceptable: list[UploadFile] = Field(default_factory=list)
    rejected: list[RejectedFile] = Field(default_factory=list)
    acceptable_file_to_token_count: dict[str, int] = Field(default_factory=dict)
+    # Filenames within `acceptable` that should be stored but not indexed.
+    skip_indexing: set[str] = Field(default_factory=set)

    # Allow FastAPI UploadFile instances
    model_config = ConfigDict(arbitrary_types_allowed=True)


+# Extensions that bypass the token-count threshold on upload.
+_TOKEN_THRESHOLD_EXEMPT_EXTENSIONS: set[str] = {
+    ".csv",
+    ".tsv",
+    ".xlsx",
+}
+
+
+def _skip_token_threshold(extension: str) -> bool:
+    """Return True if this file extension should bypass the token limit."""
+    return extension.lower() in _TOKEN_THRESHOLD_EXEMPT_EXTENSIONS
+
+
 def _apply_long_side_cap(width: int, height: int, cap: int) -> tuple[int, int]:
    if max(width, height) <= cap:
        return width, height
@@ -264,7 +279,17 @@ def categorize_uploaded_files(
                token_count = count_tokens(
                    text_content, tokenizer, token_limit=token_threshold
                )
-                if token_threshold is not None and token_count > token_threshold:
+                exceeds_threshold = (
+                    token_threshold is not None and token_count > token_threshold
+                )
+                if exceeds_threshold and _skip_token_threshold(extension):
+                    # Exempt extensions (e.g. spreadsheets) are accepted
+                    # but flagged to skip indexing — only metadata is
+                    # injected into the LLM context.
+                    results.acceptable.append(upload)
+                    results.acceptable_file_to_token_count[filename] = token_count
+                    results.skip_indexing.add(filename)
+                elif exceeds_threshold:
                    results.rejected.append(
                        RejectedFile(
                            filename=filename,
--- a/backend/onyx/server/metrics/indexing_pipeline.py
+++ b/backend/onyx/server/metrics/indexing_pipeline.py
@@ -12,6 +12,7 @@ stale, which is fine for monitoring dashboards.
 import json
 import threading
 import time
+from collections.abc import Callable
 from datetime import datetime
 from datetime import timezone
 from typing import Any
@@ -103,23 +104,25 @@ class _CachedCollector(Collector):


 class QueueDepthCollector(_CachedCollector):
-    """Reads Celery queue lengths from the broker Redis on each scrape."""
+    """Reads Celery queue lengths from the broker Redis on each scrape.
+
+    Uses a Redis client factory (callable) rather than a stored client
+    reference so the connection is always fresh from Celery's pool.
+    """

    def __init__(self, cache_ttl: float = _DEFAULT_CACHE_TTL) -> None:
        super().__init__(cache_ttl)
-        self._celery_app: Any | None = None
+        self._get_redis: Callable[[], Redis] | None = None

-    def set_celery_app(self, app: Any) -> None:
-        """Set the Celery app for broker Redis access."""
-        self._celery_app = app
+    def set_redis_factory(self, factory: Callable[[], Redis]) -> None:
+        """Set a callable that returns a broker Redis client on demand."""
+        self._get_redis = factory

    def _collect_fresh(self) -> list[GaugeMetricFamily]:
-        if self._celery_app is None:
+        if self._get_redis is None:
            return []

-        from onyx.background.celery.celery_redis import celery_get_broker_client
-
-        redis_client = celery_get_broker_client(self._celery_app)
+        redis_client = self._get_redis()

        depth = GaugeMetricFamily(
            "onyx_queue_depth",
@@ -401,19 +404,17 @@ class RedisHealthCollector(_CachedCollector):

    def __init__(self, cache_ttl: float = _DEFAULT_CACHE_TTL) -> None:
        super().__init__(cache_ttl)
-        self._celery_app: Any | None = None
+        self._get_redis: Callable[[], Redis] | None = None

-    def set_celery_app(self, app: Any) -> None:
-        """Set the Celery app for broker Redis access."""
-        self._celery_app = app
+    def set_redis_factory(self, factory: Callable[[], Redis]) -> None:
+        """Set a callable that returns a broker Redis client on demand."""
+        self._get_redis = factory

    def _collect_fresh(self) -> list[GaugeMetricFamily]:
-        if self._celery_app is None:
+        if self._get_redis is None:
            return []

-        from onyx.background.celery.celery_redis import celery_get_broker_client
-
-        redis_client = celery_get_broker_client(self._celery_app)
+        redis_client = self._get_redis()

        memory_used = GaugeMetricFamily(
            "onyx_redis_memory_used_bytes",
--- a/backend/onyx/server/metrics/indexing_pipeline_setup.py
+++ b/backend/onyx/server/metrics/indexing_pipeline_setup.py
@@ -3,8 +3,12 @@
 Called once by the monitoring celery worker after Redis and DB are ready.
 """

+from collections.abc import Callable
+from typing import Any
+
 from celery import Celery
 from prometheus_client.registry import REGISTRY
+from redis import Redis

 from onyx.server.metrics.indexing_pipeline import ConnectorHealthCollector
 from onyx.server.metrics.indexing_pipeline import IndexAttemptCollector
@@ -17,7 +21,7 @@ from onyx.utils.logger import setup_logger
 logger = setup_logger()

 # Module-level singletons — these are lightweight objects (no connections or DB
-# state) until configure() / set_celery_app() is called. Keeping them at
+# state) until configure() / set_redis_factory() is called. Keeping them at
 # module level ensures they survive the lifetime of the worker process and are
 # only registered with the Prometheus registry once.
 _queue_collector = QueueDepthCollector()
@@ -28,15 +32,88 @@ _worker_health_collector = WorkerHealthCollector()
 _heartbeat_monitor: WorkerHeartbeatMonitor | None = None


+def _make_broker_redis_factory(celery_app: Celery) -> Callable[[], Redis]:
+    """Create a factory that returns a cached broker Redis client.
+
+    The returned callable reuses a single connection across scrapes to avoid
+    leaking connections. Every call pings the cached client first; when the
+    ping fails, the client and its Kombu connection are closed and replaced.
+
+    Args:
+        celery_app: Celery app whose ``broker_connection()`` provides the
+            Kombu connection that backs the Redis client.
+
+    Returns:
+        A zero-argument callable returning the broker Redis client. It
+        propagates any exception raised while opening a new connection.
+    """
+    # One-element lists act as mutable cells shared by the closures below.
+    _cached_client: list[Redis | None] = [None]
+    # Keep a reference to the Kombu Connection so we can close it on
+    # reconnect (the raw Redis client outlives the Kombu wrapper).
+    _cached_kombu_conn: list[Any] = [None]
+
+    def _close_client(client: Redis) -> None:
+        """Best-effort close of a Redis client."""
+        try:
+            client.close()
+        except Exception:
+            logger.debug("Failed to close stale Redis client", exc_info=True)
+
+    def _close_kombu_conn() -> None:
+        """Best-effort close of the cached Kombu Connection."""
+        conn = _cached_kombu_conn[0]
+        if conn is not None:
+            try:
+                conn.close()
+            except Exception:
+                logger.debug("Failed to close Kombu connection", exc_info=True)
+            _cached_kombu_conn[0] = None
+
+    def _get_broker_redis() -> Redis:
+        client = _cached_client[0]
+        if client is not None:
+            try:
+                client.ping()
+                return client
+            except Exception:
+                logger.debug("Cached Redis client stale, reconnecting")
+                _close_client(client)
+                _cached_client[0] = None
+                _close_kombu_conn()
+
+        # Get a fresh Redis client from the broker connection.
+        # We hold this client long-term (cached above) rather than using a
+        # context manager, because we need it to persist across scrapes.
+        # The caching logic above ensures we only ever hold one connection,
+        # and we close it explicitly on reconnect.
+        conn = celery_app.broker_connection()
+        # Track the connection before opening a channel so that a failure
+        # below can still release it instead of leaking it.
+        _cached_kombu_conn[0] = conn
+        try:
+            # kombu's Channel exposes .client at runtime (the underlying Redis
+            # client) but the type stubs don't declare it.
+            new_client: Redis = conn.channel().client  # type: ignore[attr-defined]
+        except Exception:
+            _close_kombu_conn()
+            raise
+        _cached_client[0] = new_client
+        return new_client
+
+    return _get_broker_redis
+
+
 def setup_indexing_pipeline_metrics(celery_app: Celery) -> None:
    """Register all indexing pipeline collectors with the default registry.

    Args:
-        celery_app: The Celery application instance. Used to obtain a
+        celery_app: The Celery application instance. Used to obtain a cached
            broker Redis client on each scrape for queue depth metrics.
    """
-    _queue_collector.set_celery_app(celery_app)
-    _redis_health_collector.set_celery_app(celery_app)
+    redis_factory = _make_broker_redis_factory(celery_app)
+    _queue_collector.set_redis_factory(redis_factory)
+    _redis_health_collector.set_redis_factory(redis_factory)

    # Start the heartbeat monitor daemon thread — uses a single persistent
    # connection to receive worker-heartbeat events.
--- a/backend/onyx/server/query_and_chat/chat_utils.py
+++ b/backend/onyx/server/query_and_chat/chat_utils.py
@@ -9,8 +9,8 @@ def mime_type_to_chat_file_type(mime_type: str | None) -> ChatFileType:
    if mime_type in OnyxMimeTypes.IMAGE_MIME_TYPES:
        return ChatFileType.IMAGE

-    if mime_type in OnyxMimeTypes.CSV_MIME_TYPES:
-        return ChatFileType.CSV
+    if mime_type in OnyxMimeTypes.TABULAR_MIME_TYPES:
+        return ChatFileType.TABULAR

    if mime_type in OnyxMimeTypes.DOCUMENT_MIME_TYPES:
        return ChatFileType.DOC
--- a/backend/onyx/tools/tool_implementations/file_reader/file_reader_tool.py
+++ b/backend/onyx/tools/tool_implementations/file_reader/file_reader_tool.py
@@ -1,3 +1,4 @@
+import io
 import json
 from typing import Any
 from typing import cast
@@ -9,6 +10,7 @@ from typing_extensions import override
 from onyx.chat.emitter import Emitter
 from onyx.configs.app_configs import DISABLE_VECTOR_DB
 from onyx.db.engine.sql_engine import get_session_with_current_tenant
+from onyx.file_processing.extract_file_text import extract_file_text
 from onyx.file_store.models import ChatFileType
 from onyx.file_store.models import InMemoryChatFile
 from onyx.file_store.utils import load_chat_file_by_id
@@ -169,10 +171,13 @@ class FileReaderTool(Tool[FileReaderToolOverrideKwargs]):

        chat_file = self._load_file(file_id)

-        # Only PLAIN_TEXT and CSV are guaranteed to contain actual text bytes.
+        # Only PLAIN_TEXT and TABULAR content can be converted to text below.
        # DOC type in a loaded file means plaintext extraction failed and the
        # content is the original binary (e.g. raw PDF/DOCX bytes).
-        if chat_file.file_type not in (ChatFileType.PLAIN_TEXT, ChatFileType.CSV):
+        if chat_file.file_type not in (
+            ChatFileType.PLAIN_TEXT,
+            ChatFileType.TABULAR,
+        ):
            raise ToolCallException(
                message=f"File {file_id} is not a text file (type={chat_file.file_type})",
                llm_facing_message=(
@@ -181,7 +186,19 @@ class FileReaderTool(Tool[FileReaderToolOverrideKwargs]):
            )

        try:
-            full_text = chat_file.content.decode("utf-8", errors="replace")
+            if chat_file.file_type == ChatFileType.PLAIN_TEXT:
+                full_text = chat_file.content.decode("utf-8", errors="replace")
+            else:
+                full_text = (
+                    extract_file_text(
+                        file=io.BytesIO(chat_file.content),
+                        file_name=chat_file.filename or "",
+                        break_on_unprocessable=False,
+                    )
+                    or ""
+                )
+        except ToolCallException:
+            raise
        except Exception:
            raise ToolCallException(
                message=f"Failed to decode file {file_id}",
--- a/backend/requirements/default.txt
+++ b/backend/requirements/default.txt
@@ -187,7 +187,7 @@ coloredlogs==15.0.1
    # via onnxruntime
 courlan==1.3.2
    # via trafilatura
-cryptography==46.0.6
+cryptography==46.0.5
    # via
    #   authlib
    #   google-auth
@@ -449,7 +449,7 @@ kombu==5.5.4
    # via celery
 kubernetes==31.0.0
    # via onyx
-langchain-core==1.2.22
+langchain-core==1.2.11
    # via onyx
 langdetect==1.0.9
    # via unstructured
@@ -735,7 +735,7 @@ pyee==13.0.0
    # via playwright
 pygithub==2.5.0
    # via onyx
-pygments==2.20.0
+pygments==2.19.2
    # via rich
 pyjwt==2.12.0
    # via
--- a/backend/requirements/dev.txt
+++ b/backend/requirements/dev.txt
@@ -97,7 +97,7 @@ comm==0.2.3
    # via ipykernel
 contourpy==1.3.3
    # via matplotlib
-cryptography==46.0.6
+cryptography==46.0.5
    # via
    #   google-auth
    #   pyjwt
@@ -263,7 +263,7 @@ oauthlib==3.2.2
    # via
    #   kubernetes
    #   requests-oauthlib
-onyx-devtools==0.7.2
+onyx-devtools==0.7.1
    # via onyx
 openai==2.14.0
    # via
@@ -349,7 +349,7 @@ pydantic-core==2.33.2
    # via pydantic
 pydantic-settings==2.12.0
    # via mcp
-pygments==2.20.0
+pygments==2.19.2
    # via
    #   ipython
    #   ipython-pygments-lexers
--- a/backend/requirements/ee.txt
+++ b/backend/requirements/ee.txt
@@ -76,7 +76,7 @@ colorama==0.4.6 ; sys_platform == 'win32'
    # via
    #   click
    #   tqdm
-cryptography==46.0.6
+cryptography==46.0.5
    # via
    #   google-auth
    #   pyjwt
--- a/backend/requirements/model_server.txt
+++ b/backend/requirements/model_server.txt
@@ -92,7 +92,7 @@ colorama==0.4.6 ; sys_platform == 'win32'
    # via
    #   click
    #   tqdm
-cryptography==46.0.6
+cryptography==46.0.5
    # via
    #   google-auth
    #   pyjwt
--- a/backend/scripts/run_industryrag_bench_questions.py
+++ b/backend/scripts/run_industryrag_bench_questions.py
@@ -5,7 +5,6 @@ import asyncio
 import json
 import logging
 import sys
-import time
 from dataclasses import asdict
 from dataclasses import dataclass
 from pathlib import Path
@@ -28,9 +27,6 @@ INTERNAL_SEARCH_TOOL_NAME = "internal_search"
 INTERNAL_SEARCH_IN_CODE_TOOL_ID = "SearchTool"
 MAX_REQUEST_ATTEMPTS = 5
 RETRIABLE_STATUS_CODES = {429, 500, 502, 503, 504}
-QUESTION_TIMEOUT_SECONDS = 300
-QUESTION_RETRY_PAUSE_SECONDS = 30
-MAX_QUESTION_ATTEMPTS = 3


@dataclass(frozen=True)
@@ -113,27 +109,6 @@ def normalize_api_base(api_base: str) -> str:
    return f"{normalized}/api"


-def load_completed_question_ids(output_file: Path) -> set[str]:
-    if not output_file.exists():
-        return set()
-
-    completed_ids: set[str] = set()
-    with output_file.open("r", encoding="utf-8") as file:
-        for line in file:
-            stripped = line.strip()
-            if not stripped:
-                continue
-            try:
-                record = json.loads(stripped)
-            except json.JSONDecodeError:
-                continue
-            question_id = record.get("question_id")
-            if isinstance(question_id, str) and question_id:
-                completed_ids.add(question_id)
-
-    return completed_ids
-
-
 def load_questions(questions_file: Path) -> list[QuestionRecord]:
    if not questions_file.exists():
        raise FileNotFoundError(f"Questions file not found: {questions_file}")
@@ -373,7 +348,6 @@ async def generate_answers(
    api_base: str,
    api_key: str,
    parallelism: int,
-    skipped: int,
 ) -> None:
    if parallelism < 1:
        raise ValueError("`--parallelism` must be at least 1.")
@@ -408,178 +382,58 @@ async def generate_answers(
            write_lock = asyncio.Lock()
            completed = 0
            successful = 0
-            stuck_count = 0
            failed_questions: list[FailedQuestionRecord] = []
-            remaining_count = len(questions)
-            overall_total = remaining_count + skipped
-            question_durations: list[float] = []
-            run_start_time = time.monotonic()
-
-            def print_progress() -> None:
-                avg_time = (
-                    sum(question_durations) / len(question_durations)
-                    if question_durations
-                    else 0.0
-                )
-                elapsed = time.monotonic() - run_start_time
-                eta = avg_time * (remaining_count - completed) / max(parallelism, 1)
-
-                done = skipped + completed
-                bar_width = 30
-                filled = (
-                    int(bar_width * done / overall_total)
-                    if overall_total
-                    else bar_width
-                )
-                bar = "█" * filled + "░" * (bar_width - filled)
-                pct = (done / overall_total * 100) if overall_total else 100.0
-
-                parts = (
-                    f"\r{bar} {pct:5.1f}% "
-                    f"[{done}/{overall_total}] "
-                    f"avg {avg_time:.1f}s/q "
-                    f"elapsed {elapsed:.0f}s "
-                    f"ETA {eta:.0f}s "
-                    f"(ok:{successful} fail:{len(failed_questions)}"
-                )
-                if stuck_count:
-                    parts += f" stuck:{stuck_count}"
-                if skipped:
-                    parts += f" skip:{skipped}"
-                parts += ")"
-
-                sys.stderr.write(parts)
-                sys.stderr.flush()
-
-            print_progress()
+            total = len(questions)

            async def process_question(question_record: QuestionRecord) -> None:
                nonlocal completed
                nonlocal successful
-                nonlocal stuck_count

-                last_error: Exception | None = None
-                for attempt in range(1, MAX_QUESTION_ATTEMPTS + 1):
-                    q_start = time.monotonic()
-                    try:
-                        async with semaphore:
-                            result = await asyncio.wait_for(
-                                submit_question(
-                                    session=session,
-                                    api_base=api_base,
-                                    headers=headers,
-                                    internal_search_tool_id=internal_search_tool_id,
-                                    question_record=question_record,
-                                ),
-                                timeout=QUESTION_TIMEOUT_SECONDS,
-                            )
-                    except asyncio.TimeoutError:
-                        async with progress_lock:
-                            stuck_count += 1
-                            logger.warning(
-                                "Question %s timed out after %ss (attempt %s/%s, "
-                                "total stuck: %s) — retrying in %ss",
-                                question_record.question_id,
-                                QUESTION_TIMEOUT_SECONDS,
-                                attempt,
-                                MAX_QUESTION_ATTEMPTS,
-                                stuck_count,
-                                QUESTION_RETRY_PAUSE_SECONDS,
-                            )
-                            print_progress()
-                        last_error = TimeoutError(
-                            f"Timed out after {QUESTION_TIMEOUT_SECONDS}s "
-                            f"on attempt {attempt}/{MAX_QUESTION_ATTEMPTS}"
+                try:
+                    async with semaphore:
+                        result = await submit_question(
+                            session=session,
+                            api_base=api_base,
+                            headers=headers,
+                            internal_search_tool_id=internal_search_tool_id,
+                            question_record=question_record,
                        )
-                        await asyncio.sleep(QUESTION_RETRY_PAUSE_SECONDS)
-                        continue
-                    except Exception as exc:
-                        duration = time.monotonic() - q_start
-                        async with progress_lock:
-                            completed += 1
-                            question_durations.append(duration)
-                            failed_questions.append(
-                                FailedQuestionRecord(
-                                    question_id=question_record.question_id,
-                                    error=str(exc),
-                                )
-                            )
-                            logger.exception(
-                                "Failed question %s (%s/%s)",
-                                question_record.question_id,
-                                completed,
-                                remaining_count,
-                            )
-                            print_progress()
-                        return
-
-                    duration = time.monotonic() - q_start
-
-                    async with write_lock:
-                        file.write(json.dumps(asdict(result), ensure_ascii=False))
-                        file.write("\n")
-                        file.flush()
-
+                except Exception as exc:
                    async with progress_lock:
                        completed += 1
-                        successful += 1
-                        question_durations.append(duration)
-                        print_progress()
+                        failed_questions.append(
+                            FailedQuestionRecord(
+                                question_id=question_record.question_id,
+                                error=str(exc),
+                            )
+                        )
+                        logger.exception(
+                            "Failed question %s (%s/%s)",
+                            question_record.question_id,
+                            completed,
+                            total,
+                        )
                    return

-                # All attempts exhausted due to timeouts
+                async with write_lock:
+                    file.write(json.dumps(asdict(result), ensure_ascii=False))
+                    file.write("\n")
+                    file.flush()
+
                async with progress_lock:
                    completed += 1
-                    failed_questions.append(
-                        FailedQuestionRecord(
-                            question_id=question_record.question_id,
-                            error=str(last_error),
-                        )
-                    )
-                    logger.error(
-                        "Question %s failed after %s timeout attempts (%s/%s)",
-                        question_record.question_id,
-                        MAX_QUESTION_ATTEMPTS,
-                        completed,
-                        remaining_count,
-                    )
-                    print_progress()
+                    successful += 1
+                    logger.info("Processed %s/%s questions", completed, total)

            await asyncio.gather(
                *(process_question(question_record) for question_record in questions)
            )

-            # Final newline after progress bar
-            sys.stderr.write("\n")
-            sys.stderr.flush()
-
-            total_elapsed = time.monotonic() - run_start_time
-            avg_time = (
-                sum(question_durations) / len(question_durations)
-                if question_durations
-                else 0.0
-            )
-            stuck_suffix = f", {stuck_count} stuck timeouts" if stuck_count else ""
-            resume_suffix = (
-                f" — {skipped} previously completed, "
-                f"{skipped + successful}/{overall_total} overall"
-                if skipped
-                else ""
-            )
-            logger.info(
-                "Done: %s/%s successful in %.1fs (avg %.1fs/question%s)%s",
-                successful,
-                remaining_count,
-                total_elapsed,
-                avg_time,
-                stuck_suffix,
-                resume_suffix,
-            )
-
            if failed_questions:
                logger.warning(
-                    "%s questions failed:",
+                    "Completed with %s failed questions and %s successful questions.",
                    len(failed_questions),
+                    successful,
                )
                for failed_question in failed_questions:
                    logger.warning(
@@ -599,30 +453,7 @@ def main() -> None:
            raise ValueError("`--max-questions` must be at least 1 when provided.")
        questions = questions[: args.max_questions]

-    completed_ids = load_completed_question_ids(args.output_file)
-    logger.info(
-        "Found %s already-answered question IDs in %s",
-        len(completed_ids),
-        args.output_file,
-    )
-    total_before_filter = len(questions)
-    questions = [q for q in questions if q.question_id not in completed_ids]
-    skipped = total_before_filter - len(questions)
-
-    if skipped:
-        logger.info(
-            "Resuming: %s/%s already answered, %s remaining",
-            skipped,
-            total_before_filter,
-            len(questions),
-        )
-    else:
-        logger.info("Loaded %s questions from %s", len(questions), args.questions_file)
-
-    if not questions:
-        logger.info("All questions already answered. Nothing to do.")
-        return
-
+    logger.info("Loaded %s questions from %s", len(questions), args.questions_file)
    logger.info("Writing answers to %s", args.output_file)

    asyncio.run(
@@ -632,7 +463,6 @@ def main() -> None:
            api_base=api_base,
            api_key=args.api_key,
            parallelism=args.parallelism,
-            skipped=skipped,
        )
    )

--- a/backend/tests/external_dependency_unit/celery/test_persona_file_sync.py
+++ b/backend/tests/external_dependency_unit/celery/test_persona_file_sync.py
@@ -129,10 +129,6 @@ def _patch_task_app(task: Any, mock_app: MagicMock) -> Generator[None, None, Non
            return_value=mock_app,
        ),
        patch(_PATCH_QUEUE_DEPTH, return_value=0),
-        patch(
-            "onyx.background.celery.tasks.user_file_processing.tasks.celery_get_broker_client",
-            return_value=MagicMock(),
-        ),
    ):
        yield

--- a/backend/tests/external_dependency_unit/celery/test_user_file_delete_queue.py
+++ b/backend/tests/external_dependency_unit/celery/test_user_file_delete_queue.py
@@ -88,22 +88,10 @@ def _patch_task_app(task: Any, mock_app: MagicMock) -> Generator[None, None, Non
    the actual task instance.  We patch ``app`` on that instance's class
    (a unique Celery-generated Task subclass) so the mock is scoped to this
    task only.
-
-    Also patches ``celery_get_broker_client`` so the mock app doesn't need
-    a real broker URL.
    """
    task_instance = task.run.__self__
-    with (
-        patch.object(
-            type(task_instance),
-            "app",
-            new_callable=PropertyMock,
-            return_value=mock_app,
-        ),
-        patch(
-            "onyx.background.celery.tasks.user_file_processing.tasks.celery_get_broker_client",
-            return_value=MagicMock(),
-        ),
+    with patch.object(
+        type(task_instance), "app", new_callable=PropertyMock, return_value=mock_app
    ):
        yield

--- a/backend/tests/external_dependency_unit/celery/test_user_file_indexing_adapter.py
+++ b/backend/tests/external_dependency_unit/celery/test_user_file_indexing_adapter.py
@@ -1,7 +1,7 @@
 """
 External dependency unit tests for UserFileIndexingAdapter metadata writing.

-Validates that prepare_enrichment produces DocMetadataAwareIndexChunk
+Validates that build_metadata_aware_chunks produces DocMetadataAwareIndexChunk
 objects with both `user_project` and `personas` fields populated correctly
 based on actual DB associations.

@@ -127,7 +127,7 @@ def _make_index_chunk(user_file: UserFile) -> IndexChunk:


 class TestAdapterWritesBothMetadataFields:
-    """prepare_enrichment must populate user_project AND personas."""
+    """build_metadata_aware_chunks must populate user_project AND personas."""

    @patch(
        "onyx.indexing.adapters.user_file_indexing_adapter.get_default_llm",
@@ -153,13 +153,15 @@ class TestAdapterWritesBothMetadataFields:
        doc = chunk.source_document
        context = DocumentBatchPrepareContext(updatable_docs=[doc], id_to_boost_map={})

-        enricher = adapter.prepare_enrichment(
-            context=context,
+        result = adapter.build_metadata_aware_chunks(
+            chunks_with_embeddings=[chunk],
+            chunk_content_scores=[1.0],
            tenant_id=TEST_TENANT_ID,
-            chunks=[chunk],
+            context=context,
        )
-        aware_chunk = enricher.enrich_chunk(chunk, 1.0)

+        assert len(result.chunks) == 1
+        aware_chunk = result.chunks[0]
        assert persona.id in aware_chunk.personas
        assert aware_chunk.user_project == []

@@ -188,13 +190,15 @@ class TestAdapterWritesBothMetadataFields:
            updatable_docs=[chunk.source_document], id_to_boost_map={}
        )

-        enricher = adapter.prepare_enrichment(
-            context=context,
+        result = adapter.build_metadata_aware_chunks(
+            chunks_with_embeddings=[chunk],
+            chunk_content_scores=[1.0],
            tenant_id=TEST_TENANT_ID,
-            chunks=[chunk],
+            context=context,
        )
-        aware_chunk = enricher.enrich_chunk(chunk, 1.0)

+        assert len(result.chunks) == 1
+        aware_chunk = result.chunks[0]
        assert project.id in aware_chunk.user_project
        assert aware_chunk.personas == []

@@ -225,13 +229,14 @@ class TestAdapterWritesBothMetadataFields:
            updatable_docs=[chunk.source_document], id_to_boost_map={}
        )

-        enricher = adapter.prepare_enrichment(
-            context=context,
+        result = adapter.build_metadata_aware_chunks(
+            chunks_with_embeddings=[chunk],
+            chunk_content_scores=[1.0],
            tenant_id=TEST_TENANT_ID,
-            chunks=[chunk],
+            context=context,
        )
-        aware_chunk = enricher.enrich_chunk(chunk, 1.0)

+        aware_chunk = result.chunks[0]
        assert persona.id in aware_chunk.personas
        assert project.id in aware_chunk.user_project

@@ -256,13 +261,14 @@ class TestAdapterWritesBothMetadataFields:
            updatable_docs=[chunk.source_document], id_to_boost_map={}
        )

-        enricher = adapter.prepare_enrichment(
-            context=context,
+        result = adapter.build_metadata_aware_chunks(
+            chunks_with_embeddings=[chunk],
+            chunk_content_scores=[1.0],
            tenant_id=TEST_TENANT_ID,
-            chunks=[chunk],
+            context=context,
        )
-        aware_chunk = enricher.enrich_chunk(chunk, 1.0)

+        aware_chunk = result.chunks[0]
        assert aware_chunk.personas == []
        assert aware_chunk.user_project == []

@@ -294,11 +300,12 @@ class TestAdapterWritesBothMetadataFields:
            updatable_docs=[chunk.source_document], id_to_boost_map={}
        )

-        enricher = adapter.prepare_enrichment(
-            context=context,
+        result = adapter.build_metadata_aware_chunks(
+            chunks_with_embeddings=[chunk],
+            chunk_content_scores=[1.0],
            tenant_id=TEST_TENANT_ID,
-            chunks=[chunk],
+            context=context,
        )
-        aware_chunk = enricher.enrich_chunk(chunk, 1.0)

+        aware_chunk = result.chunks[0]
        assert set(aware_chunk.personas) == {persona_a.id, persona_b.id}
--- a/backend/tests/external_dependency_unit/celery/test_user_file_processing_queue.py
+++ b/backend/tests/external_dependency_unit/celery/test_user_file_processing_queue.py
@@ -90,17 +90,8 @@ def _patch_task_app(task: Any, mock_app: MagicMock) -> Generator[None, None, Non
    task only.
    """
    task_instance = task.run.__self__
-    with (
-        patch.object(
-            type(task_instance),
-            "app",
-            new_callable=PropertyMock,
-            return_value=mock_app,
-        ),
-        patch(
-            "onyx.background.celery.tasks.user_file_processing.tasks.celery_get_broker_client",
-            return_value=MagicMock(),
-        ),
+    with patch.object(
+        type(task_instance), "app", new_callable=PropertyMock, return_value=mock_app
    ):
        yield

--- a/backend/tests/external_dependency_unit/document_index/test_document_index.py
+++ b/backend/tests/external_dependency_unit/document_index/test_document_index.py
@@ -6,7 +6,6 @@ These tests assume Vespa and OpenSearch are running.
 import time
 import uuid
 from collections.abc import Generator
-from collections.abc import Iterator

 import httpx
 import pytest
@@ -22,7 +21,6 @@ from onyx.document_index.opensearch.opensearch_document_index import (
 )
 from onyx.document_index.vespa.index import VespaIndex
 from onyx.document_index.vespa.vespa_document_index import VespaDocumentIndex
-from onyx.indexing.models import DocMetadataAwareIndexChunk
 from tests.external_dependency_unit.constants import TEST_TENANT_ID
 from tests.external_dependency_unit.document_index.conftest import EMBEDDING_DIM
 from tests.external_dependency_unit.document_index.conftest import make_chunk
@@ -203,25 +201,3 @@ class TestDocumentIndexNew:
            assert len(result_map) == 2
            assert result_map[existing_doc] is True
            assert result_map[new_doc] is False
-
-    def test_index_accepts_generator(
-        self,
-        document_indices: list[DocumentIndexNew],
-        tenant_context: None,  # noqa: ARG002
-    ) -> None:
-        """index() accepts a generator (any iterable), not just a list."""
-        for document_index in document_indices:
-            doc_id = f"test_gen_{uuid.uuid4().hex[:8]}"
-            metadata = make_indexing_metadata([doc_id], old_counts=[0], new_counts=[3])
-
-            def chunk_gen() -> Iterator[DocMetadataAwareIndexChunk]:
-                for i in range(3):
-                    yield make_chunk(doc_id, chunk_id=i)
-
-            results = document_index.index(
-                chunks=chunk_gen(), indexing_metadata=metadata
-            )
-
-            assert len(results) == 1
-            assert results[0].document_id == doc_id
-            assert results[0].already_existed is False
--- a/backend/tests/external_dependency_unit/document_index/test_document_index_old.py
+++ b/backend/tests/external_dependency_unit/document_index/test_document_index_old.py
@@ -5,7 +5,6 @@ These tests assume Vespa and OpenSearch are running.

 import time
 from collections.abc import Generator
-from collections.abc import Iterator

 import pytest

@@ -167,29 +166,3 @@ class TestDocumentIndexOld:
                batch_retrieval=True,
            )
            assert len(inference_chunks) == 0
-
-    def test_index_accepts_generator(
-        self,
-        document_indices: list[DocumentIndex],
-        tenant_context: None,  # noqa: ARG002
-    ) -> None:
-        """index() accepts a generator (any iterable), not just a list."""
-        for document_index in document_indices:
-
-            def chunk_gen() -> Iterator[DocMetadataAwareIndexChunk]:
-                for i in range(3):
-                    yield make_chunk("test_doc_gen", chunk_id=i)
-
-            index_batch_params = IndexBatchParams(
-                doc_id_to_previous_chunk_cnt={"test_doc_gen": 0},
-                doc_id_to_new_chunk_cnt={"test_doc_gen": 3},
-                tenant_id=get_current_tenant_id(),
-                large_chunks_enabled=False,
-            )
-
-            results = document_index.index(chunk_gen(), index_batch_params)
-
-            assert len(results) == 1
-            record = results.pop()
-            assert record.document_id == "test_doc_gen"
-            assert record.already_existed is False
--- a/backend/tests/external_dependency_unit/tools/test_python_tool.py
+++ b/backend/tests/external_dependency_unit/tools/test_python_tool.py
@@ -1175,7 +1175,7 @@ def test_code_interpreter_receives_chat_files(

    file_descriptor: FileDescriptor = {
        "id": user_file.file_id,
-        "type": ChatFileType.CSV,
+        "type": ChatFileType.TABULAR,
        "name": "data.csv",
        "user_file_id": str(user_file.id),
    }
--- a/backend/tests/unit/onyx/background/celery/test_celery_redis.py
+++ b/backend/tests/unit/onyx/background/celery/test_celery_redis.py
@@ -1,87 +0,0 @@
-"""Tests for celery_get_broker_client singleton."""
-
-from collections.abc import Iterator
-from unittest.mock import MagicMock
-from unittest.mock import patch
-
-import pytest
-
-from onyx.background.celery import celery_redis
-
-
-@pytest.fixture(autouse=True)
-def reset_singleton() -> Iterator[None]:
-    """Reset the module-level singleton between tests."""
-    celery_redis._broker_client = None
-    celery_redis._broker_url = None
-    yield
-    celery_redis._broker_client = None
-    celery_redis._broker_url = None
-
-
-def _make_mock_app(broker_url: str = "redis://localhost:6379/15") -> MagicMock:
-    app = MagicMock()
-    app.conf.broker_url = broker_url
-    return app
-
-
-class TestCeleryGetBrokerClient:
-    @patch("onyx.background.celery.celery_redis.Redis")
-    def test_creates_client_on_first_call(self, mock_redis_cls: MagicMock) -> None:
-        mock_client = MagicMock()
-        mock_redis_cls.from_url.return_value = mock_client
-
-        app = _make_mock_app()
-        result = celery_redis.celery_get_broker_client(app)
-
-        assert result is mock_client
-        call_args = mock_redis_cls.from_url.call_args
-        assert call_args[0][0] == "redis://localhost:6379/15"
-        assert call_args[1]["decode_responses"] is False
-        assert call_args[1]["socket_keepalive"] is True
-        assert call_args[1]["retry_on_timeout"] is True
-
-    @patch("onyx.background.celery.celery_redis.Redis")
-    def test_reuses_cached_client(self, mock_redis_cls: MagicMock) -> None:
-        mock_client = MagicMock()
-        mock_client.ping.return_value = True
-        mock_redis_cls.from_url.return_value = mock_client
-
-        app = _make_mock_app()
-        client1 = celery_redis.celery_get_broker_client(app)
-        client2 = celery_redis.celery_get_broker_client(app)
-
-        assert client1 is client2
-        # from_url called only once
-        assert mock_redis_cls.from_url.call_count == 1
-
-    @patch("onyx.background.celery.celery_redis.Redis")
-    def test_reconnects_on_ping_failure(self, mock_redis_cls: MagicMock) -> None:
-        stale_client = MagicMock()
-        stale_client.ping.side_effect = ConnectionError("disconnected")
-
-        fresh_client = MagicMock()
-        fresh_client.ping.return_value = True
-
-        mock_redis_cls.from_url.side_effect = [stale_client, fresh_client]
-
-        app = _make_mock_app()
-
-        # First call creates stale_client
-        client1 = celery_redis.celery_get_broker_client(app)
-        assert client1 is stale_client
-
-        # Second call: ping fails, creates fresh_client
-        client2 = celery_redis.celery_get_broker_client(app)
-        assert client2 is fresh_client
-        assert mock_redis_cls.from_url.call_count == 2
-
-    @patch("onyx.background.celery.celery_redis.Redis")
-    def test_uses_broker_url_from_app_config(self, mock_redis_cls: MagicMock) -> None:
-        mock_redis_cls.from_url.return_value = MagicMock()
-
-        app = _make_mock_app("redis://custom-host:6380/3")
-        celery_redis.celery_get_broker_client(app)
-
-        call_args = mock_redis_cls.from_url.call_args
-        assert call_args[0][0] == "redis://custom-host:6380/3"
--- a/backend/tests/unit/onyx/chat/test_save_chat.py
+++ b/backend/tests/unit/onyx/chat/test_save_chat.py
@@ -139,7 +139,7 @@ def test_csv_file_type() -> None:
    result = _extract_referenced_file_descriptors([tool_call], message)

    assert len(result) == 1
-    assert result[0]["type"] == ChatFileType.CSV
+    assert result[0]["type"] == ChatFileType.TABULAR


 def test_unknown_extension_defaults_to_plain_text() -> None:
--- a/backend/tests/unit/onyx/connectors/canvas/test_canvas_connector.py
+++ b/backend/tests/unit/onyx/connectors/canvas/test_canvas_connector.py
@@ -1,23 +1,15 @@
-"""Tests for Canvas connector — client, credentials, conversion."""
+"""Tests for Canvas connector — client (PR1)."""

-from datetime import datetime
-from datetime import timezone
 from typing import Any
 from unittest.mock import MagicMock
 from unittest.mock import patch

 import pytest

-from onyx.configs.constants import DocumentSource
 from onyx.connectors.canvas.client import CanvasApiClient
-from onyx.connectors.canvas.connector import CanvasConnector
-from onyx.connectors.exceptions import ConnectorValidationError
-from onyx.connectors.exceptions import CredentialExpiredError
-from onyx.connectors.exceptions import InsufficientPermissionsError
-from onyx.connectors.exceptions import UnexpectedValidationError
-from onyx.connectors.models import ConnectorMissingCredentialError
 from onyx.error_handling.exceptions import OnyxError

+
 # ---------------------------------------------------------------------------
 # Helpers
 # ---------------------------------------------------------------------------
@@ -26,77 +18,6 @@ FAKE_BASE_URL = "https://myschool.instructure.com"
 FAKE_TOKEN = "fake-canvas-token"


-def _mock_course(
-    course_id: int = 1,
-    name: str = "Intro to CS",
-    course_code: str = "CS101",
-) -> dict[str, Any]:
-    return {
-        "id": course_id,
-        "name": name,
-        "course_code": course_code,
-        "created_at": "2025-01-01T00:00:00Z",
-        "workflow_state": "available",
-    }
-
-
-def _build_connector(base_url: str = FAKE_BASE_URL) -> CanvasConnector:
-    """Build a connector with mocked credential validation."""
-    with patch("onyx.connectors.canvas.client.rl_requests") as mock_req:
-        mock_req.get.return_value = _mock_response(json_data=[_mock_course()])
-        connector = CanvasConnector(canvas_base_url=base_url)
-        connector.load_credentials({"canvas_access_token": FAKE_TOKEN})
-    return connector
-
-
-def _mock_page(
-    page_id: int = 10,
-    title: str = "Syllabus",
-    updated_at: str = "2025-06-01T12:00:00Z",
-) -> dict[str, Any]:
-    return {
-        "page_id": page_id,
-        "url": "syllabus",
-        "title": title,
-        "body": "<p>Welcome to the course</p>",
-        "created_at": "2025-01-15T00:00:00Z",
-        "updated_at": updated_at,
-    }
-
-
-def _mock_assignment(
-    assignment_id: int = 20,
-    name: str = "Homework 1",
-    course_id: int = 1,
-    updated_at: str = "2025-06-01T12:00:00Z",
-) -> dict[str, Any]:
-    return {
-        "id": assignment_id,
-        "name": name,
-        "description": "<p>Solve these problems</p>",
-        "html_url": f"{FAKE_BASE_URL}/courses/{course_id}/assignments/{assignment_id}",
-        "course_id": course_id,
-        "created_at": "2025-01-20T00:00:00Z",
-        "updated_at": updated_at,
-        "due_at": "2025-02-01T23:59:00Z",
-    }
-
-
-def _mock_announcement(
-    announcement_id: int = 30,
-    title: str = "Class Cancelled",
-    course_id: int = 1,
-    posted_at: str = "2025-06-01T12:00:00Z",
-) -> dict[str, Any]:
-    return {
-        "id": announcement_id,
-        "title": title,
-        "message": "<p>No class today</p>",
-        "html_url": f"{FAKE_BASE_URL}/courses/{course_id}/discussion_topics/{announcement_id}",
-        "posted_at": posted_at,
-    }
-
-
 def _mock_response(
    status_code: int = 200,
    json_data: Any = None,
@@ -404,57 +325,6 @@ class TestGet:
        assert result == expected


-# ---------------------------------------------------------------------------
-# CanvasApiClient.paginate tests
-# ---------------------------------------------------------------------------
-
-
-class TestPaginate:
-    @patch("onyx.connectors.canvas.client.rl_requests")
-    def test_single_page(self, mock_requests: MagicMock) -> None:
-        mock_requests.get.return_value = _mock_response(
-            json_data=[{"id": 1}, {"id": 2}]
-        )
-        client = CanvasApiClient(
-            bearer_token=FAKE_TOKEN,
-            canvas_base_url=FAKE_BASE_URL,
-        )
-
-        pages = list(client.paginate("courses"))
-
-        assert len(pages) == 1
-        assert pages[0] == [{"id": 1}, {"id": 2}]
-
-    @patch("onyx.connectors.canvas.client.rl_requests")
-    def test_two_pages(self, mock_requests: MagicMock) -> None:
-        next_link = f'<{FAKE_BASE_URL}/api/v1/courses?page=2>; rel="next"'
-        page1 = _mock_response(json_data=[{"id": 1}], link_header=next_link)
-        page2 = _mock_response(json_data=[{"id": 2}])
-        mock_requests.get.side_effect = [page1, page2]
-        client = CanvasApiClient(
-            bearer_token=FAKE_TOKEN,
-            canvas_base_url=FAKE_BASE_URL,
-        )
-
-        pages = list(client.paginate("courses"))
-
-        assert len(pages) == 2
-        assert pages[0] == [{"id": 1}]
-        assert pages[1] == [{"id": 2}]
-
-    @patch("onyx.connectors.canvas.client.rl_requests")
-    def test_empty_response(self, mock_requests: MagicMock) -> None:
-        mock_requests.get.return_value = _mock_response(json_data=[])
-        client = CanvasApiClient(
-            bearer_token=FAKE_TOKEN,
-            canvas_base_url=FAKE_BASE_URL,
-        )
-
-        pages = list(client.paginate("courses"))
-
-        assert pages == []
-
-
 # ---------------------------------------------------------------------------
 # CanvasApiClient._parse_next_link tests
 # ---------------------------------------------------------------------------
@@ -509,368 +379,3 @@ class TestParseNextLink:

        with pytest.raises(OnyxError, match="must use https"):
            self.client._parse_next_link(header)
-
-
-# ---------------------------------------------------------------------------
-# CanvasConnector — credential loading
-# ---------------------------------------------------------------------------
-
-
-class TestLoadCredentials:
-    def _assert_load_credentials_raises(
-        self,
-        status_code: int,
-        expected_error: type[Exception],
-        mock_requests: MagicMock,
-    ) -> None:
-        """Helper: assert load_credentials raises expected_error for a given status."""
-        mock_requests.get.return_value = _mock_response(status_code, {})
-        connector = CanvasConnector(canvas_base_url=FAKE_BASE_URL)
-        with pytest.raises(expected_error):
-            connector.load_credentials({"canvas_access_token": FAKE_TOKEN})
-
-    @patch("onyx.connectors.canvas.client.rl_requests")
-    def test_load_credentials_success(self, mock_requests: MagicMock) -> None:
-        mock_requests.get.return_value = _mock_response(json_data=[_mock_course()])
-        connector = CanvasConnector(canvas_base_url=FAKE_BASE_URL)
-
-        result = connector.load_credentials({"canvas_access_token": FAKE_TOKEN})
-
-        assert result is None
-        assert connector._canvas_client is not None
-
-    def test_canvas_client_raises_without_credentials(self) -> None:
-        connector = CanvasConnector(canvas_base_url=FAKE_BASE_URL)
-
-        with pytest.raises(ConnectorMissingCredentialError):
-            _ = connector.canvas_client
-
-    @patch("onyx.connectors.canvas.client.rl_requests")
-    def test_load_credentials_invalid_token(self, mock_requests: MagicMock) -> None:
-        self._assert_load_credentials_raises(401, CredentialExpiredError, mock_requests)
-
-    @patch("onyx.connectors.canvas.client.rl_requests")
-    def test_load_credentials_insufficient_permissions(
-        self, mock_requests: MagicMock
-    ) -> None:
-        self._assert_load_credentials_raises(
-            403, InsufficientPermissionsError, mock_requests
-        )
-
-
-# ---------------------------------------------------------------------------
-# CanvasConnector — URL normalization
-# ---------------------------------------------------------------------------
-
-
-class TestConnectorUrlNormalization:
-    def test_strips_api_v1_suffix(self) -> None:
-        connector = _build_connector(base_url=f"{FAKE_BASE_URL}/api/v1")
-
-        result = connector.canvas_base_url
-        expected = FAKE_BASE_URL
-
-        assert result == expected
-
-    def test_strips_trailing_slash(self) -> None:
-        connector = _build_connector(base_url=f"{FAKE_BASE_URL}/")
-
-        result = connector.canvas_base_url
-        expected = FAKE_BASE_URL
-
-        assert result == expected
-
-    def test_no_change_for_clean_url(self) -> None:
-        connector = _build_connector(base_url=FAKE_BASE_URL)
-
-        result = connector.canvas_base_url
-        expected = FAKE_BASE_URL
-
-        assert result == expected
-
-
-# ---------------------------------------------------------------------------
-# CanvasConnector — document conversion
-# ---------------------------------------------------------------------------
-
-
-class TestDocumentConversion:
-    def setup_method(self) -> None:
-        self.connector = _build_connector()
-
-    def test_convert_page_to_document(self) -> None:
-        from onyx.connectors.canvas.connector import CanvasPage
-
-        page = CanvasPage(
-            page_id=10,
-            url="syllabus",
-            title="Syllabus",
-            body="<p>Welcome</p>",
-            created_at="2025-01-15T00:00:00Z",
-            updated_at="2025-06-01T12:00:00Z",
-            course_id=1,
-        )
-
-        doc = self.connector._convert_page_to_document(page)
-
-        expected_id = "canvas-page-1-10"
-        expected_metadata = {"course_id": "1", "type": "page"}
-        expected_updated_at = datetime(2025, 6, 1, 12, 0, tzinfo=timezone.utc)
-
-        assert doc.id == expected_id
-        assert doc.source == DocumentSource.CANVAS
-        assert doc.semantic_identifier == "Syllabus"
-        assert doc.metadata == expected_metadata
-        assert doc.sections[0].link is not None
-        assert f"{FAKE_BASE_URL}/courses/1/pages/syllabus" in doc.sections[0].link
-        assert doc.doc_updated_at == expected_updated_at
-
-    def test_convert_page_without_body(self) -> None:
-        from onyx.connectors.canvas.connector import CanvasPage
-
-        page = CanvasPage(
-            page_id=11,
-            url="empty-page",
-            title="Empty Page",
-            body=None,
-            created_at="2025-01-15T00:00:00Z",
-            updated_at="2025-06-01T12:00:00Z",
-            course_id=1,
-        )
-
-        doc = self.connector._convert_page_to_document(page)
-        section_text = doc.sections[0].text
-        assert section_text is not None
-
-        assert "Empty Page" in section_text
-        assert "<p>" not in section_text
-
-    def test_convert_assignment_to_document(self) -> None:
-        from onyx.connectors.canvas.connector import CanvasAssignment
-
-        assignment = CanvasAssignment(
-            id=20,
-            name="Homework 1",
-            description="<p>Solve these</p>",
-            html_url=f"{FAKE_BASE_URL}/courses/1/assignments/20",
-            course_id=1,
-            created_at="2025-01-20T00:00:00Z",
-            updated_at="2025-06-01T12:00:00Z",
-            due_at="2025-02-01T23:59:00Z",
-        )
-
-        doc = self.connector._convert_assignment_to_document(assignment)
-
-        expected_id = "canvas-assignment-1-20"
-        expected_due_text = "Due: February 01, 2025 23:59 UTC"
-
-        assert doc.id == expected_id
-        assert doc.source == DocumentSource.CANVAS
-        assert doc.semantic_identifier == "Homework 1"
-        assert doc.sections[0].text is not None
-        assert expected_due_text in doc.sections[0].text
-
-    def test_convert_assignment_without_description(self) -> None:
-        from onyx.connectors.canvas.connector import CanvasAssignment
-
-        assignment = CanvasAssignment(
-            id=21,
-            name="Quiz 1",
-            description=None,
-            html_url=f"{FAKE_BASE_URL}/courses/1/assignments/21",
-            course_id=1,
-            created_at="2025-01-20T00:00:00Z",
-            updated_at="2025-06-01T12:00:00Z",
-            due_at=None,
-        )
-
-        doc = self.connector._convert_assignment_to_document(assignment)
-        section_text = doc.sections[0].text
-        assert section_text is not None
-
-        assert "Quiz 1" in section_text
-        assert "Due:" not in section_text
-
-    def test_convert_announcement_to_document(self) -> None:
-        from onyx.connectors.canvas.connector import CanvasAnnouncement
-
-        announcement = CanvasAnnouncement(
-            id=30,
-            title="Class Cancelled",
-            message="<p>No class today</p>",
-            html_url=f"{FAKE_BASE_URL}/courses/1/discussion_topics/30",
-            posted_at="2025-06-01T12:00:00Z",
-            course_id=1,
-        )
-
-        doc = self.connector._convert_announcement_to_document(announcement)
-
-        expected_id = "canvas-announcement-1-30"
-        expected_updated_at = datetime(2025, 6, 1, 12, 0, tzinfo=timezone.utc)
-
-        assert doc.id == expected_id
-        assert doc.source == DocumentSource.CANVAS
-        assert doc.semantic_identifier == "Class Cancelled"
-        assert doc.doc_updated_at == expected_updated_at
-
-    def test_convert_announcement_without_posted_at(self) -> None:
-        from onyx.connectors.canvas.connector import CanvasAnnouncement
-
-        announcement = CanvasAnnouncement(
-            id=31,
-            title="TBD Announcement",
-            message=None,
-            html_url=f"{FAKE_BASE_URL}/courses/1/discussion_topics/31",
-            posted_at=None,
-            course_id=1,
-        )
-
-        doc = self.connector._convert_announcement_to_document(announcement)
-
-        assert doc.doc_updated_at is None
-
-
-# ---------------------------------------------------------------------------
-# CanvasConnector — validate_connector_settings
-# ---------------------------------------------------------------------------
-
-
-class TestValidateConnectorSettings:
-    def _assert_validate_raises(
-        self,
-        status_code: int,
-        expected_error: type[Exception],
-        mock_requests: MagicMock,
-    ) -> None:
-        """Helper: assert validate_connector_settings raises expected_error."""
-        success_resp = _mock_response(json_data=[_mock_course()])
-        fail_resp = _mock_response(status_code, {})
-        mock_requests.get.side_effect = [success_resp, fail_resp]
-        connector = CanvasConnector(canvas_base_url=FAKE_BASE_URL)
-        connector.load_credentials({"canvas_access_token": FAKE_TOKEN})
-        with pytest.raises(expected_error):
-            connector.validate_connector_settings()
-
-    @patch("onyx.connectors.canvas.client.rl_requests")
-    def test_validate_success(self, mock_requests: MagicMock) -> None:
-        mock_requests.get.return_value = _mock_response(json_data=[_mock_course()])
-        connector = _build_connector()
-
-        connector.validate_connector_settings()  # should not raise
-
-    @patch("onyx.connectors.canvas.client.rl_requests")
-    def test_validate_expired_credential(self, mock_requests: MagicMock) -> None:
-        self._assert_validate_raises(401, CredentialExpiredError, mock_requests)
-
-    @patch("onyx.connectors.canvas.client.rl_requests")
-    def test_validate_insufficient_permissions(self, mock_requests: MagicMock) -> None:
-        self._assert_validate_raises(403, InsufficientPermissionsError, mock_requests)
-
-    @patch("onyx.connectors.canvas.client.rl_requests")
-    def test_validate_rate_limited(self, mock_requests: MagicMock) -> None:
-        self._assert_validate_raises(429, ConnectorValidationError, mock_requests)
-
-    @patch("onyx.connectors.canvas.client.rl_requests")
-    def test_validate_unexpected_error(self, mock_requests: MagicMock) -> None:
-        self._assert_validate_raises(500, UnexpectedValidationError, mock_requests)
-
-
-# ---------------------------------------------------------------------------
-# _list_* pagination tests
-# ---------------------------------------------------------------------------
-
-
-class TestListCourses:
-    @patch("onyx.connectors.canvas.client.rl_requests")
-    def test_single_page(self, mock_requests: MagicMock) -> None:
-        mock_requests.get.return_value = _mock_response(
-            json_data=[_mock_course(1), _mock_course(2, "CS201", "Data Structures")]
-        )
-        connector = _build_connector()
-
-        result = connector._list_courses()
-
-        assert len(result) == 2
-        assert result[0].id == 1
-        assert result[1].id == 2
-
-    @patch("onyx.connectors.canvas.client.rl_requests")
-    def test_empty_response(self, mock_requests: MagicMock) -> None:
-        mock_requests.get.return_value = _mock_response(json_data=[])
-        connector = _build_connector()
-
-        result = connector._list_courses()
-
-        assert result == []
-
-
-class TestListPages:
-    @patch("onyx.connectors.canvas.client.rl_requests")
-    def test_single_page(self, mock_requests: MagicMock) -> None:
-        mock_requests.get.return_value = _mock_response(
-            json_data=[_mock_page(10), _mock_page(11, "Notes")]
-        )
-        connector = _build_connector()
-
-        result = connector._list_pages(course_id=1)
-
-        assert len(result) == 2
-        assert result[0].page_id == 10
-        assert result[1].page_id == 11
-
-    @patch("onyx.connectors.canvas.client.rl_requests")
-    def test_empty_response(self, mock_requests: MagicMock) -> None:
-        mock_requests.get.return_value = _mock_response(json_data=[])
-        connector = _build_connector()
-
-        result = connector._list_pages(course_id=1)
-
-        assert result == []
-
-
-class TestListAssignments:
-    @patch("onyx.connectors.canvas.client.rl_requests")
-    def test_single_page(self, mock_requests: MagicMock) -> None:
-        mock_requests.get.return_value = _mock_response(
-            json_data=[_mock_assignment(20), _mock_assignment(21, "Quiz 1")]
-        )
-        connector = _build_connector()
-
-        result = connector._list_assignments(course_id=1)
-
-        assert len(result) == 2
-        assert result[0].id == 20
-        assert result[1].id == 21
-
-    @patch("onyx.connectors.canvas.client.rl_requests")
-    def test_empty_response(self, mock_requests: MagicMock) -> None:
-        mock_requests.get.return_value = _mock_response(json_data=[])
-        connector = _build_connector()
-
-        result = connector._list_assignments(course_id=1)
-
-        assert result == []
-
-
-class TestListAnnouncements:
-    @patch("onyx.connectors.canvas.client.rl_requests")
-    def test_single_page(self, mock_requests: MagicMock) -> None:
-        mock_requests.get.return_value = _mock_response(
-            json_data=[_mock_announcement(30), _mock_announcement(31, "Update")]
-        )
-        connector = _build_connector()
-
-        result = connector._list_announcements(course_id=1)
-
-        assert len(result) == 2
-        assert result[0].id == 30
-        assert result[1].id == 31
-
-    @patch("onyx.connectors.canvas.client.rl_requests")
-    def test_empty_response(self, mock_requests: MagicMock) -> None:
-        mock_requests.get.return_value = _mock_response(json_data=[])
-        connector = _build_connector()
-
-        result = connector._list_announcements(course_id=1)
-
-        assert result == []
--- a/backend/tests/unit/onyx/document_index/opensearch/test_opensearch_batch_flush.py
+++ b/backend/tests/unit/onyx/document_index/opensearch/test_opensearch_batch_flush.py
@@ -1,223 +0,0 @@
-from unittest.mock import MagicMock
-from unittest.mock import patch
-
-from onyx.access.models import DocumentAccess
-from onyx.configs.constants import DocumentSource
-from onyx.connectors.models import Document
-from onyx.connectors.models import TextSection
-from onyx.document_index.interfaces_new import IndexingMetadata
-from onyx.document_index.interfaces_new import TenantState
-from onyx.document_index.opensearch.opensearch_document_index import (
-    OpenSearchDocumentIndex,
-)
-from onyx.indexing.models import ChunkEmbedding
-from onyx.indexing.models import DocMetadataAwareIndexChunk
-
-
-def _make_chunk(
-    doc_id: str,
-    chunk_id: int,
-) -> DocMetadataAwareIndexChunk:
-    """Creates a minimal DocMetadataAwareIndexChunk for testing."""
-    doc = Document(
-        id=doc_id,
-        sections=[TextSection(text="test", link="http://test.com")],
-        source=DocumentSource.FILE,
-        semantic_identifier="test_doc",
-        metadata={},
-    )
-    access = DocumentAccess.build(
-        user_emails=[],
-        user_groups=[],
-        external_user_emails=[],
-        external_user_group_ids=[],
-        is_public=True,
-    )
-    return DocMetadataAwareIndexChunk(
-        chunk_id=chunk_id,
-        blurb="test",
-        content="test content",
-        source_links={0: "http://test.com"},
-        image_file_id=None,
-        section_continuation=False,
-        source_document=doc,
-        title_prefix="",
-        metadata_suffix_semantic="",
-        metadata_suffix_keyword="",
-        mini_chunk_texts=None,
-        large_chunk_id=None,
-        doc_summary="",
-        chunk_context="",
-        contextual_rag_reserved_tokens=0,
-        embeddings=ChunkEmbedding(full_embedding=[0.1] * 10, mini_chunk_embeddings=[]),
-        title_embedding=[0.1] * 10,
-        tenant_id="test_tenant",
-        access=access,
-        document_sets=set(),
-        user_project=[],
-        personas=[],
-        boost=0,
-        aggregated_chunk_boost_factor=1.0,
-        ancestor_hierarchy_node_ids=[],
-    )
-
-
-def _make_index() -> tuple[OpenSearchDocumentIndex, MagicMock]:
-    """Creates an OpenSearchDocumentIndex with a mocked client.
-    Returns the index and the mock for bulk_index_documents."""
-    mock_client = MagicMock()
-    mock_bulk = MagicMock()
-    mock_client.bulk_index_documents = mock_bulk
-
-    tenant_state = TenantState(tenant_id="test_tenant", multitenant=False)
-
-    index = OpenSearchDocumentIndex.__new__(OpenSearchDocumentIndex)
-    index._index_name = "test_index"
-    index._client = mock_client
-    index._tenant_state = tenant_state
-
-    return index, mock_bulk
-
-
-def _make_metadata(doc_id: str, chunk_count: int) -> IndexingMetadata:
-    return IndexingMetadata(
-        doc_id_to_chunk_cnt_diff={
-            doc_id: IndexingMetadata.ChunkCounts(
-                old_chunk_cnt=0,
-                new_chunk_cnt=chunk_count,
-            ),
-        },
-    )
-
-
-@patch(
-    "onyx.document_index.opensearch.opensearch_document_index.MAX_CHUNKS_PER_DOC_BATCH",
-    100,
-)
-def test_single_doc_under_batch_limit_flushes_once() -> None:
-    """A document with fewer chunks than MAX_CHUNKS_PER_DOC_BATCH should flush once."""
-    index, mock_bulk = _make_index()
-    doc_id = "doc_1"
-    num_chunks = 50
-    chunks = [_make_chunk(doc_id, i) for i in range(num_chunks)]
-    metadata = _make_metadata(doc_id, num_chunks)
-
-    with patch.object(index, "delete", return_value=0):
-        index.index(chunks, metadata)
-
-    assert mock_bulk.call_count == 1
-    batch_arg = mock_bulk.call_args_list[0]
-    assert len(batch_arg.kwargs["documents"]) == num_chunks
-
-
-@patch(
-    "onyx.document_index.opensearch.opensearch_document_index.MAX_CHUNKS_PER_DOC_BATCH",
-    100,
-)
-def test_single_doc_over_batch_limit_flushes_multiple_times() -> None:
-    """A document with more chunks than MAX_CHUNKS_PER_DOC_BATCH should flush multiple times."""
-    index, mock_bulk = _make_index()
-    doc_id = "doc_1"
-    num_chunks = 250
-    chunks = [_make_chunk(doc_id, i) for i in range(num_chunks)]
-    metadata = _make_metadata(doc_id, num_chunks)
-
-    with patch.object(index, "delete", return_value=0):
-        index.index(chunks, metadata)
-
-    # 250 chunks / 100 per batch = 3 flushes (100 + 100 + 50)
-    assert mock_bulk.call_count == 3
-    batch_sizes = [len(call.kwargs["documents"]) for call in mock_bulk.call_args_list]
-    assert batch_sizes == [100, 100, 50]
-
-
-@patch(
-    "onyx.document_index.opensearch.opensearch_document_index.MAX_CHUNKS_PER_DOC_BATCH",
-    100,
-)
-def test_single_doc_exactly_at_batch_limit() -> None:
-    """A document with exactly MAX_CHUNKS_PER_DOC_BATCH chunks should flush once
-    (the flush happens on the next chunk, not at the boundary)."""
-    index, mock_bulk = _make_index()
-    doc_id = "doc_1"
-    num_chunks = 100
-    chunks = [_make_chunk(doc_id, i) for i in range(num_chunks)]
-    metadata = _make_metadata(doc_id, num_chunks)
-
-    with patch.object(index, "delete", return_value=0):
-        index.index(chunks, metadata)
-
-    # 100 chunks hit the >= check on chunk 101 which doesn't exist,
-    # so final flush handles all 100
-    # Actually: the elif fires when len(current_chunks) >= 100, which happens
-    # when current_chunks has 100 items and the 101st chunk arrives.
-    # With exactly 100 chunks, the 100th chunk makes len == 99, then appended -> 100.
-    # No 101st chunk arrives, so the final flush handles all 100.
-    assert mock_bulk.call_count == 1
-
-
-@patch(
-    "onyx.document_index.opensearch.opensearch_document_index.MAX_CHUNKS_PER_DOC_BATCH",
-    100,
-)
-def test_single_doc_one_over_batch_limit() -> None:
-    """101 chunks for one doc: first 100 flushed when the 101st arrives, then
-    the 101st is flushed at the end."""
-    index, mock_bulk = _make_index()
-    doc_id = "doc_1"
-    num_chunks = 101
-    chunks = [_make_chunk(doc_id, i) for i in range(num_chunks)]
-    metadata = _make_metadata(doc_id, num_chunks)
-
-    with patch.object(index, "delete", return_value=0):
-        index.index(chunks, metadata)
-
-    assert mock_bulk.call_count == 2
-    batch_sizes = [len(call.kwargs["documents"]) for call in mock_bulk.call_args_list]
-    assert batch_sizes == [100, 1]
-
-
-@patch(
-    "onyx.document_index.opensearch.opensearch_document_index.MAX_CHUNKS_PER_DOC_BATCH",
-    100,
-)
-def test_multiple_docs_each_under_limit_flush_per_doc() -> None:
-    """Multiple documents each under the batch limit should flush once per document."""
-    index, mock_bulk = _make_index()
-    chunks = []
-    for doc_idx in range(3):
-        doc_id = f"doc_{doc_idx}"
-        for chunk_idx in range(50):
-            chunks.append(_make_chunk(doc_id, chunk_idx))
-
-    metadata = IndexingMetadata(
-        doc_id_to_chunk_cnt_diff={
-            f"doc_{i}": IndexingMetadata.ChunkCounts(old_chunk_cnt=0, new_chunk_cnt=50)
-            for i in range(3)
-        },
-    )
-
-    with patch.object(index, "delete", return_value=0):
-        index.index(chunks, metadata)
-
-    # 3 documents = 3 flushes (one per doc boundary + final)
-    assert mock_bulk.call_count == 3
-
-
-@patch(
-    "onyx.document_index.opensearch.opensearch_document_index.MAX_CHUNKS_PER_DOC_BATCH",
-    100,
-)
-def test_delete_called_once_per_document() -> None:
-    """Even with multiple flushes for a single document, delete should only be
-    called once per document."""
-    index, _mock_bulk = _make_index()
-    doc_id = "doc_1"
-    num_chunks = 250
-    chunks = [_make_chunk(doc_id, i) for i in range(num_chunks)]
-    metadata = _make_metadata(doc_id, num_chunks)
-
-    with patch.object(index, "delete", return_value=0) as mock_delete:
-        index.index(chunks, metadata)
-
-    mock_delete.assert_called_once_with(doc_id, None)
--- a/backend/tests/unit/onyx/document_index/vespa/test_vespa_batch_flush.py
+++ b/backend/tests/unit/onyx/document_index/vespa/test_vespa_batch_flush.py
@@ -1,152 +0,0 @@
-"""Unit tests for VespaDocumentIndex.index().
-
-These tests mock all external I/O (HTTP calls, thread pools) and verify
-the streaming logic, ID cleaning/mapping, and DocumentInsertionRecord
-construction.
-"""
-
-from unittest.mock import MagicMock
-from unittest.mock import patch
-
-from onyx.access.models import DocumentAccess
-from onyx.configs.constants import DocumentSource
-from onyx.connectors.models import Document
-from onyx.connectors.models import TextSection
-from onyx.document_index.interfaces import EnrichedDocumentIndexingInfo
-from onyx.document_index.interfaces_new import IndexingMetadata
-from onyx.document_index.interfaces_new import TenantState
-from onyx.document_index.vespa.vespa_document_index import VespaDocumentIndex
-from onyx.indexing.models import ChunkEmbedding
-from onyx.indexing.models import DocMetadataAwareIndexChunk
-from onyx.indexing.models import IndexChunk
-
-
-def _make_chunk(
-    doc_id: str,
-    chunk_id: int = 0,
-    content: str = "test content",
-) -> DocMetadataAwareIndexChunk:
-    doc = Document(
-        id=doc_id,
-        semantic_identifier="test_doc",
-        sections=[TextSection(text=content, link=None)],
-        source=DocumentSource.NOT_APPLICABLE,
-        metadata={},
-    )
-    index_chunk = IndexChunk(
-        chunk_id=chunk_id,
-        blurb=content[:50],
-        content=content,
-        source_links=None,
-        image_file_id=None,
-        section_continuation=False,
-        source_document=doc,
-        title_prefix="",
-        metadata_suffix_semantic="",
-        metadata_suffix_keyword="",
-        contextual_rag_reserved_tokens=0,
-        doc_summary="",
-        chunk_context="",
-        mini_chunk_texts=None,
-        large_chunk_id=None,
-        embeddings=ChunkEmbedding(
-            full_embedding=[0.1] * 10,
-            mini_chunk_embeddings=[],
-        ),
-        title_embedding=None,
-    )
-    access = DocumentAccess.build(
-        user_emails=[],
-        user_groups=[],
-        external_user_emails=[],
-        external_user_group_ids=[],
-        is_public=True,
-    )
-    return DocMetadataAwareIndexChunk.from_index_chunk(
-        index_chunk=index_chunk,
-        access=access,
-        document_sets=set(),
-        user_project=[],
-        personas=[],
-        boost=0,
-        aggregated_chunk_boost_factor=1.0,
-        tenant_id="test_tenant",
-    )
-
-
-def _make_indexing_metadata(
-    doc_ids: list[str],
-    old_counts: list[int],
-    new_counts: list[int],
-) -> IndexingMetadata:
-    return IndexingMetadata(
-        doc_id_to_chunk_cnt_diff={
-            doc_id: IndexingMetadata.ChunkCounts(
-                old_chunk_cnt=old,
-                new_chunk_cnt=new,
-            )
-            for doc_id, old, new in zip(doc_ids, old_counts, new_counts)
-        }
-    )
-
-
-def _stub_enrich(
-    doc_id: str,
-    old_chunk_cnt: int,
-) -> EnrichedDocumentIndexingInfo:
-    """Build an EnrichedDocumentIndexingInfo that says 'no chunks to delete'
-    when old_chunk_cnt == 0, or 'has existing chunks' otherwise."""
-    return EnrichedDocumentIndexingInfo(
-        doc_id=doc_id,
-        chunk_start_index=0,
-        old_version=False,
-        chunk_end_index=old_chunk_cnt,
-    )
-
-
-@patch("onyx.document_index.vespa.vespa_document_index.batch_index_vespa_chunks")
-@patch("onyx.document_index.vespa.vespa_document_index.delete_vespa_chunks")
-@patch(
-    "onyx.document_index.vespa.vespa_document_index.get_document_chunk_ids",
-    return_value=[],
-)
-@patch("onyx.document_index.vespa.vespa_document_index._enrich_basic_chunk_info")
-@patch(
-    "onyx.document_index.vespa.vespa_document_index.BATCH_SIZE",
-    3,
-)
-def test_index_respects_batch_size(
-    mock_enrich: MagicMock,
-    mock_get_chunk_ids: MagicMock,  # noqa: ARG001
-    mock_delete: MagicMock,  # noqa: ARG001
-    mock_batch_index: MagicMock,
-) -> None:
-    """When chunks exceed BATCH_SIZE, batch_index_vespa_chunks is called
-    multiple times with correctly sized batches."""
-    mock_enrich.return_value = _stub_enrich("doc1", old_chunk_cnt=0)
-
-    index = VespaDocumentIndex(
-        index_name="test_index",
-        tenant_state=TenantState(tenant_id="test_tenant", multitenant=False),
-        large_chunks_enabled=False,
-        httpx_client=MagicMock(),
-    )
-
-    chunks = [_make_chunk("doc1", chunk_id=i) for i in range(7)]
-    metadata = _make_indexing_metadata(["doc1"], old_counts=[0], new_counts=[7])
-
-    results = index.index(chunks=chunks, indexing_metadata=metadata)
-
-    assert len(results) == 1
-
-    # With BATCH_SIZE=3 and 7 chunks: batches of 3, 3, 1
-    assert mock_batch_index.call_count == 3
-    batch_sizes = [len(c.kwargs["chunks"]) for c in mock_batch_index.call_args_list]
-    assert batch_sizes == [3, 3, 1]
-
-    # Verify all chunks are accounted for and in order
-    all_indexed = [
-        chunk for c in mock_batch_index.call_args_list for chunk in c.kwargs["chunks"]
-    ]
-    assert len(all_indexed) == 7
-    assert [c.chunk_id for c in all_indexed] == list(range(7))
--- a/backend/tests/unit/onyx/indexing/test_embed_chunks_in_batches.py
+++ b/backend/tests/unit/onyx/indexing/test_embed_chunks_in_batches.py
@@ -1,391 +0,0 @@
-"""Unit tests for _embed_chunks_to_store.
-
-Tests cover:
-  - Single batch, no failures
-  - Multiple batches, no failures
-  - Failure in a single batch
-  - Cross-batch document failure scrubbing
-  - Later batches skip already-failed docs
-  - Empty input
-  - All chunks fail
-"""
-
-from collections.abc import Callable
-from unittest.mock import MagicMock
-from unittest.mock import patch
-
-from onyx.connectors.models import ConnectorFailure
-from onyx.connectors.models import Document
-from onyx.connectors.models import DocumentFailure
-from onyx.connectors.models import DocumentSource
-from onyx.connectors.models import TextSection
-from onyx.indexing.chunk_batch_store import ChunkBatchStore
-from onyx.indexing.indexing_pipeline import _embed_chunks_to_store
-from onyx.indexing.models import ChunkEmbedding
-from onyx.indexing.models import DocAwareChunk
-from onyx.indexing.models import IndexChunk
-
-
-def _make_doc(doc_id: str) -> Document:
-    return Document(
-        id=doc_id,
-        semantic_identifier="test",
-        source=DocumentSource.FILE,
-        sections=[TextSection(text="test", link=None)],
-        metadata={},
-    )
-
-
-def _make_chunk(doc_id: str, chunk_id: int) -> DocAwareChunk:
-    return DocAwareChunk(
-        chunk_id=chunk_id,
-        blurb="test",
-        content="test content",
-        source_links=None,
-        image_file_id=None,
-        section_continuation=False,
-        source_document=_make_doc(doc_id),
-        title_prefix="",
-        metadata_suffix_semantic="",
-        metadata_suffix_keyword="",
-        mini_chunk_texts=None,
-        large_chunk_id=None,
-        doc_summary="",
-        chunk_context="",
-        contextual_rag_reserved_tokens=0,
-    )
-
-
-def _make_index_chunk(doc_id: str, chunk_id: int) -> IndexChunk:
-    """Create an IndexChunk (a DocAwareChunk with embeddings)."""
-    return IndexChunk(
-        chunk_id=chunk_id,
-        blurb="test",
-        content="test content",
-        source_links=None,
-        image_file_id=None,
-        section_continuation=False,
-        source_document=_make_doc(doc_id),
-        title_prefix="",
-        metadata_suffix_semantic="",
-        metadata_suffix_keyword="",
-        mini_chunk_texts=None,
-        large_chunk_id=None,
-        doc_summary="",
-        chunk_context="",
-        contextual_rag_reserved_tokens=0,
-        embeddings=ChunkEmbedding(
-            full_embedding=[0.1] * 10,
-            mini_chunk_embeddings=[],
-        ),
-        title_embedding=None,
-    )
-
-
-def _make_failure(doc_id: str) -> ConnectorFailure:
-    return ConnectorFailure(
-        failed_document=DocumentFailure(document_id=doc_id, document_link=None),
-        failure_message="embedding failed",
-        exception=RuntimeError("embedding failed"),
-    )
-
-
-def _mock_embed_success(
-    chunks: list[DocAwareChunk], **_kwargs: object
-) -> tuple[list[IndexChunk], list[ConnectorFailure]]:
-    """Simulate successful embedding of all chunks."""
-    return (
-        [_make_index_chunk(c.source_document.id, c.chunk_id) for c in chunks],
-        [],
-    )
-
-
-def _mock_embed_fail_doc(
-    fail_doc_id: str,
-) -> Callable[..., tuple[list[IndexChunk], list[ConnectorFailure]]]:
-    """Return an embed mock that fails all chunks for a specific doc."""
-
-    def _embed(
-        chunks: list[DocAwareChunk], **_kwargs: object
-    ) -> tuple[list[IndexChunk], list[ConnectorFailure]]:
-        successes = [
-            _make_index_chunk(c.source_document.id, c.chunk_id)
-            for c in chunks
-            if c.source_document.id != fail_doc_id
-        ]
-        failures = (
-            [_make_failure(fail_doc_id)]
-            if any(c.source_document.id == fail_doc_id for c in chunks)
-            else []
-        )
-        return successes, failures
-
-    return _embed
-
-
-class TestEmbedChunksInBatches:
-    @patch(
-        "onyx.indexing.indexing_pipeline.embed_chunks_with_failure_handling",
-    )
-    @patch("onyx.indexing.indexing_pipeline.MAX_CHUNKS_PER_DOC_BATCH", 100)
-    def test_single_batch_no_failures(self, mock_embed: MagicMock) -> None:
-        """All chunks fit in one batch and embed successfully."""
-        mock_embed.side_effect = _mock_embed_success
-
-        with ChunkBatchStore() as store:
-            chunks = [_make_chunk("doc1", i) for i in range(3)]
-            result = _embed_chunks_to_store(
-                chunks=chunks,
-                embedder=MagicMock(),
-                tenant_id="test",
-                request_id=None,
-                store=store,
-            )
-
-            assert len(result.successful_chunk_ids) == 3
-            assert len(result.connector_failures) == 0
-
-            # Verify stored contents
-            assert len(store._batch_files()) == 1
-            stored = list(store.stream())
-            assert len(stored) == 3
-
-    @patch(
-        "onyx.indexing.indexing_pipeline.embed_chunks_with_failure_handling",
-    )
-    @patch("onyx.indexing.indexing_pipeline.MAX_CHUNKS_PER_DOC_BATCH", 3)
-    def test_multiple_batches_no_failures(self, mock_embed: MagicMock) -> None:
-        """Chunks are split across multiple batches, all succeed."""
-        mock_embed.side_effect = _mock_embed_success
-
-        with ChunkBatchStore() as store:
-            chunks = [_make_chunk("doc1", i) for i in range(7)]
-            result = _embed_chunks_to_store(
-                chunks=chunks,
-                embedder=MagicMock(),
-                tenant_id="test",
-                request_id=None,
-                store=store,
-            )
-
-            assert len(result.successful_chunk_ids) == 7
-            assert len(result.connector_failures) == 0
-            assert len(store._batch_files()) == 3  # 3 + 3 + 1
-
-    @patch(
-        "onyx.indexing.indexing_pipeline.embed_chunks_with_failure_handling",
-    )
-    @patch("onyx.indexing.indexing_pipeline.MAX_CHUNKS_PER_DOC_BATCH", 100)
-    def test_single_batch_with_failure(self, mock_embed: MagicMock) -> None:
-        """One doc fails embedding, its chunks are excluded from results."""
-        mock_embed.side_effect = _mock_embed_fail_doc("doc2")
-
-        with ChunkBatchStore() as store:
-            chunks = [
-                _make_chunk("doc1", 0),
-                _make_chunk("doc2", 1),
-                _make_chunk("doc1", 2),
-            ]
-            result = _embed_chunks_to_store(
-                chunks=chunks,
-                embedder=MagicMock(),
-                tenant_id="test",
-                request_id=None,
-                store=store,
-            )
-
-            assert len(result.connector_failures) == 1
-            successful_doc_ids = {doc_id for _, doc_id in result.successful_chunk_ids}
-            assert "doc2" not in successful_doc_ids
-            assert "doc1" in successful_doc_ids
-
-    @patch(
-        "onyx.indexing.indexing_pipeline.embed_chunks_with_failure_handling",
-    )
-    @patch("onyx.indexing.indexing_pipeline.MAX_CHUNKS_PER_DOC_BATCH", 3)
-    def test_cross_batch_failure_scrubs_earlier_batch(
-        self, mock_embed: MagicMock
-    ) -> None:
-        """Doc A spans batches 0 and 1.  It succeeds in batch 0 but fails in
-        batch 1.  Its chunks should be scrubbed from batch 0's batch file."""
-        call_count = 0
-
-        def _embed(
-            chunks: list[DocAwareChunk], **_kwargs: object
-        ) -> tuple[list[IndexChunk], list[ConnectorFailure]]:
-            nonlocal call_count
-            call_count += 1
-            if call_count == 1:
-                return _mock_embed_success(chunks)
-            else:
-                return _mock_embed_fail_doc("docA")(chunks)
-
-        mock_embed.side_effect = _embed
-
-        with ChunkBatchStore() as store:
-            chunks = [
-                _make_chunk("docA", 0),
-                _make_chunk("docA", 1),
-                _make_chunk("docA", 2),
-                _make_chunk("docA", 3),
-                _make_chunk("docB", 0),
-                _make_chunk("docB", 1),
-            ]
-            result = _embed_chunks_to_store(
-                chunks=chunks,
-                embedder=MagicMock(),
-                tenant_id="test",
-                request_id=None,
-                store=store,
-            )
-
-            # docA should be fully excluded from results
-            successful_doc_ids = {doc_id for _, doc_id in result.successful_chunk_ids}
-            assert "docA" not in successful_doc_ids
-            assert "docB" in successful_doc_ids
-            assert len(result.connector_failures) == 1
-
-            # Verify batch 0 was scrubbed of docA chunks
-            all_stored = list(store.stream())
-            stored_doc_ids = {c.source_document.id for c in all_stored}
-            assert "docA" not in stored_doc_ids
-            assert "docB" in stored_doc_ids
-
-    @patch(
-        "onyx.indexing.indexing_pipeline.embed_chunks_with_failure_handling",
-    )
-    @patch("onyx.indexing.indexing_pipeline.MAX_CHUNKS_PER_DOC_BATCH", 3)
-    def test_later_batch_skips_already_failed_doc(self, mock_embed: MagicMock) -> None:
-        """If docA fails in batch 0, its chunks in batch 1 are skipped
-        entirely (never sent to the embedder)."""
-        embedded_doc_ids: list[str] = []
-
-        def _embed(
-            chunks: list[DocAwareChunk], **_kwargs: object
-        ) -> tuple[list[IndexChunk], list[ConnectorFailure]]:
-            for c in chunks:
-                embedded_doc_ids.append(c.source_document.id)
-            return _mock_embed_fail_doc("docA")(chunks)
-
-        mock_embed.side_effect = _embed
-
-        with ChunkBatchStore() as store:
-            chunks = [
-                _make_chunk("docA", 0),
-                _make_chunk("docA", 1),
-                _make_chunk("docA", 2),
-                _make_chunk("docA", 3),
-                _make_chunk("docB", 0),
-                _make_chunk("docB", 1),
-            ]
-            _embed_chunks_to_store(
-                chunks=chunks,
-                embedder=MagicMock(),
-                tenant_id="test",
-                request_id=None,
-                store=store,
-            )
-
-        # docA should only appear in batch 0, not batch 1
-        batch_1_doc_ids = embedded_doc_ids[3:]
-        assert "docA" not in batch_1_doc_ids
-
-    @patch(
-        "onyx.indexing.indexing_pipeline.embed_chunks_with_failure_handling",
-    )
-    @patch("onyx.indexing.indexing_pipeline.MAX_CHUNKS_PER_DOC_BATCH", 3)
-    def test_failed_doc_skipped_in_later_batch_while_other_doc_succeeds(
-        self, mock_embed: MagicMock
-    ) -> None:
-        """doc1 spans batches 0 and 1, doc2 only in batch 1.  Batch 0 fails
-        doc1.  In batch 1, doc1 chunks should be skipped but doc2 chunks
-        should still be embedded successfully."""
-        embedded_chunks: list[list[str]] = []
-
-        def _embed(
-            chunks: list[DocAwareChunk], **_kwargs: object
-        ) -> tuple[list[IndexChunk], list[ConnectorFailure]]:
-            embedded_chunks.append([c.source_document.id for c in chunks])
-            return _mock_embed_fail_doc("doc1")(chunks)
-
-        mock_embed.side_effect = _embed
-
-        with ChunkBatchStore() as store:
-            chunks = [
-                _make_chunk("doc1", 0),
-                _make_chunk("doc1", 1),
-                _make_chunk("doc1", 2),
-                _make_chunk("doc1", 3),
-                _make_chunk("doc2", 0),
-                _make_chunk("doc2", 1),
-            ]
-            result = _embed_chunks_to_store(
-                chunks=chunks,
-                embedder=MagicMock(),
-                tenant_id="test",
-                request_id=None,
-                store=store,
-            )
-
-            # doc1 should be fully excluded, doc2 fully included
-            successful_doc_ids = {doc_id for _, doc_id in result.successful_chunk_ids}
-            assert "doc1" not in successful_doc_ids
-            assert "doc2" in successful_doc_ids
-            assert len(result.successful_chunk_ids) == 2  # doc2's 2 chunks
-
-            # Batch 1 should only contain doc2 (doc1 was filtered before embedding)
-            assert len(embedded_chunks) == 2
-            assert "doc1" not in embedded_chunks[1]
-            assert embedded_chunks[1] == ["doc2", "doc2"]
-
-            # Verify on-disk state has no doc1 chunks
-            all_stored = list(store.stream())
-            assert all(c.source_document.id == "doc2" for c in all_stored)
-
-    @patch(
-        "onyx.indexing.indexing_pipeline.embed_chunks_with_failure_handling",
-    )
-    def test_empty_input(self, mock_embed: MagicMock) -> None:
-        """Empty chunk list produces empty results."""
-        mock_embed.side_effect = _mock_embed_success
-
-        with ChunkBatchStore() as store:
-            result = _embed_chunks_to_store(
-                chunks=[],
-                embedder=MagicMock(),
-                tenant_id="test",
-                request_id=None,
-                store=store,
-            )
-
-            assert len(result.successful_chunk_ids) == 0
-            assert len(result.connector_failures) == 0
-            mock_embed.assert_not_called()
-
-    @patch(
-        "onyx.indexing.indexing_pipeline.embed_chunks_with_failure_handling",
-    )
-    @patch("onyx.indexing.indexing_pipeline.MAX_CHUNKS_PER_DOC_BATCH", 100)
-    def test_all_chunks_fail(self, mock_embed: MagicMock) -> None:
-        """When all documents fail, results have no successful chunks."""
-
-        def _fail_all(
-            chunks: list[DocAwareChunk], **_kwargs: object
-        ) -> tuple[list[IndexChunk], list[ConnectorFailure]]:
-            doc_ids = {c.source_document.id for c in chunks}
-            return [], [_make_failure(doc_id) for doc_id in doc_ids]
-
-        mock_embed.side_effect = _fail_all
-
-        with ChunkBatchStore() as store:
-            chunks = [_make_chunk("doc1", 0), _make_chunk("doc2", 1)]
-            result = _embed_chunks_to_store(
-                chunks=chunks,
-                embedder=MagicMock(),
-                tenant_id="test",
-                request_id=None,
-                store=store,
-            )
-
-            assert len(result.successful_chunk_ids) == 0
-            assert len(result.connector_failures) == 2
--- a/backend/tests/unit/onyx/indexing/test_personas_in_chunks.py
+++ b/backend/tests/unit/onyx/indexing/test_personas_in_chunks.py
@@ -116,7 +116,7 @@ def _run_adapter_build(
    project_ids_map: dict[str, list[int]],
    persona_ids_map: dict[str, list[int]],
 ) -> list[DocMetadataAwareIndexChunk]:
-    """Helper that runs UserFileIndexingAdapter.prepare_enrichment + enrich_chunk
+    """Helper that runs UserFileIndexingAdapter.build_metadata_aware_chunks
    with all external dependencies mocked."""
    from onyx.indexing.adapters.user_file_indexing_adapter import (
        UserFileIndexingAdapter,
@@ -155,16 +155,18 @@ def _run_adapter_build(
            side_effect=Exception("no LLM in tests"),
        ),
    ):
-        enricher = adapter.prepare_enrichment(
-            context=context,
+        result = adapter.build_metadata_aware_chunks(
+            chunks_with_embeddings=[chunk],
+            chunk_content_scores=[1.0],
            tenant_id="test_tenant",
-            chunks=[chunk],
+            context=context,
        )
-        return [enricher.enrich_chunk(chunk, 1.0)]
+
+    return result.chunks


-def test_prepare_enrichment_includes_persona_ids() -> None:
-    """UserFileIndexingAdapter.prepare_enrichment writes persona IDs
+def test_build_metadata_aware_chunks_includes_persona_ids() -> None:
+    """UserFileIndexingAdapter.build_metadata_aware_chunks writes persona IDs
    fetched from the DB into each chunk's metadata."""
    file_id = str(uuid4())
    persona_ids = [5, 12]
@@ -181,7 +183,7 @@ def test_prepare_enrichment_includes_persona_ids() -> None:
    assert chunks[0].user_project == project_ids


-def test_prepare_enrichment_missing_file_defaults_to_empty() -> None:
+def test_build_metadata_aware_chunks_missing_file_defaults_to_empty() -> None:
    """When a file has no persona or project associations in the DB, the
    adapter should default to empty lists (not KeyError or None)."""
    file_id = str(uuid4())
--- a/backend/tests/unit/onyx/llm/test_multi_llm.py
+++ b/backend/tests/unit/onyx/llm/test_multi_llm.py
@@ -11,7 +11,6 @@ from litellm.types.utils import ChatCompletionDeltaToolCall
 from litellm.types.utils import Delta
 from litellm.types.utils import Function as LiteLLMFunction

-import onyx.llm.models
 from onyx.configs.app_configs import MOCK_LLM_RESPONSE
 from onyx.llm.constants import LlmProviderNames
 from onyx.llm.interfaces import LLMUserIdentity
@@ -1480,147 +1479,6 @@ def test_bifrost_normalizes_api_base_in_model_kwargs() -> None:
    assert llm._model_kwargs["api_base"] == "https://bifrost.example.com/v1"


-def test_prompt_contains_tool_call_history_true() -> None:
-    from onyx.llm.multi_llm import _prompt_contains_tool_call_history
-
-    messages: LanguageModelInput = [
-        UserMessage(content="What's the weather?"),
-        AssistantMessage(
-            content=None,
-            tool_calls=[
-                ToolCall(
-                    id="tc_1",
-                    function=FunctionCall(name="get_weather", arguments="{}"),
-                )
-            ],
-        ),
-    ]
-    assert _prompt_contains_tool_call_history(messages) is True
-
-
-def test_prompt_contains_tool_call_history_false_no_tools() -> None:
-    from onyx.llm.multi_llm import _prompt_contains_tool_call_history
-
-    messages: LanguageModelInput = [
-        UserMessage(content="Hello"),
-        AssistantMessage(content="Hi there!"),
-    ]
-    assert _prompt_contains_tool_call_history(messages) is False
-
-
-def test_prompt_contains_tool_call_history_false_user_only() -> None:
-    from onyx.llm.multi_llm import _prompt_contains_tool_call_history
-
-    messages: LanguageModelInput = [UserMessage(content="Hello")]
-    assert _prompt_contains_tool_call_history(messages) is False
-
-
-def test_bedrock_claude_drops_thinking_when_thinking_blocks_missing() -> None:
-    """When thinking is enabled but assistant messages with tool_calls lack
-    thinking_blocks, the thinking param must be dropped to avoid the Bedrock
-    BadRequestError about missing thinking blocks."""
-    llm = LitellmLLM(
-        api_key=None,
-        timeout=30,
-        model_provider=LlmProviderNames.BEDROCK,
-        model_name="anthropic.claude-sonnet-4-20250514-v1:0",
-        max_input_tokens=200000,
-    )
-
-    messages: LanguageModelInput = [
-        UserMessage(content="What's the weather?"),
-        AssistantMessage(
-            content=None,
-            tool_calls=[
-                ToolCall(
-                    id="tc_1",
-                    function=FunctionCall(
-                        name="get_weather",
-                        arguments='{"city": "Paris"}',
-                    ),
-                )
-            ],
-        ),
-        onyx.llm.models.ToolMessage(
-            content="22°C sunny",
-            tool_call_id="tc_1",
-        ),
-    ]
-
-    tools = [
-        {
-            "type": "function",
-            "function": {
-                "name": "get_weather",
-                "description": "Get the weather",
-                "parameters": {
-                    "type": "object",
-                    "properties": {"city": {"type": "string"}},
-                },
-            },
-        }
-    ]
-
-    with (
-        patch("litellm.completion") as mock_completion,
-        patch("onyx.llm.multi_llm.model_is_reasoning_model", return_value=True),
-    ):
-        mock_completion.return_value = []
-
-        list(llm.stream(messages, tools=tools, reasoning_effort=ReasoningEffort.HIGH))
-
-        kwargs = mock_completion.call_args.kwargs
-        assert "thinking" not in kwargs, (
-            "thinking param should be dropped when thinking_blocks are missing "
-            "from assistant messages with tool_calls"
-        )
-
-
-def test_bedrock_claude_keeps_thinking_when_no_tool_history() -> None:
-    """When thinking is enabled and there are no historical assistant messages
-    with tool_calls, the thinking param should be preserved."""
-    llm = LitellmLLM(
-        api_key=None,
-        timeout=30,
-        model_provider=LlmProviderNames.BEDROCK,
-        model_name="anthropic.claude-sonnet-4-20250514-v1:0",
-        max_input_tokens=200000,
-    )
-
-    messages: LanguageModelInput = [
-        UserMessage(content="What's the weather?"),
-    ]
-
-    tools = [
-        {
-            "type": "function",
-            "function": {
-                "name": "get_weather",
-                "description": "Get the weather",
-                "parameters": {
-                    "type": "object",
-                    "properties": {"city": {"type": "string"}},
-                },
-            },
-        }
-    ]
-
-    with (
-        patch("litellm.completion") as mock_completion,
-        patch("onyx.llm.multi_llm.model_is_reasoning_model", return_value=True),
-    ):
-        mock_completion.return_value = []
-
-        list(llm.stream(messages, tools=tools, reasoning_effort=ReasoningEffort.HIGH))
-
-        kwargs = mock_completion.call_args.kwargs
-        assert "thinking" in kwargs, (
-            "thinking param should be preserved when no assistant messages "
-            "with tool_calls exist in history"
-        )
-        assert kwargs["thinking"]["type"] == "enabled"
-
-
 def test_bifrost_claude_includes_allowed_openai_params() -> None:
    llm = LitellmLLM(
        api_key="test_key",
--- a/backend/tests/unit/onyx/tools/test_tool_runner_chat_files.py
+++ b/backend/tests/unit/onyx/tools/test_tool_runner_chat_files.py
@@ -82,7 +82,7 @@ class TestChatFileConversion:
            ChatLoadedFile(
                file_id="file-2",
                content=b"csv,data\n1,2",
-                file_type=ChatFileType.CSV,
+                file_type=ChatFileType.TABULAR,
                filename="data.csv",
                content_text="csv,data\n1,2",
                token_count=5,
--- a/backend/tests/unit/server/metrics/test_indexing_pipeline_collectors.py
+++ b/backend/tests/unit/server/metrics/test_indexing_pipeline_collectors.py
@@ -1,6 +1,5 @@
 """Tests for indexing pipeline Prometheus collectors."""

-from collections.abc import Iterator
 from datetime import datetime
 from datetime import timedelta
 from datetime import timezone
@@ -14,16 +13,6 @@ from onyx.server.metrics.indexing_pipeline import IndexAttemptCollector
 from onyx.server.metrics.indexing_pipeline import QueueDepthCollector


-@pytest.fixture(autouse=True)
-def _mock_broker_client() -> Iterator[None]:
-    """Patch celery_get_broker_client for all collector tests."""
-    with patch(
-        "onyx.background.celery.celery_redis.celery_get_broker_client",
-        return_value=MagicMock(),
-    ):
-        yield
-
-
 class TestQueueDepthCollector:
    def test_returns_empty_when_factory_not_set(self) -> None:
        collector = QueueDepthCollector()
@@ -35,7 +24,8 @@ class TestQueueDepthCollector:

    def test_collects_queue_depths(self) -> None:
        collector = QueueDepthCollector(cache_ttl=0)
-        collector.set_celery_app(MagicMock())
+        mock_redis = MagicMock()
+        collector.set_redis_factory(lambda: mock_redis)

        with (
            patch(
@@ -70,8 +60,8 @@ class TestQueueDepthCollector:

    def test_handles_redis_error_gracefully(self) -> None:
        collector = QueueDepthCollector(cache_ttl=0)
-        MagicMock()
-        collector.set_celery_app(MagicMock())
+        mock_redis = MagicMock()
+        collector.set_redis_factory(lambda: mock_redis)

        with patch(
            "onyx.server.metrics.indexing_pipeline.celery_get_queue_length",
@@ -84,8 +74,8 @@ class TestQueueDepthCollector:

    def test_caching_returns_stale_within_ttl(self) -> None:
        collector = QueueDepthCollector(cache_ttl=60)
-        MagicMock()
-        collector.set_celery_app(MagicMock())
+        mock_redis = MagicMock()
+        collector.set_redis_factory(lambda: mock_redis)

        with (
            patch(
@@ -108,10 +98,31 @@ class TestQueueDepthCollector:

        assert first is second  # Same object, from cache

+    def test_factory_called_each_scrape(self) -> None:
+        """Verify the Redis factory is called on each fresh collect, not cached."""
+        collector = QueueDepthCollector(cache_ttl=0)
+        factory = MagicMock(return_value=MagicMock())
+        collector.set_redis_factory(factory)
+
+        with (
+            patch(
+                "onyx.server.metrics.indexing_pipeline.celery_get_queue_length",
+                return_value=0,
+            ),
+            patch(
+                "onyx.server.metrics.indexing_pipeline.celery_get_unacked_task_ids",
+                return_value=set(),
+            ),
+        ):
+            collector.collect()
+            collector.collect()
+
+        assert factory.call_count == 2
+
    def test_error_returns_stale_cache(self) -> None:
        collector = QueueDepthCollector(cache_ttl=0)
-        MagicMock()
-        collector.set_celery_app(MagicMock())
+        mock_redis = MagicMock()
+        collector.set_redis_factory(lambda: mock_redis)

        # First call succeeds
        with (
--- a/backend/tests/unit/server/metrics/test_indexing_pipeline_setup.py
+++ b/backend/tests/unit/server/metrics/test_indexing_pipeline_setup.py
@@ -1,22 +1,96 @@
-"""Tests for indexing pipeline setup."""
+"""Tests for indexing pipeline setup (Redis factory caching)."""

 from unittest.mock import MagicMock

-from onyx.server.metrics.indexing_pipeline import QueueDepthCollector
-from onyx.server.metrics.indexing_pipeline import RedisHealthCollector
+from onyx.server.metrics.indexing_pipeline_setup import _make_broker_redis_factory


-class TestCollectorCeleryAppSetup:
-    def test_queue_depth_collector_uses_celery_app(self) -> None:
-        """QueueDepthCollector.set_celery_app stores the app for broker access."""
-        collector = QueueDepthCollector()
-        mock_app = MagicMock()
-        collector.set_celery_app(mock_app)
-        assert collector._celery_app is mock_app
+def _make_mock_app(client: MagicMock) -> MagicMock:
+    """Create a mock Celery app whose broker_connection().channel().client
+    returns the given client."""
+    mock_app = MagicMock()
+    mock_conn = MagicMock()
+    mock_conn.channel.return_value.client = client

-    def test_redis_health_collector_uses_celery_app(self) -> None:
-        """RedisHealthCollector.set_celery_app stores the app for broker access."""
-        collector = RedisHealthCollector()
-        mock_app = MagicMock()
-        collector.set_celery_app(mock_app)
-        assert collector._celery_app is mock_app
+    mock_app.broker_connection.return_value = mock_conn
+
+    return mock_app
+
+
+class TestMakeBrokerRedisFactory:
+    def test_caches_redis_client_across_calls(self) -> None:
+        """Factory should reuse the same client on subsequent calls."""
+        mock_client = MagicMock()
+        mock_client.ping.return_value = True
+        mock_app = _make_mock_app(mock_client)
+
+        factory = _make_broker_redis_factory(mock_app)
+
+        client1 = factory()
+        client2 = factory()
+
+        assert client1 is client2
+        # broker_connection should only be called once
+        assert mock_app.broker_connection.call_count == 1
+
+    def test_reconnects_when_ping_fails(self) -> None:
+        """Factory should create a new client if ping fails (stale connection)."""
+        mock_client_stale = MagicMock()
+        mock_client_stale.ping.side_effect = ConnectionError("disconnected")
+
+        mock_client_fresh = MagicMock()
+        mock_client_fresh.ping.return_value = True
+
+        mock_app = _make_mock_app(mock_client_stale)
+
+        factory = _make_broker_redis_factory(mock_app)
+
+        # First call — creates and caches
+        client1 = factory()
+        assert client1 is mock_client_stale
+        assert mock_app.broker_connection.call_count == 1
+
+        # Switch to fresh client for next connection
+        mock_conn_fresh = MagicMock()
+        mock_conn_fresh.channel.return_value.client = mock_client_fresh
+        mock_app.broker_connection.return_value = mock_conn_fresh
+
+        # Second call — ping fails on stale, reconnects
+        client2 = factory()
+        assert client2 is mock_client_fresh
+        assert mock_app.broker_connection.call_count == 2
+
+    def test_reconnect_closes_stale_client(self) -> None:
+        """When ping fails, the old client should be closed before reconnecting."""
+        mock_client_stale = MagicMock()
+        mock_client_stale.ping.side_effect = ConnectionError("disconnected")
+
+        mock_client_fresh = MagicMock()
+        mock_client_fresh.ping.return_value = True
+
+        mock_app = _make_mock_app(mock_client_stale)
+
+        factory = _make_broker_redis_factory(mock_app)
+
+        # First call — creates and caches
+        factory()
+
+        # Switch to fresh client
+        mock_conn_fresh = MagicMock()
+        mock_conn_fresh.channel.return_value.client = mock_client_fresh
+        mock_app.broker_connection.return_value = mock_conn_fresh
+
+        # Second call — ping fails, should close stale client
+        factory()
+        mock_client_stale.close.assert_called_once()
+
+    def test_first_call_creates_connection(self) -> None:
+        """First call should always create a new connection."""
+        mock_client = MagicMock()
+        mock_app = _make_mock_app(mock_client)
+
+        factory = _make_broker_redis_factory(mock_app)
+        client = factory()
+
+        assert client is mock_client
+        mock_app.broker_connection.assert_called_once()
--- a/cli/.gitignore
+++ b/cli/.gitignore
@@ -1,4 +1,3 @@
 onyx-cli
 cli
 onyx.cli
-__pycache__
--- a/cli/README.md
+++ b/cli/README.md
@@ -63,31 +63,6 @@ onyx-cli agents
 onyx-cli agents --json
 ```

-### Serve over SSH
-
-```shell
-# Start a public SSH endpoint for the CLI TUI
-onyx-cli serve --host 0.0.0.0 --port 2222
-
-# Connect as a client
-ssh your-host -p 2222
-```
-
-Clients can either:
- paste an API key at the login prompt, or
- skip the prompt by sending `ONYX_API_KEY` over SSH:
-
-```shell
-export ONYX_API_KEY=your-key
-ssh -o SendEnv=ONYX_API_KEY your-host -p 2222
-```
-
-Useful hardening flags:
- `--idle-timeout` (default `15m`)
- `--max-session-timeout` (default `8h`)
- `--rate-limit-per-minute` (default `20`)
- `--rate-limit-burst` (default `40`)
-
 ## Commands

 | Command | Description |
@@ -95,7 +70,6 @@ Useful hardening flags:
 | `chat` | Launch the interactive chat TUI (default) |
 | `ask` | Ask a one-shot question (non-interactive) |
 | `agents` | List available agents |
-| `serve` | Serve the interactive chat TUI over SSH |
 | `configure` | Configure server URL and API key |
 | `validate-config` | Validate configuration and test connection |

--- a/cli/cmd/root.go
+++ b/cli/cmd/root.go
@@ -1,17 +1,7 @@
 // Package cmd implements Cobra CLI commands for the Onyx CLI.
 package cmd

-import (
-	"context"
-	"fmt"
-	"time"
-
-	"github.com/onyx-dot-app/onyx/cli/internal/api"
-	"github.com/onyx-dot-app/onyx/cli/internal/config"
-	"github.com/onyx-dot-app/onyx/cli/internal/version"
-	log "github.com/sirupsen/logrus"
-	"github.com/spf13/cobra"
-)
+import "github.com/spf13/cobra"

 // Version and Commit are set via ldflags at build time.
 var (
@@ -26,69 +16,15 @@ func fullVersion() string {
 	return Version
 }

-func printVersion(cmd *cobra.Command) {
-	_, _ = fmt.Fprintf(cmd.OutOrStdout(), "Client version: %s\n", fullVersion())
-
-	cfg := config.Load()
-	if !cfg.IsConfigured() {
-		_, _ = fmt.Fprintf(cmd.OutOrStdout(), "Server version: unknown (not configured)\n")
-		return
-	}
-
-	client := api.NewClient(cfg)
-	ctx, cancel := context.WithTimeout(cmd.Context(), 5*time.Second)
-	defer cancel()
-
-	log.Debug("fetching backend version from /api/version")
-	backendVersion, err := client.GetBackendVersion(ctx)
-	if err != nil {
-		log.WithError(err).Debug("could not fetch backend version")
-		_, _ = fmt.Fprintf(cmd.OutOrStdout(), "Server version: unknown (could not reach server)\n")
-		return
-	}
-
-	if backendVersion == "" {
-		_, _ = fmt.Fprintf(cmd.OutOrStdout(), "Server version: unknown (empty response)\n")
-		return
-	}
-
-	_, _ = fmt.Fprintf(cmd.OutOrStdout(), "Server version: %s\n", backendVersion)
-
-	min := version.MinServer()
-	if sv, ok := version.Parse(backendVersion); ok && sv.LessThan(min) {
-		log.Warnf("Server version %s is below minimum required %d.%d, please upgrade",
-			backendVersion, min.Major, min.Minor)
-	}
-}
-
 // Execute creates and runs the root command.
 func Execute() error {
-	opts := struct {
-		Debug bool
-	}{}
-
 	rootCmd := &cobra.Command{
-		Use:   "onyx-cli",
-		Short: "Terminal UI for chatting with Onyx",
-		Long:  "Onyx CLI — a terminal interface for chatting with your Onyx agent.",
-		PersistentPreRun: func(cmd *cobra.Command, args []string) {
-			if opts.Debug {
-				log.SetLevel(log.DebugLevel)
-			} else {
-				log.SetLevel(log.InfoLevel)
-			}
-			log.SetFormatter(&log.TextFormatter{
-				DisableTimestamp: true,
-			})
-		},
+		Use:     "onyx-cli",
+		Short:   "Terminal UI for chatting with Onyx",
+		Long:    "Onyx CLI — a terminal interface for chatting with your Onyx agent.",
+		Version: fullVersion(),
 	}

-	rootCmd.PersistentFlags().BoolVar(&opts.Debug, "debug", false, "run in debug mode")
-
-	// Custom --version flag instead of Cobra's built-in (which only shows one version string)
-	var showVersion bool
-	rootCmd.Flags().BoolVarP(&showVersion, "version", "v", false, "Print client and server version information")
-
 	// Register subcommands
 	chatCmd := newChatCmd()
 	rootCmd.AddCommand(chatCmd)
@@ -96,16 +32,9 @@ func Execute() error {
 	rootCmd.AddCommand(newAgentsCmd())
 	rootCmd.AddCommand(newConfigureCmd())
 	rootCmd.AddCommand(newValidateConfigCmd())
-	rootCmd.AddCommand(newServeCmd())

-	// Default command is chat, but intercept --version first
-	rootCmd.RunE = func(cmd *cobra.Command, args []string) error {
-		if showVersion {
-			printVersion(cmd)
-			return nil
-		}
-		return chatCmd.RunE(cmd, args)
-	}
+	// Default command is chat
+	rootCmd.RunE = chatCmd.RunE

 	return rootCmd.Execute()
 }
--- a/cli/cmd/serve.go
+++ b/cli/cmd/serve.go
@@ -1,450 +0,0 @@
-package cmd
-
-import (
-	"context"
-	"errors"
-	"fmt"
-	"net"
-	"os"
-	"os/signal"
-	"path/filepath"
-	"strings"
-	"syscall"
-	"time"
-
-	"github.com/charmbracelet/bubbles/textinput"
-	tea "github.com/charmbracelet/bubbletea"
-	"github.com/charmbracelet/log"
-	"github.com/charmbracelet/ssh"
-	"github.com/charmbracelet/wish"
-	"github.com/charmbracelet/wish/activeterm"
-	"github.com/charmbracelet/wish/bubbletea"
-	"github.com/charmbracelet/wish/logging"
-	"github.com/charmbracelet/wish/ratelimiter"
-	"github.com/onyx-dot-app/onyx/cli/internal/api"
-	"github.com/onyx-dot-app/onyx/cli/internal/config"
-	"github.com/onyx-dot-app/onyx/cli/internal/tui"
-	"github.com/spf13/cobra"
-	"golang.org/x/time/rate"
-)
-
-const (
-	defaultServeIdleTimeout        = 15 * time.Minute
-	defaultServeMaxSessionTimeout  = 8 * time.Hour
-	defaultServeRateLimitPerMinute = 20
-	defaultServeRateLimitBurst     = 40
-	defaultServeRateLimitCacheSize = 4096
-	maxAPIKeyLength                = 512
-	apiKeyValidationTimeout        = 15 * time.Second
-	maxAPIKeyRetries               = 5
-)
-
-func sessionEnv(s ssh.Session, key string) string {
-	prefix := key + "="
-	for _, env := range s.Environ() {
-		if strings.HasPrefix(env, prefix) {
-			return env[len(prefix):]
-		}
-	}
-	return ""
-}
-
-func validateAPIKey(serverURL string, apiKey string) error {
-	trimmedKey := strings.TrimSpace(apiKey)
-	if len(trimmedKey) > maxAPIKeyLength {
-		return fmt.Errorf("API key is too long (max %d characters)", maxAPIKeyLength)
-	}
-
-	cfg := config.OnyxCliConfig{
-		ServerURL: serverURL,
-		APIKey:    trimmedKey,
-	}
-	client := api.NewClient(cfg)
-	ctx, cancel := context.WithTimeout(context.Background(), apiKeyValidationTimeout)
-	defer cancel()
-	return client.TestConnection(ctx)
-}
-
-// --- auth prompt (bubbletea model) ---
-
-type authState int
-
-const (
-	authInput authState = iota
-	authValidating
-	authDone
-)
-
-type authValidatedMsg struct {
-	key string
-	err error
-}
-
-type authModel struct {
-	input     textinput.Model
-	serverURL string
-	state     authState
-	apiKey    string // set on successful validation
-	errMsg    string
-	retries   int
-	aborted   bool
-}
-
-func newAuthModel(serverURL, initialErr string) authModel {
-	ti := textinput.New()
-	ti.Prompt = "  API Key: "
-	ti.EchoMode = textinput.EchoPassword
-	ti.EchoCharacter = '•'
-	ti.CharLimit = maxAPIKeyLength
-	ti.Width = 80
-	ti.Focus()
-
-	return authModel{
-		input:     ti,
-		serverURL: serverURL,
-		errMsg:    initialErr,
-	}
-}
-
-func (m authModel) Update(msg tea.Msg) (authModel, tea.Cmd) {
-	switch msg := msg.(type) {
-	case tea.WindowSizeMsg:
-		m.input.Width = max(msg.Width-14, 20) // account for prompt width
-		return m, nil
-	case tea.KeyMsg:
-		switch msg.Type {
-		case tea.KeyCtrlC, tea.KeyCtrlD:
-			m.aborted = true
-			return m, nil
-		default:
-			if m.state == authValidating {
-				return m, nil
-			}
-		}
-		switch msg.Type {
-		case tea.KeyEnter:
-			key := strings.TrimSpace(m.input.Value())
-			if key == "" {
-				m.errMsg = "No key entered."
-				m.retries++
-				if m.retries >= maxAPIKeyRetries {
-					m.errMsg = "Too many failed attempts. Disconnecting."
-					m.aborted = true
-					return m, nil
-				}
-				m.input.SetValue("")
-				return m, nil
-			}
-			m.state = authValidating
-			m.errMsg = ""
-			serverURL := m.serverURL
-			return m, func() tea.Msg {
-				return authValidatedMsg{key: key, err: validateAPIKey(serverURL, key)}
-			}
-		}
-
-	case authValidatedMsg:
-		if msg.err != nil {
-			m.state = authInput
-			m.errMsg = msg.err.Error()
-			m.retries++
-			if m.retries >= maxAPIKeyRetries {
-				m.errMsg = "Too many failed attempts. Disconnecting."
-				m.aborted = true
-				return m, nil
-			}
-			m.input.SetValue("")
-			return m, m.input.Focus()
-		}
-		m.apiKey = msg.key
-		m.state = authDone
-		return m, nil
-	}
-
-	if m.state == authInput {
-		var cmd tea.Cmd
-		m.input, cmd = m.input.Update(msg)
-		return m, cmd
-	}
-	return m, nil
-}
-
-func (m authModel) View() string {
-	settingsURL := strings.TrimRight(m.serverURL, "/") + "/app/settings/accounts-access"
-
-	var b strings.Builder
-	b.WriteString("\n")
-	b.WriteString("  \x1b[1;35mOnyx CLI\x1b[0m\n")
-	b.WriteString("  \x1b[90m" + m.serverURL + "\x1b[0m\n")
-	b.WriteString("\n")
-	b.WriteString("  Generate an API key at:\n")
-	b.WriteString("  \x1b[4;34m" + settingsURL + "\x1b[0m\n")
-	b.WriteString("\n")
-	b.WriteString("  \x1b[90mTip: skip this prompt by passing your key via SSH:\x1b[0m\n")
-	b.WriteString("  \x1b[90m  export ONYX_API_KEY=<key>\x1b[0m\n")
-	b.WriteString("  \x1b[90m  ssh -o SendEnv=ONYX_API_KEY <host> -p <port>\x1b[0m\n")
-	b.WriteString("\n")
-
-	if m.errMsg != "" {
-		b.WriteString("  \x1b[1;31m" + m.errMsg + "\x1b[0m\n\n")
-	}
-
-	switch m.state {
-	case authDone:
-		b.WriteString("  \x1b[32mAuthenticated.\x1b[0m\n")
-	case authValidating:
-		b.WriteString("  \x1b[90mValidating…\x1b[0m\n")
-	default:
-		b.WriteString(m.input.View() + "\n")
-	}
-
-	return b.String()
-}
-
-// --- serve model (wraps auth → TUI in a single bubbletea program) ---
-
-type serveModel struct {
-	auth      authModel
-	tui       tea.Model
-	authed    bool
-	serverCfg config.OnyxCliConfig
-	width     int
-	height    int
-}
-
-func newServeModel(serverCfg config.OnyxCliConfig, initialErr string) serveModel {
-	return serveModel{
-		auth:      newAuthModel(serverCfg.ServerURL, initialErr),
-		serverCfg: serverCfg,
-	}
-}
-
-func (m serveModel) Init() tea.Cmd {
-	return textinput.Blink
-}
-
-func (m serveModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
-	if !m.authed {
-		if ws, ok := msg.(tea.WindowSizeMsg); ok {
-			m.width = ws.Width
-			m.height = ws.Height
-		}
-
-		var cmd tea.Cmd
-		m.auth, cmd = m.auth.Update(msg)
-
-		if m.auth.aborted {
-			return m, tea.Quit
-		}
-		if m.auth.apiKey != "" {
-			cfg := config.OnyxCliConfig{
-				ServerURL:      m.serverCfg.ServerURL,
-				APIKey:         m.auth.apiKey,
-				DefaultAgentID: m.serverCfg.DefaultAgentID,
-			}
-			m.tui = tui.NewModel(cfg)
-			m.authed = true
-			w, h := m.width, m.height
-			return m, tea.Batch(
-				tea.EnterAltScreen,
-				tea.EnableMouseCellMotion,
-				m.tui.Init(),
-				func() tea.Msg { return tea.WindowSizeMsg{Width: w, Height: h} },
-			)
-		}
-		return m, cmd
-	}
-
-	var cmd tea.Cmd
-	m.tui, cmd = m.tui.Update(msg)
-	return m, cmd
-}
-
-func (m serveModel) View() string {
-	if !m.authed {
-		return m.auth.View()
-	}
-	return m.tui.View()
-}
-
-// --- serve command ---
-
-func newServeCmd() *cobra.Command {
-	var (
-		host              string
-		port              int
-		keyPath           string
-		idleTimeout       time.Duration
-		maxSessionTimeout time.Duration
-		rateLimitPerMin   int
-		rateLimitBurst    int
-		rateLimitCache    int
-	)
-
-	cmd := &cobra.Command{
-		Use:   "serve",
-		Short: "Serve the Onyx TUI over SSH",
-		Long: `Start an SSH server that presents the interactive Onyx chat TUI to
-connecting clients. Each SSH session gets its own independent TUI instance.
-
-Clients are prompted for their Onyx API key on connect. The key can also be
-provided via the ONYX_API_KEY environment variable to skip the prompt:
-
-  ssh -o SendEnv=ONYX_API_KEY host -p port
-
-The server URL is taken from the server operator's config. The server
-auto-generates an Ed25519 host key on first run if the key file does not
-already exist. The host key path can also be set via the ONYX_SSH_HOST_KEY
-environment variable (the --host-key flag takes precedence).
-
-Example:
-  onyx-cli serve --port 2222
-  ssh localhost -p 2222`,
-		RunE: func(cmd *cobra.Command, args []string) error {
-			serverCfg := config.Load()
-			if serverCfg.ServerURL == "" {
-				return fmt.Errorf("server URL is not configured; run 'onyx-cli configure' first")
-			}
-			if !cmd.Flags().Changed("host-key") {
-				if v := os.Getenv(config.EnvSSHHostKey); v != "" {
-					keyPath = v
-				}
-			}
-			if rateLimitPerMin <= 0 {
-				return fmt.Errorf("--rate-limit-per-minute must be > 0")
-			}
-			if rateLimitBurst <= 0 {
-				return fmt.Errorf("--rate-limit-burst must be > 0")
-			}
-			if rateLimitCache <= 0 {
-				return fmt.Errorf("--rate-limit-cache must be > 0")
-			}
-
-			addr := net.JoinHostPort(host, fmt.Sprintf("%d", port))
-			connectionLimiter := ratelimiter.NewRateLimiter(
-				rate.Limit(float64(rateLimitPerMin)/60.0),
-				rateLimitBurst,
-				rateLimitCache,
-			)
-
-			handler := func(s ssh.Session) (tea.Model, []tea.ProgramOption) {
-				apiKey := strings.TrimSpace(sessionEnv(s, config.EnvAPIKey))
-				var envErr string
-
-				if apiKey != "" {
-					if err := validateAPIKey(serverCfg.ServerURL, apiKey); err != nil {
-						envErr = fmt.Sprintf("ONYX_API_KEY from SSH environment is invalid: %s", err.Error())
-						apiKey = ""
-					}
-				}
-
-				if apiKey != "" {
-					// Env key is valid — go straight to the TUI.
-					cfg := config.OnyxCliConfig{
-						ServerURL:      serverCfg.ServerURL,
-						APIKey:         apiKey,
-						DefaultAgentID: serverCfg.DefaultAgentID,
-					}
-					return tui.NewModel(cfg), []tea.ProgramOption{
-						tea.WithAltScreen(),
-						tea.WithMouseCellMotion(),
-					}
-				}
-
-				// No valid env key — show auth prompt, then transition
-				// to the TUI within the same bubbletea program.
-				return newServeModel(serverCfg, envErr), []tea.ProgramOption{
-					tea.WithMouseCellMotion(),
-				}
-			}
-
-			serverOptions := []ssh.Option{
-				wish.WithAddress(addr),
-				wish.WithHostKeyPath(keyPath),
-				wish.WithMiddleware(
-					bubbletea.Middleware(handler),
-					activeterm.Middleware(),
-					ratelimiter.Middleware(connectionLimiter),
-					logging.Middleware(),
-				),
-			}
-			if idleTimeout > 0 {
-				serverOptions = append(serverOptions, wish.WithIdleTimeout(idleTimeout))
-			}
-			if maxSessionTimeout > 0 {
-				serverOptions = append(serverOptions, wish.WithMaxTimeout(maxSessionTimeout))
-			}
-
-			s, err := wish.NewServer(serverOptions...)
-			if err != nil {
-				return fmt.Errorf("could not create SSH server: %w", err)
-			}
-
-			done := make(chan os.Signal, 1)
-			signal.Notify(done, os.Interrupt, syscall.SIGTERM)
-
-			log.Info("Starting Onyx SSH server", "addr", addr)
-			log.Info("Connect with", "cmd", fmt.Sprintf("ssh %s -p %d", host, port))
-
-			errCh := make(chan error, 1)
-			go func() {
-				if err := s.ListenAndServe(); err != nil && !errors.Is(err, ssh.ErrServerClosed) {
-					log.Error("SSH server failed", "error", err)
-					errCh <- err
-				}
-			}()
-
-			var serverErr error
-			select {
-			case <-done:
-			case serverErr = <-errCh:
-			}
-
-			signal.Stop(done)
-			log.Info("Shutting down SSH server")
-			ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
-			defer cancel()
-			if shutdownErr := s.Shutdown(ctx); shutdownErr != nil {
-				return errors.Join(serverErr, shutdownErr)
-			}
-			return serverErr
-		},
-	}
-
-	cmd.Flags().StringVar(&host, "host", "localhost", "Host address to bind to")
-	cmd.Flags().IntVarP(&port, "port", "p", 2222, "Port to listen on")
-	cmd.Flags().StringVar(&keyPath, "host-key", filepath.Join(config.ConfigDir(), "host_ed25519"),
-		"Path to SSH host key (auto-generated if missing)")
-	cmd.Flags().DurationVar(
-		&idleTimeout,
-		"idle-timeout",
-		defaultServeIdleTimeout,
-		"Disconnect idle clients after this duration (set 0 to disable)",
-	)
-	cmd.Flags().DurationVar(
-		&maxSessionTimeout,
-		"max-session-timeout",
-		defaultServeMaxSessionTimeout,
-		"Maximum lifetime of a client session (set 0 to disable)",
-	)
-	cmd.Flags().IntVar(
-		&rateLimitPerMin,
-		"rate-limit-per-minute",
-		defaultServeRateLimitPerMinute,
-		"Per-IP connection rate limit (new sessions per minute)",
-	)
-	cmd.Flags().IntVar(
-		&rateLimitBurst,
-		"rate-limit-burst",
-		defaultServeRateLimitBurst,
-		"Per-IP burst limit for connection attempts",
-	)
-	cmd.Flags().IntVar(
-		&rateLimitCache,
-		"rate-limit-cache",
-		defaultServeRateLimitCacheSize,
-		"Maximum number of IP limiter entries tracked in memory",
-	)
-
-	return cmd
-}
--- a/cli/cmd/validate.go
+++ b/cli/cmd/validate.go
@@ -1,14 +1,10 @@
 package cmd

 import (
-	"context"
 	"fmt"
-	"time"

 	"github.com/onyx-dot-app/onyx/cli/internal/api"
 	"github.com/onyx-dot-app/onyx/cli/internal/config"
-	"github.com/onyx-dot-app/onyx/cli/internal/version"
-	log "github.com/sirupsen/logrus"
 	"github.com/spf13/cobra"
 )

@@ -39,25 +35,6 @@ func newValidateConfigCmd() *cobra.Command {
 			}

 			_, _ = fmt.Fprintln(cmd.OutOrStdout(), "Status:  connected and authenticated")
-
-			// Check backend version compatibility
-			vCtx, vCancel := context.WithTimeout(cmd.Context(), 5*time.Second)
-			defer vCancel()
-
-			backendVersion, err := client.GetBackendVersion(vCtx)
-			if err != nil {
-				log.WithError(err).Debug("could not fetch backend version")
-			} else if backendVersion == "" {
-				log.Debug("server returned empty version string")
-			} else {
-				_, _ = fmt.Fprintf(cmd.OutOrStdout(), "Version: %s\n", backendVersion)
-				min := version.MinServer()
-				if sv, ok := version.Parse(backendVersion); ok && sv.LessThan(min) {
-					log.Warnf("Server version %s is below minimum required %d.%d, please upgrade",
-						backendVersion, min.Major, min.Minor)
-				}
-			}
-
 			return nil
 		},
 	}
--- a/cli/go.mod
+++ b/cli/go.mod
@@ -1,63 +1,45 @@
 module github.com/onyx-dot-app/onyx/cli

-go 1.26.1
+go 1.26.0

 require (
-	github.com/charmbracelet/bubbles v1.0.0
-	github.com/charmbracelet/bubbletea v1.3.10
-	github.com/charmbracelet/glamour v1.0.0
-	github.com/charmbracelet/lipgloss v1.1.1-0.20250404203927-76690c660834
-	github.com/charmbracelet/log v1.0.0
-	github.com/charmbracelet/ssh v0.0.0-20250826160808-ebfa259c7309
-	github.com/charmbracelet/wish v1.4.7
-	github.com/sirupsen/logrus v1.9.4
-	github.com/spf13/cobra v1.10.2
-	golang.org/x/term v0.41.0
-	golang.org/x/text v0.35.0
-	golang.org/x/time v0.15.0
+	github.com/charmbracelet/bubbles v0.20.0
+	github.com/charmbracelet/bubbletea v1.3.4
+	github.com/charmbracelet/glamour v0.8.0
+	github.com/charmbracelet/lipgloss v1.1.0
+	github.com/spf13/cobra v1.9.1
+	golang.org/x/term v0.30.0
+	golang.org/x/text v0.34.0
 )

 require (
-	github.com/alecthomas/chroma/v2 v2.23.1 // indirect
-	github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be // indirect
+	github.com/alecthomas/chroma/v2 v2.14.0 // indirect
 	github.com/atotto/clipboard v0.1.4 // indirect
 	github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect
 	github.com/aymerick/douceur v0.2.0 // indirect
-	github.com/charmbracelet/colorprofile v0.4.3 // indirect
-	github.com/charmbracelet/keygen v0.5.4 // indirect
-	github.com/charmbracelet/x/ansi v0.11.6 // indirect
-	github.com/charmbracelet/x/cellbuf v0.0.15 // indirect
-	github.com/charmbracelet/x/conpty v0.2.0 // indirect
-	github.com/charmbracelet/x/exp/slice v0.0.0-20260323091123-df7b1bcffcca // indirect
-	github.com/charmbracelet/x/input v0.3.7 // indirect
-	github.com/charmbracelet/x/term v0.2.2 // indirect
-	github.com/charmbracelet/x/termios v0.1.1 // indirect
-	github.com/charmbracelet/x/windows v0.2.2 // indirect
-	github.com/clipperhouse/displaywidth v0.11.0 // indirect
-	github.com/clipperhouse/uax29/v2 v2.7.0 // indirect
-	github.com/creack/pty v1.1.24 // indirect
-	github.com/dlclark/regexp2 v1.11.5 // indirect
+	github.com/charmbracelet/colorprofile v0.2.3-0.20250311203215-f60798e515dc // indirect
+	github.com/charmbracelet/x/ansi v0.8.0 // indirect
+	github.com/charmbracelet/x/cellbuf v0.0.13-0.20250311204145-2c3ea96c31dd // indirect
+	github.com/charmbracelet/x/term v0.2.1 // indirect
+	github.com/dlclark/regexp2 v1.11.0 // indirect
 	github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect
-	github.com/go-logfmt/logfmt v0.6.1 // indirect
 	github.com/gorilla/css v1.0.1 // indirect
-	github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
 	github.com/inconshreveable/mousetrap v1.1.0 // indirect
-	github.com/lucasb-eyer/go-colorful v1.3.0 // indirect
+	github.com/lucasb-eyer/go-colorful v1.2.0 // indirect
 	github.com/mattn/go-isatty v0.0.20 // indirect
 	github.com/mattn/go-localereader v0.0.1 // indirect
-	github.com/mattn/go-runewidth v0.0.21 // indirect
+	github.com/mattn/go-runewidth v0.0.16 // indirect
 	github.com/microcosm-cc/bluemonday v1.0.27 // indirect
 	github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 // indirect
 	github.com/muesli/cancelreader v0.2.2 // indirect
 	github.com/muesli/reflow v0.3.0 // indirect
 	github.com/muesli/termenv v0.16.0 // indirect
 	github.com/rivo/uniseg v0.4.7 // indirect
-	github.com/spf13/pflag v1.0.10 // indirect
+	github.com/spf13/pflag v1.0.6 // indirect
 	github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
-	github.com/yuin/goldmark v1.8.2 // indirect
-	github.com/yuin/goldmark-emoji v1.0.6 // indirect
-	golang.org/x/crypto v0.49.0 // indirect
-	golang.org/x/exp v0.0.0-20260312153236-7ab1446f8b90 // indirect
-	golang.org/x/net v0.52.0 // indirect
-	golang.org/x/sys v0.42.0 // indirect
+	github.com/yuin/goldmark v1.7.4 // indirect
+	github.com/yuin/goldmark-emoji v1.0.3 // indirect
+	golang.org/x/net v0.38.0 // indirect
+	golang.org/x/sync v0.19.0 // indirect
+	golang.org/x/sys v0.31.0 // indirect
 )
--- a/cli/go.sum
+++ b/cli/go.sum
@@ -1,89 +1,55 @@
-github.com/alecthomas/assert/v2 v2.11.0 h1:2Q9r3ki8+JYXvGsDyBXwH3LcJ+WK5D0gc5E8vS6K3D0=
-github.com/alecthomas/assert/v2 v2.11.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k=
-github.com/alecthomas/chroma/v2 v2.23.1 h1:nv2AVZdTyClGbVQkIzlDm/rnhk1E9bU9nXwmZ/Vk/iY=
-github.com/alecthomas/chroma/v2 v2.23.1/go.mod h1:NqVhfBR0lte5Ouh3DcthuUCTUpDC9cxBOfyMbMQPs3o=
-github.com/alecthomas/repr v0.5.2 h1:SU73FTI9D1P5UNtvseffFSGmdNci/O6RsqzeXJtP0Qs=
-github.com/alecthomas/repr v0.5.2/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4=
-github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be h1:9AeTilPcZAjCFIImctFaOjnTIavg87rW78vTPkQqLI8=
-github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be/go.mod h1:ySMOLuWl6zY27l47sB3qLNK6tF2fkHG55UZxx8oIVo4=
+github.com/alecthomas/assert/v2 v2.7.0 h1:QtqSACNS3tF7oasA8CU6A6sXZSBDqnm7RfpLl9bZqbE=
+github.com/alecthomas/assert/v2 v2.7.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k=
+github.com/alecthomas/chroma/v2 v2.14.0 h1:R3+wzpnUArGcQz7fCETQBzO5n9IMNi13iIs46aU4V9E=
+github.com/alecthomas/chroma/v2 v2.14.0/go.mod h1:QolEbTfmUHIMVpBqxeDnNBj2uoeI4EbYP4i6n68SG4I=
+github.com/alecthomas/repr v0.4.0 h1:GhI2A8MACjfegCPVq9f1FLvIBS+DrQ2KQBFZP1iFzXc=
+github.com/alecthomas/repr v0.4.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4=
 github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4=
 github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI=
 github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k=
 github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8=
-github.com/aymanbagabas/go-udiff v0.3.1 h1:LV+qyBQ2pqe0u42ZsUEtPiCaUoqgA9gYRDs3vj1nolY=
-github.com/aymanbagabas/go-udiff v0.3.1/go.mod h1:G0fsKmG+P6ylD0r6N/KgQD/nWzgfnl8ZBcNLgcbrw8E=
+github.com/aymanbagabas/go-udiff v0.2.0 h1:TK0fH4MteXUDspT88n8CKzvK0X9O2xu9yQjWpi6yML8=
+github.com/aymanbagabas/go-udiff v0.2.0/go.mod h1:RE4Ex0qsGkTAJoQdQQCA0uG+nAzJO/pI/QwceO5fgrA=
 github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk=
 github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4=
-github.com/charmbracelet/bubbles v1.0.0 h1:12J8/ak/uCZEMQ6KU7pcfwceyjLlWsDLAxB5fXonfvc=
-github.com/charmbracelet/bubbles v1.0.0/go.mod h1:9d/Zd5GdnauMI5ivUIVisuEm3ave1XwXtD1ckyV6r3E=
-github.com/charmbracelet/bubbletea v1.3.10 h1:otUDHWMMzQSB0Pkc87rm691KZ3SWa4KUlvF9nRvCICw=
-github.com/charmbracelet/bubbletea v1.3.10/go.mod h1:ORQfo0fk8U+po9VaNvnV95UPWA1BitP1E0N6xJPlHr4=
-github.com/charmbracelet/colorprofile v0.4.3 h1:QPa1IWkYI+AOB+fE+mg/5/4HRMZcaXex9t5KX76i20Q=
-github.com/charmbracelet/colorprofile v0.4.3/go.mod h1:/zT4BhpD5aGFpqQQqw7a+VtHCzu+zrQtt1zhMt9mR4Q=
-github.com/charmbracelet/glamour v1.0.0 h1:AWMLOVFHTsysl4WV8T8QgkQ0s/ZNZo7CiE4WKhk8l08=
-github.com/charmbracelet/glamour v1.0.0/go.mod h1:DSdohgOBkMr2ZQNhw4LZxSGpx3SvpeujNoXrQyH2hxo=
-github.com/charmbracelet/keygen v0.5.4 h1:XQYgf6UEaTGgQSSmiPpIQ78WfseNQp4Pz8N/c1OsrdA=
-github.com/charmbracelet/keygen v0.5.4/go.mod h1:t4oBRr41bvK7FaJsAaAQhhkUuHslzFXVjOBwA55CZNM=
-github.com/charmbracelet/lipgloss v1.1.1-0.20250404203927-76690c660834 h1:ZR7e0ro+SZZiIZD7msJyA+NjkCNNavuiPBLgerbOziE=
-github.com/charmbracelet/lipgloss v1.1.1-0.20250404203927-76690c660834/go.mod h1:aKC/t2arECF6rNOnaKaVU6y4t4ZeHQzqfxedE/VkVhA=
-github.com/charmbracelet/log v1.0.0 h1:HVVVMmfOorfj3BA9i8X8UL69Hoz9lI0PYwXfJvOdRc4=
-github.com/charmbracelet/log v1.0.0/go.mod h1:uYgY3SmLpwJWxmlrPwXvzVYujxis1vAKRV/0VQB7yWA=
-github.com/charmbracelet/ssh v0.0.0-20250826160808-ebfa259c7309 h1:dCVbCRRtg9+tsfiTXTp0WupDlHruAXyp+YoxGVofHHc=
-github.com/charmbracelet/ssh v0.0.0-20250826160808-ebfa259c7309/go.mod h1:R9cISUs5kAH4Cq/rguNbSwcR+slE5Dfm8FEs//uoIGE=
-github.com/charmbracelet/wish v1.4.7 h1:O+jdLac3s6GaqkOHHSwezejNK04vl6VjO1A+hl8J8Yc=
-github.com/charmbracelet/wish v1.4.7/go.mod h1:OBZ8vC62JC5cvbxJLh+bIWtG7Ctmct+ewziuUWK+G14=
-github.com/charmbracelet/x/ansi v0.11.6 h1:GhV21SiDz/45W9AnV2R61xZMRri5NlLnl6CVF7ihZW8=
-github.com/charmbracelet/x/ansi v0.11.6/go.mod h1:2JNYLgQUsyqaiLovhU2Rv/pb8r6ydXKS3NIttu3VGZQ=
-github.com/charmbracelet/x/cellbuf v0.0.15 h1:ur3pZy0o6z/R7EylET877CBxaiE1Sp1GMxoFPAIztPI=
-github.com/charmbracelet/x/cellbuf v0.0.15/go.mod h1:J1YVbR7MUuEGIFPCaaZ96KDl5NoS0DAWkskup+mOY+Q=
-github.com/charmbracelet/x/conpty v0.2.0 h1:eKtA2hm34qNfgJCDp/M6Dc0gLy7e07YEK4qAdNGOvVY=
-github.com/charmbracelet/x/conpty v0.2.0/go.mod h1:fexgUnVrZgw8scD49f6VSi0Ggj9GWYIrpedRthAwW/8=
-github.com/charmbracelet/x/exp/golden v0.0.0-20241011142426-46044092ad91 h1:payRxjMjKgx2PaCWLZ4p3ro9y97+TVLZNaRZgJwSVDQ=
-github.com/charmbracelet/x/exp/golden v0.0.0-20241011142426-46044092ad91/go.mod h1:wDlXFlCrmJ8J+swcL/MnGUuYnqgQdW9rhSD61oNMb6U=
-github.com/charmbracelet/x/exp/slice v0.0.0-20260323091123-df7b1bcffcca h1:QQoyQLgUzojMNWHVHToN6d9qTvT0KWtxUKIRPx/Ox5o=
-github.com/charmbracelet/x/exp/slice v0.0.0-20260323091123-df7b1bcffcca/go.mod h1:vqEfX6xzqW1pKKZUUiFOKg0OQ7bCh54Q2vR/tserrRA=
-github.com/charmbracelet/x/input v0.3.7 h1:UzVbkt1vgM9dBQ+K+uRolBlN6IF2oLchmPKKo/aucXo=
-github.com/charmbracelet/x/input v0.3.7/go.mod h1:ZSS9Cia6Cycf2T6ToKIOxeTBTDwl25AGwArJuGaOBH8=
-github.com/charmbracelet/x/term v0.2.2 h1:xVRT/S2ZcKdhhOuSP4t5cLi5o+JxklsoEObBSgfgZRk=
-github.com/charmbracelet/x/term v0.2.2/go.mod h1:kF8CY5RddLWrsgVwpw4kAa6TESp6EB5y3uxGLeCqzAI=
-github.com/charmbracelet/x/termios v0.1.1 h1:o3Q2bT8eqzGnGPOYheoYS8eEleT5ZVNYNy8JawjaNZY=
-github.com/charmbracelet/x/termios v0.1.1/go.mod h1:rB7fnv1TgOPOyyKRJ9o+AsTU/vK5WHJ2ivHeut/Pcwo=
-github.com/charmbracelet/x/windows v0.2.2 h1:IofanmuvaxnKHuV04sC0eBy/smG6kIKrWG2/jYn2GuM=
-github.com/charmbracelet/x/windows v0.2.2/go.mod h1:/8XtdKZzedat74NQFn0NGlGL4soHB0YQZrETF96h75k=
-github.com/clipperhouse/displaywidth v0.11.0 h1:lBc6kY44VFw+TDx4I8opi/EtL9m20WSEFgwIwO+UVM8=
-github.com/clipperhouse/displaywidth v0.11.0/go.mod h1:bkrFNkf81G8HyVqmKGxsPufD3JhNl3dSqnGhOoSD/o0=
-github.com/clipperhouse/uax29/v2 v2.7.0 h1:+gs4oBZ2gPfVrKPthwbMzWZDaAFPGYK72F0NJv2v7Vk=
-github.com/clipperhouse/uax29/v2 v2.7.0/go.mod h1:EFJ2TJMRUaplDxHKj1qAEhCtQPW2tJSwu5BF98AuoVM=
+github.com/charmbracelet/bubbles v0.20.0 h1:jSZu6qD8cRQ6k9OMfR1WlM+ruM8fkPWkHvQWD9LIutE=
+github.com/charmbracelet/bubbles v0.20.0/go.mod h1:39slydyswPy+uVOHZ5x/GjwVAFkCsV8IIVy+4MhzwwU=
+github.com/charmbracelet/bubbletea v1.3.4 h1:kCg7B+jSCFPLYRA52SDZjr51kG/fMUEoPoZrkaDHyoI=
+github.com/charmbracelet/bubbletea v1.3.4/go.mod h1:dtcUCyCGEX3g9tosuYiut3MXgY/Jsv9nKVdibKKRRXo=
+github.com/charmbracelet/colorprofile v0.2.3-0.20250311203215-f60798e515dc h1:4pZI35227imm7yK2bGPcfpFEmuY1gc2YSTShr4iJBfs=
+github.com/charmbracelet/colorprofile v0.2.3-0.20250311203215-f60798e515dc/go.mod h1:X4/0JoqgTIPSFcRA/P6INZzIuyqdFY5rm8tb41s9okk=
+github.com/charmbracelet/glamour v0.8.0 h1:tPrjL3aRcQbn++7t18wOpgLyl8wrOHUEDS7IZ68QtZs=
+github.com/charmbracelet/glamour v0.8.0/go.mod h1:ViRgmKkf3u5S7uakt2czJ272WSg2ZenlYEZXT2x7Bjw=
+github.com/charmbracelet/lipgloss v1.1.0 h1:vYXsiLHVkK7fp74RkV7b2kq9+zDLoEU4MZoFqR/noCY=
+github.com/charmbracelet/lipgloss v1.1.0/go.mod h1:/6Q8FR2o+kj8rz4Dq0zQc3vYf7X+B0binUUBwA0aL30=
+github.com/charmbracelet/x/ansi v0.8.0 h1:9GTq3xq9caJW8ZrBTe0LIe2fvfLR/bYXKTx2llXn7xE=
+github.com/charmbracelet/x/ansi v0.8.0/go.mod h1:wdYl/ONOLHLIVmQaxbIYEC/cRKOQyjTkowiI4blgS9Q=
+github.com/charmbracelet/x/cellbuf v0.0.13-0.20250311204145-2c3ea96c31dd h1:vy0GVL4jeHEwG5YOXDmi86oYw2yuYUGqz6a8sLwg0X8=
+github.com/charmbracelet/x/cellbuf v0.0.13-0.20250311204145-2c3ea96c31dd/go.mod h1:xe0nKWGd3eJgtqZRaN9RjMtK7xUYchjzPr7q6kcvCCs=
+github.com/charmbracelet/x/exp/golden v0.0.0-20240815200342-61de596daa2b h1:MnAMdlwSltxJyULnrYbkZpp4k58Co7Tah3ciKhSNo0Q=
+github.com/charmbracelet/x/exp/golden v0.0.0-20240815200342-61de596daa2b/go.mod h1:wDlXFlCrmJ8J+swcL/MnGUuYnqgQdW9rhSD61oNMb6U=
+github.com/charmbracelet/x/term v0.2.1 h1:AQeHeLZ1OqSXhrAWpYUtZyX1T3zVxfpZuEQMIQaGIAQ=
+github.com/charmbracelet/x/term v0.2.1/go.mod h1:oQ4enTYFV7QN4m0i9mzHrViD7TQKvNEEkHUMCmsxdUg=
 github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
-github.com/creack/pty v1.1.24 h1:bJrF4RRfyJnbTJqzRLHzcGaZK1NeM5kTC9jGgovnR1s=
-github.com/creack/pty v1.1.24/go.mod h1:08sCNb52WyoAwi2QDyzUCTgcvVFhUzewun7wtTfvcwE=
-github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
-github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
-github.com/dlclark/regexp2 v1.11.5 h1:Q/sSnsKerHeCkc/jSTNq1oCm7KiVgUMZRDUoRu0JQZQ=
-github.com/dlclark/regexp2 v1.11.5/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
+github.com/dlclark/regexp2 v1.11.0 h1:G/nrcoOa7ZXlpoa/91N3X7mM3r8eIlMBBJZvsz/mxKI=
+github.com/dlclark/regexp2 v1.11.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
 github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6baUTXGLOoWe4PQhGxaX0KpnayAqC48p4=
 github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f/go.mod h1:vw97MGsxSvLiUE2X8qFplwetxpGLQrlU1Q9AUEIzCaM=
-github.com/go-logfmt/logfmt v0.6.1 h1:4hvbpePJKnIzH1B+8OR/JPbTx37NktoI9LE2QZBBkvE=
-github.com/go-logfmt/logfmt v0.6.1/go.mod h1:EV2pOAQoZaT1ZXZbqDl5hrymndi4SY9ED9/z6CO0XAk=
-github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
-github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
 github.com/gorilla/css v1.0.1 h1:ntNaBIghp6JmvWnxbZKANoLyuXTPZ4cAMlo6RyhlbO8=
 github.com/gorilla/css v1.0.1/go.mod h1:BvnYkspnSzMmwRK+b8/xgNPLiIuNZr6vbZBTPQ2A3b0=
-github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
-github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
 github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
 github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg=
 github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
 github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
-github.com/lucasb-eyer/go-colorful v1.3.0 h1:2/yBRLdWBZKrf7gB40FoiKfAWYQ0lqNcbuQwVHXptag=
-github.com/lucasb-eyer/go-colorful v1.3.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
+github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY=
+github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
 github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
 github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
 github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2JC/oIi4=
 github.com/mattn/go-localereader v0.0.1/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+EiG4R1k4Cjx5p88=
 github.com/mattn/go-runewidth v0.0.12/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk=
-github.com/mattn/go-runewidth v0.0.21 h1:jJKAZiQH+2mIinzCJIaIG9Be1+0NR+5sz/lYEEjdM8w=
-github.com/mattn/go-runewidth v0.0.21/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs=
+github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc=
+github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
 github.com/microcosm-cc/bluemonday v1.0.27 h1:MpEUotklkwCSLeH+Qdx1VJgNqLlpY2KXwXFM08ygZfk=
 github.com/microcosm-cc/bluemonday v1.0.27/go.mod h1:jFi9vgW+H7c3V0lb6nR74Ib/DIB5OBs92Dimizgw2cA=
 github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 h1:ZK8zHtRHOkbHy6Mmr5D264iyp3TiX5OmNcI5cIARiQI=
@@ -94,47 +60,35 @@ github.com/muesli/reflow v0.3.0 h1:IFsN6K9NfGtjeggFP+68I4chLZV2yIKsXJFNZ+eWh6s=
 github.com/muesli/reflow v0.3.0/go.mod h1:pbwTDkVPibjO2kyvBQRBxTWEEGDGq0FlB1BIKtnHY/8=
 github.com/muesli/termenv v0.16.0 h1:S5AlUN9dENB57rsbnkPyfdGuWIlkmzJjbFf0Tf5FWUc=
 github.com/muesli/termenv v0.16.0/go.mod h1:ZRfOIKPFDYQoDFF4Olj7/QJbW60Ol/kL1pU3VfY/Cnk=
-github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
-github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
 github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
 github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
 github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
 github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
-github.com/sirupsen/logrus v1.9.4 h1:TsZE7l11zFCLZnZ+teH4Umoq5BhEIfIzfRDZ1Uzql2w=
-github.com/sirupsen/logrus v1.9.4/go.mod h1:ftWc9WdOfJ0a92nsE2jF5u5ZwH8Bv2zdeOC42RjbV2g=
-github.com/spf13/cobra v1.10.2 h1:DMTTonx5m65Ic0GOoRY2c16WCbHxOOw6xxezuLaBpcU=
-github.com/spf13/cobra v1.10.2/go.mod h1:7C1pvHqHw5A4vrJfjNwvOdzYu0Gml16OCs2GRiTUUS4=
-github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
-github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk=
-github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
-github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
-github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
+github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo=
+github.com/spf13/cobra v1.9.1/go.mod h1:nDyEzZ8ogv936Cinf6g1RU9MRY64Ir93oCnqb9wxYW0=
+github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o=
+github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
 github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no=
 github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM=
-github.com/yuin/goldmark v1.8.2 h1:kEGpgqJXdgbkhcOgBxkC0X0PmoPG1ZyoZ117rDVp4zE=
-github.com/yuin/goldmark v1.8.2/go.mod h1:ip/1k0VRfGynBgxOz0yCqHrbZXhcjxyuS66Brc7iBKg=
-github.com/yuin/goldmark-emoji v1.0.6 h1:QWfF2FYaXwL74tfGOW5izeiZepUDroDJfWubQI9HTHs=
-github.com/yuin/goldmark-emoji v1.0.6/go.mod h1:ukxJDKFpdFb5x0a5HqbdlcKtebh086iJpI31LTKmWuA=
-go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
-golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4=
-golang.org/x/crypto v0.49.0/go.mod h1:ErX4dUh2UM+CFYiXZRTcMpEcN8b/1gxEuv3nODoYtCA=
-golang.org/x/exp v0.0.0-20260312153236-7ab1446f8b90 h1:jiDhWWeC7jfWqR9c/uplMOqJ0sbNlNWv0UkzE0vX1MA=
-golang.org/x/exp v0.0.0-20260312153236-7ab1446f8b90/go.mod h1:xE1HEv6b+1SCZ5/uscMRjUBKtIxworgEcEi+/n9NQDQ=
-golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0=
-golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw=
-golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4=
-golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0=
+github.com/yuin/goldmark v1.7.1/go.mod h1:uzxRWxtg69N339t3louHJ7+O03ezfj6PlliRlaOzY1E=
+github.com/yuin/goldmark v1.7.4 h1:BDXOHExt+A7gwPCJgPIIq7ENvceR7we7rOS9TNoLZeg=
+github.com/yuin/goldmark v1.7.4/go.mod h1:uzxRWxtg69N339t3louHJ7+O03ezfj6PlliRlaOzY1E=
+github.com/yuin/goldmark-emoji v1.0.3 h1:aLRkLHOuBR2czCY4R8olwMjID+tENfhyFDMCRhbIQY4=
+github.com/yuin/goldmark-emoji v1.0.3/go.mod h1:tTkZEbwu5wkPmgTcitqddVxY9osFZiavD+r4AzQrh1U=
+golang.org/x/exp v0.0.0-20220909182711-5c715a9e8561 h1:MDc5xs78ZrZr3HMQugiXOAkSZtfTpbJLDr/lwfgO53E=
+golang.org/x/exp v0.0.0-20220909182711-5c715a9e8561/go.mod h1:cyybsKvd6eL0RnXn6p/Grxp8F5bW7iYuBgsNCOHpMYE=
+golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8=
+golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8=
+golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4=
+golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
 golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
-golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
-golang.org/x/term v0.41.0 h1:QCgPso/Q3RTJx2Th4bDLqML4W6iJiaXFq2/ftQF13YU=
-golang.org/x/term v0.41.0/go.mod h1:3pfBgksrReYfZ5lvYM0kSO0LIkAl4Yl2bXOkKP7Ec2A=
-golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8=
-golang.org/x/text v0.35.0/go.mod h1:khi/HExzZJ2pGnjenulevKNX1W67CUy0AsXcNubPGCA=
-golang.org/x/time v0.15.0 h1:bbrp8t3bGUeFOx08pvsMYRTCVSMk89u4tKbNOZbp88U=
-golang.org/x/time v0.15.0/go.mod h1:Y4YMaQmXwGQZoFaVFk4YpCt4FLQMYKZe9oeV/f4MSno=
+golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik=
+golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
+golang.org/x/term v0.30.0 h1:PQ39fJZ+mfadBm0y5WlL4vlM7Sx1Hgf13sMIY2+QS9Y=
+golang.org/x/term v0.30.0/go.mod h1:NYYFdzHoI5wRh/h5tDMdMqCqPJZEuNqVR5xJLd/n67g=
+golang.org/x/text v0.34.0 h1:oL/Qq0Kdaqxa1KbNeMKwQq0reLCCaFtqu2eNuSeNHbk=
+golang.org/x/text v0.34.0/go.mod h1:homfLqTYRFyVYemLBFl5GgL/DWEiH5wcsQ5gSh1yziA=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
-gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
--- a/cli/hatch_build.py
+++ b/cli/hatch_build.py
@@ -34,7 +34,8 @@ class CustomBuildHook(BuildHookInterface):
        # Build the Go binary (always rebuild to ensure correct version injection)
        if not os.path.exists(binary_name):
            print(f"Building Go binary '{binary_name}'...")
-            ldflags = f"-X main.version={tag} -X main.commit={commit} -s -w"
+            pkg = "github.com/onyx-dot-app/onyx/cli/cmd"
+            ldflags = f"-X {pkg}.version={tag}" f" -X {pkg}.commit={commit}" " -s -w"
            subprocess.check_call(  # noqa: S603
                ["go", "build", f"-ldflags={ldflags}", "-o", binary_name],
            )
--- a/cli/internal/api/client.go
+++ b/cli/internal/api/client.go
@@ -270,17 +270,6 @@ func (c *Client) UploadFile(ctx context.Context, filePath string) (*models.FileD
 	}, nil
 }

-// GetBackendVersion fetches the backend version string from /api/version.
-func (c *Client) GetBackendVersion(ctx context.Context) (string, error) {
-	var resp struct {
-		BackendVersion string `json:"backend_version"`
-	}
-	if err := c.doJSON(ctx, "GET", "/api/version", nil, &resp); err != nil {
-		return "", err
-	}
-	return resp.BackendVersion, nil
-}
-
 // StopChatSession sends a stop signal for a streaming session (best-effort).
 func (c *Client) StopChatSession(ctx context.Context, sessionID string) {
 	req, err := c.newRequest(ctx, "POST", "/api/chat/stop-chat-session/"+sessionID, nil)
--- a/cli/internal/config/config.go
+++ b/cli/internal/config/config.go
@@ -9,10 +9,9 @@ import (
 )

 const (
-	EnvServerURL  = "ONYX_SERVER_URL"
-	EnvAPIKey     = "ONYX_API_KEY"
+	EnvServerURL    = "ONYX_SERVER_URL"
+	EnvAPIKey = "ONYX_API_KEY"
 	EnvAgentID    = "ONYX_PERSONA_ID"
-	EnvSSHHostKey = "ONYX_SSH_HOST_KEY"
 )

 // OnyxCliConfig holds the CLI configuration.
@@ -36,8 +35,8 @@ func (c OnyxCliConfig) IsConfigured() bool {
 	return c.APIKey != ""
 }

-// ConfigDir returns ~/.config/onyx-cli
-func ConfigDir() string {
+// configDir returns ~/.config/onyx-cli
+func configDir() string {
 	if xdg := os.Getenv("XDG_CONFIG_HOME"); xdg != "" {
 		return filepath.Join(xdg, "onyx-cli")
 	}
@@ -50,7 +49,7 @@ func ConfigDir() string {

 // ConfigFilePath returns the full path to the config file.
 func ConfigFilePath() string {
-	return filepath.Join(ConfigDir(), "config.json")
+	return filepath.Join(configDir(), "config.json")
 }

 // ConfigExists checks if the config file exists on disk.
@@ -88,7 +87,7 @@ func Load() OnyxCliConfig {

 // Save writes the config to disk, creating parent directories if needed.
 func Save(cfg OnyxCliConfig) error {
-	dir := ConfigDir()
+	dir := configDir()
 	if err := os.MkdirAll(dir, 0o755); err != nil {
 		return err
 	}
--- a/cli/internal/version/version.go
+++ b/cli/internal/version/version.go
@@ -1,58 +0,0 @@
-// Package version provides semver parsing and compatibility checks.
-package version
-
-import (
-	"strconv"
-	"strings"
-)
-
-// Semver holds parsed semantic version components.
-type Semver struct {
-	Major int
-	Minor int
-	Patch int
-}
-
-// minServer is the minimum backend version required by this CLI.
-var minServer = Semver{Major: 3, Minor: 0, Patch: 0}
-
-// MinServer returns the minimum backend version required by this CLI.
-func MinServer() Semver { return minServer }
-
-// Parse extracts major, minor, patch from a version string like "3.1.2" or "v3.1.2".
-// Returns ok=false if the string is not valid semver.
-func Parse(v string) (Semver, bool) {
-	v = strings.TrimPrefix(v, "v")
-	// Strip any pre-release suffix (e.g. "-beta.1") and build metadata (e.g. "+build.1")
-	if idx := strings.IndexAny(v, "-+"); idx != -1 {
-		v = v[:idx]
-	}
-	parts := strings.SplitN(v, ".", 3)
-	if len(parts) != 3 {
-		return Semver{}, false
-	}
-	major, err := strconv.Atoi(parts[0])
-	if err != nil {
-		return Semver{}, false
-	}
-	minor, err := strconv.Atoi(parts[1])
-	if err != nil {
-		return Semver{}, false
-	}
-	patch, err := strconv.Atoi(parts[2])
-	if err != nil {
-		return Semver{}, false
-	}
-	return Semver{Major: major, Minor: minor, Patch: patch}, true
-}
-
-// LessThan reports whether s is strictly less than other.
-func (s Semver) LessThan(other Semver) bool {
-	if s.Major != other.Major {
-		return s.Major < other.Major
-	}
-	if s.Minor != other.Minor {
-		return s.Minor < other.Minor
-	}
-	return s.Patch < other.Patch
-}
--- a/cli/pyproject.toml
+++ b/cli/pyproject.toml
@@ -1,5 +1,5 @@
 [build-system]
-requires = ["hatchling==1.29.0", "go-bin~=1.26.1", "manygo==0.2.0"]
+requires = ["hatchling", "go-bin~=1.24.11", "manygo"]
 build-backend = "hatchling.build"

 [project]
--- a/deployment/helm/charts/onyx/Chart.yaml
+++ b/deployment/helm/charts/onyx/Chart.yaml
@@ -5,7 +5,7 @@ home: https://www.onyx.app/
 sources:
  - "https://github.com/onyx-dot-app/onyx"
 type: application
-version: 0.4.39
+version: 0.4.37
 appVersion: latest
 annotations:
  category: Productivity
--- a/deployment/helm/charts/onyx/dashboards/indexing-pipeline.json
+++ b/deployment/helm/charts/onyx/dashboards/indexing-pipeline.json
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Dane Urban	46565faec3	.	2026-03-31 22:52:59 +11:00
Dane Urban	1a2ebafeca	.	2026-03-31 22:52:09 +11:00
Dane Urban	96d77cc59f	Add db migration	2026-03-31 19:25:23 +11:00
Dane Urban	88cdec5409	Minor things	2026-03-31 18:54:48 +11:00
Dane Urban	fd0b76edca	.	2026-03-31 17:07:06 +11:00
Dane Urban	cc99c968a7	.	2026-03-31 16:59:50 +11:00
Dane Urban	53c03e6b60	.'	2026-03-31 11:24:21 +11:00
Dane Urban	1d2d79127d	.	2026-03-30 14:45:32 +11:00
Dane Urban	f44663c23c	.	2026-03-30 11:49:27 +11:00
Dane Urban	b73d26aedd	.	2026-03-27 23:54:51 +11:00