refactor(files): Port csv type to tabular (#9785)

refactor(opal): split Card sizeVariant into padding + rounding (#9823)
feat(file-upload): Upload files exceeding tokens but skip indexing (#9751)
2026-04-01 04:52:43 +00:00 · 2026-04-01 03:37:13 +00:00 · 2026-04-01 03:32:08 +00:00 · 2026-04-01 02:14:51 +00:00 · 2026-04-01 01:28:37 +00:00 · 2026-04-01 01:09:05 +00:00
312 changed files with 14041 additions and 4931 deletions
--- a/.github/workflows/deployment.yml
+++ b/.github/workflows/deployment.yml
@@ -704,6 +704,9 @@ jobs:
            NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=true
            NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=true
            NODE_OPTIONS=--max-old-space-size=8192
+            SENTRY_RELEASE=${{ github.sha }}
+          secrets: |
+            sentry_auth_token=${{ secrets.SENTRY_AUTH_TOKEN }}
          cache-from: |
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:cloudweb-cache-amd64
            type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
@@ -786,6 +789,9 @@ jobs:
            NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=true
            NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=true
            NODE_OPTIONS=--max-old-space-size=8192
+            SENTRY_RELEASE=${{ github.sha }}
+          secrets: |
+            sentry_auth_token=${{ secrets.SENTRY_AUTH_TOKEN }}
          cache-from: |
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:cloudweb-cache-arm64
            type=registry,ref=${{ env.REGISTRY_IMAGE }}:latest
--- a/.github/workflows/helm-chart-releases.yml
+++ b/.github/workflows/helm-chart-releases.yml
@@ -47,7 +47,8 @@ jobs:
          done

      - name: Publish Helm charts to gh-pages
-        uses: stefanprodan/helm-gh-pages@0ad2bb377311d61ac04ad9eb6f252fb68e207260 # ratchet:stefanprodan/helm-gh-pages@v1.7.0
+        # NOTE: HEAD of https://github.com/stefanprodan/helm-gh-pages/pull/43
+        uses: stefanprodan/helm-gh-pages@ad32ad3b8720abfeaac83532fd1e9bdfca5bbe27 # zizmor: ignore[impostor-commit]
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          charts_dir: deployment/helm/charts
--- a/.github/workflows/nightly-llm-provider-chat.yml
+++ b/.github/workflows/nightly-llm-provider-chat.yml
@@ -35,6 +35,7 @@ jobs:
    needs: [provider-chat-test]
    if: failure() && github.event_name == 'schedule'
    runs-on: ubuntu-slim
+    environment: ci-protected
    timeout-minutes: 5
    steps:
      - name: Checkout
--- a/.github/workflows/post-merge-beta-cherry-pick.yml
+++ b/.github/workflows/post-merge-beta-cherry-pick.yml
@@ -183,6 +183,7 @@ jobs:
      - cherry-pick-to-latest-release
    if: needs.resolve-cherry-pick-request.outputs.should_cherrypick == 'true' && needs.resolve-cherry-pick-request.result == 'success' && needs.cherry-pick-to-latest-release.result == 'success'
    runs-on: ubuntu-slim
+    environment: ci-protected
    timeout-minutes: 10
    steps:
      - name: Checkout
@@ -232,6 +233,7 @@ jobs:
      - cherry-pick-to-latest-release
    if: always() && needs.resolve-cherry-pick-request.outputs.should_cherrypick == 'true' && (needs.resolve-cherry-pick-request.result == 'failure' || needs.cherry-pick-to-latest-release.result == 'failure')
    runs-on: ubuntu-slim
+    environment: ci-protected
    timeout-minutes: 10
    steps:
      - name: Checkout
--- a/.github/workflows/pr-desktop-build.yml
+++ b/.github/workflows/pr-desktop-build.yml
@@ -63,7 +63,7 @@ jobs:
          targets: ${{ matrix.target }}

      - name: Cache Cargo registry and build
-        uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # zizmor: ignore[cache-poisoning]
+        uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # zizmor: ignore[cache-poisoning]
        with:
          path: |
            ~/.cargo/bin/
--- a/.github/workflows/pr-helm-chart-testing.yml
+++ b/.github/workflows/pr-helm-chart-testing.yml
@@ -41,7 +41,7 @@ jobs:
          version: v3.19.0

      - name: Set up chart-testing
-        uses: helm/chart-testing-action@b5eebdd9998021f29756c53432f48dab66394810
+        uses: helm/chart-testing-action@2e2940618cb426dce2999631d543b53cdcfc8527
        with:
          uv_version: "0.9.9"

--- a/.github/workflows/pr-playwright-tests.yml
+++ b/.github/workflows/pr-playwright-tests.yml
@@ -284,7 +284,7 @@ jobs:

      - name: Cache playwright cache
        # zizmor: ignore[cache-poisoning] ephemeral runners; no release artifacts
-        uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
+        uses: runs-on/cache@a5f51d6f3fece787d03b7b4e981c82538a0654ed # ratchet:runs-on/cache@v4
        with:
          path: ~/.cache/ms-playwright
          key: ${{ runner.os }}-playwright-npm-${{ hashFiles('web/package-lock.json') }}
@@ -626,7 +626,7 @@ jobs:

      - name: Cache playwright cache
        # zizmor: ignore[cache-poisoning] ephemeral runners; no release artifacts
-        uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
+        uses: runs-on/cache@a5f51d6f3fece787d03b7b4e981c82538a0654ed # ratchet:runs-on/cache@v4
        with:
          path: ~/.cache/ms-playwright
          key: ${{ runner.os }}-playwright-npm-${{ hashFiles('web/package-lock.json') }}
--- a/.github/workflows/pr-python-checks.yml
+++ b/.github/workflows/pr-python-checks.yml
@@ -56,7 +56,7 @@ jobs:

      - name: Cache mypy cache
        if: ${{ vars.DISABLE_MYPY_CACHE != 'true' }}
-        uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
+        uses: runs-on/cache@a5f51d6f3fece787d03b7b4e981c82538a0654ed # ratchet:runs-on/cache@v4
        with:
          path: .mypy_cache
          key: mypy-${{ runner.os }}-${{ github.base_ref || github.event.merge_group.base_ref || 'main' }}-${{ hashFiles('**/*.py', '**/*.pyi', 'pyproject.toml') }}
--- a/.github/workflows/pr-python-model-tests.yml
+++ b/.github/workflows/pr-python-model-tests.yml
@@ -31,6 +31,7 @@ jobs:
      - runner=4cpu-linux-arm64
      - "run-id=${{ github.run_id }}-model-check"
      - "extras=ecr-cache"
+    environment: ci-protected
    timeout-minutes: 45

    env:
--- a/.github/workflows/preview.yml
+++ b/.github/workflows/preview.yml
@@ -15,6 +15,7 @@ permissions:
 jobs:
  Deploy-Preview:
    runs-on: ubuntu-latest
+    environment: ci-protected
    timeout-minutes: 30
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
--- a/.github/workflows/release-cli.yml
+++ b/.github/workflows/release-cli.yml
@@ -13,15 +13,6 @@ jobs:
    permissions:
      id-token: write
    timeout-minutes: 10
-    strategy:
-      matrix:
-        os-arch:
-          - { goos: "linux", goarch: "amd64" }
-          - { goos: "linux", goarch: "arm64" }
-          - { goos: "windows", goarch: "amd64" }
-          - { goos: "windows", goarch: "arm64" }
-          - { goos: "darwin", goarch: "amd64" }
-          - { goos: "darwin", goarch: "arm64" }
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
        with:
@@ -31,9 +22,11 @@ jobs:
          enable-cache: false
          version: "0.9.9"
      - run: |
-          GOOS="${{ matrix.os-arch.goos }}" \
-          GOARCH="${{ matrix.os-arch.goarch }}" \
-          uv build --wheel
+          for goos in linux windows darwin; do
+            for goarch in amd64 arm64; do
+              GOOS="$goos" GOARCH="$goarch" uv build --wheel
+            done
+          done
        working-directory: cli
      - run: uv publish
        working-directory: cli
--- a/.github/workflows/storybook-deploy.yml
+++ b/.github/workflows/storybook-deploy.yml
@@ -25,6 +25,7 @@ permissions:
 jobs:
  Deploy-Storybook:
    runs-on: ubuntu-latest
+    environment: ci-protected
    timeout-minutes: 30
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v4
@@ -54,6 +55,7 @@ jobs:
    needs: Deploy-Storybook
    if: always() && needs.Deploy-Storybook.result == 'failure'
    runs-on: ubuntu-latest
+    environment: ci-protected
    timeout-minutes: 10
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v4
--- a/.github/workflows/sync_foss.yml
+++ b/.github/workflows/sync_foss.yml
@@ -9,6 +9,7 @@ on:
 jobs:
  sync-foss:
    runs-on: ubuntu-latest
+    environment: ci-protected
    timeout-minutes: 45
    permissions:
      contents: read
--- a/.github/workflows/tag-nightly.yml
+++ b/.github/workflows/tag-nightly.yml
@@ -11,6 +11,7 @@ permissions:
 jobs:
  create-and-push-tag:
    runs-on: ubuntu-slim
+    environment: ci-protected
    timeout-minutes: 45

    steps:
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -122,7 +122,7 @@ repos:
    rev: 5d1e709b7be35cb2025444e19de266b056b7b7ee # frozen: v2.10.1
    hooks:
      - id: golangci-lint
-        language_version: "1.26.0"
+        language_version: "1.26.1"
        entry: bash -c "find . -name go.mod -not -path './.venv/*' -print0 | xargs -0 -I{} bash -c 'cd \"$(dirname {})\" && golangci-lint run ./...'"

  - repo: https://github.com/astral-sh/ruff-pre-commit
--- a/README.md
+++ b/README.md
@@ -35,7 +35,7 @@ Onyx comes loaded with advanced features like Agents, Web Search, RAG, MCP, Deep
 > [!TIP]
 > Run Onyx with one command (or see deployment section below):
 > ```
-> curl -fsSL https://raw.githubusercontent.com/onyx-dot-app/onyx/main/deployment/docker_compose/install.sh > install.sh && chmod +x install.sh && ./install.sh
+> curl -fsSL https://onyx.app/install_onyx.sh | bash
 > ```

 ****
--- a/backend/alembic/versions/8188861f4e92_csv_to_tabular_chat_file_type.py
+++ b/backend/alembic/versions/8188861f4e92_csv_to_tabular_chat_file_type.py
@@ -0,0 +1,54 @@
+"""csv to tabular chat file type
+
+Revision ID: 8188861f4e92
+Revises: d8cdfee5df80
+Create Date: 2026-03-31 19:23:05.753184
+
+"""
+
+from alembic import op
+
+
+# revision identifiers, used by Alembic.
+revision = "8188861f4e92"
+down_revision = "d8cdfee5df80"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Retag chat file descriptors of type 'csv' as 'tabular'.
+
+    Only rows whose serialized ``files`` value contains a ``"type": "csv"``
+    pair are updated. Within those rows each array element is rewritten
+    individually; elements with any other type are kept unchanged.
+    """
+    retag_csv_sql = """
+        UPDATE chat_message
+        SET files = (
+            SELECT jsonb_agg(
+                CASE
+                    WHEN elem->>'type' = 'csv'
+                    THEN jsonb_set(elem, '{type}', '"tabular"')
+                    ELSE elem
+                END
+            )
+            FROM jsonb_array_elements(files) AS elem
+        )
+        WHERE files::text LIKE '%"type": "csv"%'
+        """
+    op.execute(retag_csv_sql)
+
+
+def downgrade() -> None:
+    """Retag chat file descriptors of type 'tabular' back to 'csv'.
+
+    Every element typed 'tabular' is rewritten, regardless of how it came to
+    have that type; elements with any other type are kept unchanged.
+    """
+    retag_tabular_sql = """
+        UPDATE chat_message
+        SET files = (
+            SELECT jsonb_agg(
+                CASE
+                    WHEN elem->>'type' = 'tabular'
+                    THEN jsonb_set(elem, '{type}', '"csv"')
+                    ELSE elem
+                END
+            )
+            FROM jsonb_array_elements(files) AS elem
+        )
+        WHERE files::text LIKE '%"type": "tabular"%'
+        """
+    op.execute(retag_tabular_sql)
--- a/backend/alembic/versions/d8cdfee5df80_add_skipped_to_userfilestatus.py
+++ b/backend/alembic/versions/d8cdfee5df80_add_skipped_to_userfilestatus.py
@@ -0,0 +1,55 @@
+"""add skipped to userfilestatus
+
+Revision ID: d8cdfee5df80
+Revises: 1d78c0ca7853
+Create Date: 2026-04-01 10:47:12.593950
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "d8cdfee5df80"
+down_revision = "1d78c0ca7853"
+branch_labels = None
+depends_on = None
+
+
+TABLE = "user_file"
+COLUMN = "status"
+CONSTRAINT_NAME = "ck_user_file_status"
+
+OLD_VALUES = ("PROCESSING", "INDEXING", "COMPLETED", "FAILED", "CANCELED", "DELETING")
+NEW_VALUES = (
+    "PROCESSING",
+    "INDEXING",
+    "COMPLETED",
+    "SKIPPED",
+    "FAILED",
+    "CANCELED",
+    "DELETING",
+)
+
+
+def _drop_status_check_constraint() -> None:
+    """Drop each named check constraint on TABLE whose SQL text mentions COLUMN.
+
+    The match is a plain substring test against the constraint's ``sqltext``,
+    so the constraint is found whatever name it currently has.
+    """
+    for check in sa.inspect(op.get_bind()).get_check_constraints(TABLE):
+        if COLUMN not in check.get("sqltext", ""):
+            continue
+        name = check["name"]
+        # Unnamed constraints cannot be dropped by name, so they are left alone.
+        if name is not None:
+            op.drop_constraint(name, TABLE, type_="check")
+
+
+def upgrade() -> None:
+    """Recreate the status check constraint so that it also admits SKIPPED."""
+    _drop_status_check_constraint()
+    quoted_values = [f"'{value}'" for value in NEW_VALUES]
+    op.create_check_constraint(
+        CONSTRAINT_NAME, TABLE, f"{COLUMN} IN ({', '.join(quoted_values)})"
+    )
+
+
+def downgrade() -> None:
+    """Restore the status check constraint that does not allow SKIPPED.
+
+    Rows currently marked SKIPPED are rewritten to COMPLETED first, because
+    the restored constraint (OLD_VALUES) would reject them.
+    """
+    remap_skipped_sql = (
+        f"UPDATE {TABLE} SET {COLUMN} = 'COMPLETED' WHERE {COLUMN} = 'SKIPPED'"
+    )
+    op.execute(remap_skipped_sql)
+    _drop_status_check_constraint()
+    quoted_values = [f"'{value}'" for value in OLD_VALUES]
+    op.create_check_constraint(
+        CONSTRAINT_NAME, TABLE, f"{COLUMN} IN ({', '.join(quoted_values)})"
+    )
--- a/backend/ee/onyx/background/celery/apps/primary.py
+++ b/backend/ee/onyx/background/celery/apps/primary.py
@@ -5,6 +5,7 @@ from onyx.background.celery.apps.primary import celery_app
 celery_app.autodiscover_tasks(
    app_base.filter_task_modules(
        [
+            "ee.onyx.background.celery.tasks.hooks",
            "ee.onyx.background.celery.tasks.doc_permission_syncing",
            "ee.onyx.background.celery.tasks.external_group_syncing",
            "ee.onyx.background.celery.tasks.cloud",
--- a/backend/ee/onyx/background/celery/tasks/beat_schedule.py
+++ b/backend/ee/onyx/background/celery/tasks/beat_schedule.py
@@ -55,6 +55,15 @@ ee_tasks_to_schedule: list[dict] = []

 if not MULTI_TENANT:
    ee_tasks_to_schedule = [
+        {
+            "name": "hook-execution-log-cleanup",
+            "task": OnyxCeleryTask.HOOK_EXECUTION_LOG_CLEANUP_TASK,
+            "schedule": timedelta(days=1),
+            "options": {
+                "priority": OnyxCeleryPriority.LOW,
+                "expires": BEAT_EXPIRES_DEFAULT,
+            },
+        },
        {
            "name": "autogenerate-usage-report",
            "task": OnyxCeleryTask.GENERATE_USAGE_REPORT_TASK,
--- a/backend/ee/onyx/background/celery/tasks/doc_permission_syncing/tasks.py
+++ b/backend/ee/onyx/background/celery/tasks/doc_permission_syncing/tasks.py
@@ -28,6 +28,7 @@ from onyx.access.models import DocExternalAccess
 from onyx.access.models import ElementExternalAccess
 from onyx.background.celery.apps.app_base import task_logger
 from onyx.background.celery.celery_redis import celery_find_task
+from onyx.background.celery.celery_redis import celery_get_broker_client
 from onyx.background.celery.celery_redis import celery_get_queue_length
 from onyx.background.celery.celery_redis import celery_get_queued_task_ids
 from onyx.background.celery.celery_redis import celery_get_unacked_task_ids
@@ -187,7 +188,6 @@ def check_for_doc_permissions_sync(self: Task, *, tenant_id: str) -> bool | None
    # (which lives on a different db number)
    r = get_redis_client()
    r_replica = get_redis_replica_client()
-    r_celery: Redis = self.app.broker_connection().channel().client  # type: ignore

    lock_beat: RedisLock = r.lock(
        OnyxRedisLocks.CHECK_CONNECTOR_DOC_PERMISSIONS_SYNC_BEAT_LOCK,
@@ -227,6 +227,7 @@ def check_for_doc_permissions_sync(self: Task, *, tenant_id: str) -> bool | None
            # tasks can be in the queue in redis, in reserved tasks (prefetched by the worker),
            # or be currently executing
            try:
+                r_celery = celery_get_broker_client(self.app)
                validate_permission_sync_fences(
                    tenant_id, r, r_replica, r_celery, lock_beat
                )
--- a/backend/ee/onyx/background/celery/tasks/external_group_syncing/tasks.py
+++ b/backend/ee/onyx/background/celery/tasks/external_group_syncing/tasks.py
@@ -29,6 +29,7 @@ from ee.onyx.external_permissions.sync_params import (
 from ee.onyx.external_permissions.sync_params import get_source_perm_sync_config
 from onyx.background.celery.apps.app_base import task_logger
 from onyx.background.celery.celery_redis import celery_find_task
+from onyx.background.celery.celery_redis import celery_get_broker_client
 from onyx.background.celery.celery_redis import celery_get_unacked_task_ids
 from onyx.background.celery.tasks.beat_schedule import CLOUD_BEAT_MULTIPLIER_DEFAULT
 from onyx.background.error_logging import emit_background_error
@@ -162,7 +163,6 @@ def check_for_external_group_sync(self: Task, *, tenant_id: str) -> bool | None:
    # (which lives on a different db number)
    r = get_redis_client()
    r_replica = get_redis_replica_client()
-    r_celery: Redis = self.app.broker_connection().channel().client  # type: ignore

    lock_beat: RedisLock = r.lock(
        OnyxRedisLocks.CHECK_CONNECTOR_EXTERNAL_GROUP_SYNC_BEAT_LOCK,
@@ -221,6 +221,7 @@ def check_for_external_group_sync(self: Task, *, tenant_id: str) -> bool | None:
            # tasks can be in the queue in redis, in reserved tasks (prefetched by the worker),
            # or be currently executing
            try:
+                r_celery = celery_get_broker_client(self.app)
                validate_external_group_sync_fences(
                    tenant_id, self.app, r, r_replica, r_celery, lock_beat
                )
--- a/backend/ee/onyx/background/celery/tasks/hooks/__init__.py
+++ b/backend/ee/onyx/background/celery/tasks/hooks/__init__.py
--- a/backend/ee/onyx/background/celery/tasks/hooks/tasks.py
+++ b/backend/ee/onyx/background/celery/tasks/hooks/tasks.py
--- a/backend/ee/onyx/background/celery/tasks/tenant_provisioning/tasks.py
+++ b/backend/ee/onyx/background/celery/tasks/tenant_provisioning/tasks.py
@@ -13,6 +13,7 @@ from redis.lock import Lock as RedisLock
 from ee.onyx.server.tenants.provisioning import setup_tenant
 from ee.onyx.server.tenants.schema_management import create_schema_if_not_exists
 from ee.onyx.server.tenants.schema_management import get_current_alembic_version
+from ee.onyx.server.tenants.schema_management import run_alembic_migrations
 from onyx.background.celery.apps.app_base import task_logger
 from onyx.configs.app_configs import TARGET_AVAILABLE_TENANTS
 from onyx.configs.constants import ONYX_CLOUD_TENANT_ID
@@ -29,9 +30,10 @@ from shared_configs.configs import TENANT_ID_PREFIX
 # Each tenant takes ~80s (alembic migrations), so 5 tenants ≈ 7 minutes.
 _MAX_TENANTS_PER_RUN = 5

-# Time limits sized for worst-case batch: _MAX_TENANTS_PER_RUN × ~90s + buffer.
-_TENANT_PROVISIONING_SOFT_TIME_LIMIT = 60 * 10  # 10 minutes
-_TENANT_PROVISIONING_TIME_LIMIT = 60 * 15  # 15 minutes
+# Time limits sized for worst-case: provisioning up to _MAX_TENANTS_PER_RUN new tenants
+# (~90s each) plus migrating up to TARGET_AVAILABLE_TENANTS pool tenants (~90s each).
+_TENANT_PROVISIONING_SOFT_TIME_LIMIT = 60 * 20  # 20 minutes
+_TENANT_PROVISIONING_TIME_LIMIT = 60 * 25  # 25 minutes


@shared_task(
@@ -91,8 +93,7 @@ def check_available_tenants(self: Task) -> None:  # noqa: ARG001
        batch_size = min(tenants_to_provision, _MAX_TENANTS_PER_RUN)
        if batch_size < tenants_to_provision:
            task_logger.info(
-                f"Capping batch to {batch_size} "
-                f"(need {tenants_to_provision}, will catch up next cycle)"
+                f"Capping batch to {batch_size} (need {tenants_to_provision}, will catch up next cycle)"
            )

        provisioned = 0
@@ -103,12 +104,14 @@ def check_available_tenants(self: Task) -> None:  # noqa: ARG001
                    provisioned += 1
            except Exception:
                task_logger.exception(
-                    f"Failed to provision tenant {i + 1}/{batch_size}, "
-                    "continuing with remaining tenants"
+                    f"Failed to provision tenant {i + 1}/{batch_size}, continuing with remaining tenants"
                )

        task_logger.info(f"Provisioning complete: {provisioned}/{batch_size} succeeded")

+        # Migrate any pool tenants that were provisioned before a new migration was deployed
+        _migrate_stale_pool_tenants()
+
    except Exception:
        task_logger.exception("Error in check_available_tenants task")

@@ -121,6 +124,46 @@ def check_available_tenants(self: Task) -> None:  # noqa: ARG001
            )


+def _migrate_stale_pool_tenants() -> None:
+    """Run migrations on every pool tenant and record the resulting version.
+
+    Each tenant listed in AvailableTenant is passed to run_alembic_migrations.
+    When the version reported afterwards differs from the one stored on the
+    pool row, the row is updated and committed. A failure on one tenant is
+    logged and does not stop the remaining tenants from being processed.
+    """
+    with get_session_with_shared_schema() as db_session:
+        tenant_ids = [
+            pool_row.tenant_id for pool_row in db_session.query(AvailableTenant).all()
+        ]
+
+    if not tenant_ids:
+        return
+
+    task_logger.info(
+        f"Checking {len(tenant_ids)} pool tenant(s) for pending migrations"
+    )
+
+    for tenant_id in tenant_ids:
+        try:
+            run_alembic_migrations(tenant_id)
+            new_version = get_current_alembic_version(tenant_id)
+            with get_session_with_shared_schema() as db_session:
+                pool_query = db_session.query(AvailableTenant)
+                tenant = pool_query.filter_by(tenant_id=tenant_id).first()
+                # Nothing to record if the pool row no longer exists or is
+                # already at the reported version.
+                if not tenant or tenant.alembic_version == new_version:
+                    continue
+                task_logger.info(
+                    f"Migrated pool tenant {tenant_id}: {tenant.alembic_version} -> {new_version}"
+                )
+                tenant.alembic_version = new_version
+                db_session.commit()
+        except Exception:
+            task_logger.exception(
+                f"Failed to migrate pool tenant {tenant_id}, skipping"
+            )
+
+
 def pre_provision_tenant() -> bool:
    """
    Pre-provision a new tenant and store it in the NewAvailableTenant table.
--- a/backend/ee/onyx/configs/license_enforcement_config.py
+++ b/backend/ee/onyx/configs/license_enforcement_config.py
@@ -69,5 +69,7 @@ EE_ONLY_PATH_PREFIXES: frozenset[str] = frozenset(
        "/admin/token-rate-limits",
        # Evals
        "/evals",
+        # Hook extensions
+        "/admin/hooks",
    }
 )
--- a/backend/ee/onyx/hooks/__init__.py
+++ b/backend/ee/onyx/hooks/__init__.py
--- a/backend/ee/onyx/hooks/executor.py
+++ b/backend/ee/onyx/hooks/executor.py
@@ -0,0 +1,385 @@
+"""Hook executor — calls a customer's external HTTP endpoint for a given hook point.
+
+Usage (Celery tasks and FastAPI handlers):
+    result = execute_hook(
+        db_session=db_session,
+        hook_point=HookPoint.QUERY_PROCESSING,
+        payload={"query": "...", "user_email": "...", "chat_session_id": "..."},
+        response_type=QueryProcessingResponse,
+    )
+
+    if isinstance(result, HookSkipped):
+        # no active hook configured — continue with original behavior
+        ...
+    elif isinstance(result, HookSoftFailed):
+        # hook failed but fail strategy is SOFT — continue with original behavior
+        ...
+    else:
+        # result is a validated Pydantic model instance (response_type)
+        ...
+
+is_reachable update policy
+--------------------------
+``is_reachable`` on the Hook row is updated selectively — only when the outcome
+carries meaningful signal about physical reachability:
+
+  NetworkError (DNS, connection refused)  → False  (cannot reach the server)
+  HTTP 401 / 403                          → False  (api_key revoked or invalid)
+  TimeoutException                        → None   (server may be slow, skip write)
+  Other HTTP errors (4xx / 5xx)           → None   (server responded, skip write)
+  Unknown exception                       → None   (no signal, skip write)
+  Non-JSON / non-dict response            → None   (server responded, skip write)
+  Success (2xx, valid dict)               → True   (confirmed reachable)
+
+None means "leave the current value unchanged" — no DB round-trip is made.
+
+DB session design
+-----------------
+The executor uses three sessions:
+
+  1. Caller's session (db_session) — used only for the hook lookup read. All
+     needed fields are extracted from the Hook object before the HTTP call, so
+     the caller's session is not held open during the external HTTP request.
+
+  2. Log session — a separate short-lived session opened after the HTTP call
+     completes to write the HookExecutionLog row on failure. Success runs are
+     not recorded. Committed independently of everything else.
+
+  3. Reachable session — a second short-lived session to update is_reachable on
+     the Hook. Kept separate from the log session so a concurrent hook deletion
+     (which causes update_hook__no_commit to raise OnyxError(NOT_FOUND)) cannot
+     prevent the execution log from being written. This update is best-effort.
+"""
+
+import json
+import time
+from typing import Any
+from typing import TypeVar
+
+import httpx
+from pydantic import BaseModel
+from pydantic import ValidationError
+from sqlalchemy.orm import Session
+
+from onyx.db.engine.sql_engine import get_session_with_current_tenant
+from onyx.db.enums import HookFailStrategy
+from onyx.db.enums import HookPoint
+from onyx.db.hook import create_hook_execution_log__no_commit
+from onyx.db.hook import get_non_deleted_hook_by_hook_point
+from onyx.db.hook import update_hook__no_commit
+from onyx.db.models import Hook
+from onyx.error_handling.error_codes import OnyxErrorCode
+from onyx.error_handling.exceptions import OnyxError
+from onyx.hooks.executor import HookSkipped
+from onyx.hooks.executor import HookSoftFailed
+from onyx.utils.logger import setup_logger
+from shared_configs.configs import MULTI_TENANT
+
+logger = setup_logger()
+
+
+T = TypeVar("T", bound=BaseModel)
+
+
+# ---------------------------------------------------------------------------
+# Private helpers
+# ---------------------------------------------------------------------------
+
+
+class _HttpOutcome(BaseModel):
+    """Structured result of an HTTP hook call, returned by _process_response."""
+
+    # True only when the call returned a 2xx response whose body is a JSON object.
+    is_success: bool
+    # True/False = write to DB, None = unchanged (skip write)
+    updated_is_reachable: bool | None
+    # HTTP status of the response; None when no response was received.
+    status_code: int | None
+    # Human-readable failure description; None on success.
+    error_message: str | None
+    # Decoded JSON object from the response; None on any failure.
+    response_payload: dict[str, Any] | None
+
+
+def _lookup_hook(
+    db_session: Session,
+    hook_point: HookPoint,
+) -> Hook | HookSkipped:
+    """Resolve the hook to call for ``hook_point``, or HookSkipped if there is none.
+
+    HookSkipped is returned when MULTI_TENANT is set (before any lookup), when
+    no non-deleted hook exists for the hook point, when the hook is inactive,
+    or when it has no endpoint_url. No HTTP call is made here.
+    """
+    if MULTI_TENANT:
+        return HookSkipped()
+
+    candidate = get_non_deleted_hook_by_hook_point(
+        db_session=db_session, hook_point=hook_point
+    )
+    # A hook is only usable when it exists, is active, and has an endpoint.
+    if candidate is None or not (candidate.is_active and candidate.endpoint_url):
+        return HookSkipped()
+    return candidate
+
+
+def _process_response(
+    *,
+    response: httpx.Response | None,
+    exc: Exception | None,
+    timeout: float,
+) -> _HttpOutcome:
+    """Turn the result of the hook's HTTP POST into a structured outcome.
+
+    Called after the client.post() try/except. Exactly one of ``response`` and
+    ``exc`` is expected to be set: ``exc`` when post() raised, ``response``
+    otherwise. If ``exc`` is set it takes precedence and ``response`` is not
+    inspected. Handles raise_for_status(), JSON decoding, and the dict shape
+    check. Every failure path is logged before the outcome is returned.
+
+    Args:
+        response: The HTTP response, or None if post() raised.
+        exc: The exception raised by post(), or None if it returned.
+        timeout: Timeout in seconds; used only in the timeout error message.
+
+    Returns:
+        An _HttpOutcome. ``updated_is_reachable`` is False for network errors
+        and HTTP 401/403, True on success, and None for every other failure.
+
+    Raises:
+        ValueError: If both ``response`` and ``exc`` are None.
+    """
+    if exc is not None:
+        if isinstance(exc, httpx.NetworkError):
+            msg = f"Hook network error (endpoint unreachable): {exc}"
+            logger.warning(msg, exc_info=exc)
+            return _HttpOutcome(
+                is_success=False,
+                updated_is_reachable=False,
+                status_code=None,
+                error_message=msg,
+                response_payload=None,
+            )
+        if isinstance(exc, httpx.TimeoutException):
+            msg = f"Hook timed out after {timeout}s: {exc}"
+            logger.warning(msg, exc_info=exc)
+            return _HttpOutcome(
+                is_success=False,
+                updated_is_reachable=None,  # timeout doesn't indicate unreachability
+                status_code=None,
+                error_message=msg,
+                response_payload=None,
+            )
+        msg = f"Hook call failed: {exc}"
+        logger.exception(msg, exc_info=exc)
+        return _HttpOutcome(
+            is_success=False,
+            updated_is_reachable=None,  # unknown error — don't make assumptions
+            status_code=None,
+            error_message=msg,
+            response_payload=None,
+        )
+
+    if response is None:
+        raise ValueError(
+            "exactly one of response or exc must be non-None; both are None"
+        )
+    status_code = response.status_code
+
+    try:
+        response.raise_for_status()
+    except httpx.HTTPStatusError as e:
+        msg = f"Hook returned HTTP {e.response.status_code}: {e.response.text}"
+        logger.warning(msg, exc_info=e)
+        # 401/403 means the api_key has been revoked or is invalid — mark unreachable
+        # so the operator knows to update it. All other HTTP errors keep is_reachable
+        # as-is (server is up, the request just failed for application reasons).
+        auth_failed = e.response.status_code in (401, 403)
+        return _HttpOutcome(
+            is_success=False,
+            updated_is_reachable=False if auth_failed else None,
+            status_code=status_code,
+            error_message=msg,
+            response_payload=None,
+        )
+
+    try:
+        response_payload = response.json()
+    # UnicodeDecodeError: a body that is not valid UTF-8/16/32 fails while being
+    # decoded, before JSON parsing starts. It is a ValueError but not a
+    # JSONDecodeError, so it must be listed explicitly to be reported as a
+    # non-JSON response instead of escaping this function.
+    except (json.JSONDecodeError, UnicodeDecodeError, httpx.DecodingError) as e:
+        msg = f"Hook returned non-JSON response: {e}"
+        logger.warning(msg, exc_info=e)
+        return _HttpOutcome(
+            is_success=False,
+            updated_is_reachable=None,  # server responded — reachability unchanged
+            status_code=status_code,
+            error_message=msg,
+            response_payload=None,
+        )
+
+    if not isinstance(response_payload, dict):
+        msg = f"Hook returned non-dict JSON (got {type(response_payload).__name__})"
+        logger.warning(msg)
+        return _HttpOutcome(
+            is_success=False,
+            updated_is_reachable=None,  # server responded — reachability unchanged
+            status_code=status_code,
+            error_message=msg,
+            response_payload=None,
+        )
+
+    return _HttpOutcome(
+        is_success=True,
+        updated_is_reachable=True,
+        status_code=status_code,
+        error_message=None,
+        response_payload=response_payload,
+    )
+
+
+def _persist_result(
+    *,
+    hook_id: int,
+    outcome: _HttpOutcome,
+    duration_ms: int,
+) -> None:
+    """Write the execution log on failure and optionally update is_reachable, each
+    in its own session so a failure in one does not affect the other.
+
+    Neither write propagates an exception to the caller; failures are logged.
+
+    Args:
+        hook_id: ID of the hook that was called.
+        outcome: Result of the HTTP call. A log row is written only when
+            ``is_success`` is False; is_reachable is written only when
+            ``updated_is_reachable`` is not None.
+        duration_ms: Call duration stored on the execution log row.
+    """
+    # Only write the execution log on failure — success runs are not recorded.
+    # Must not be skipped if the is_reachable update fails (e.g. hook concurrently
+    # deleted between the initial lookup and here).
+    if not outcome.is_success:
+        try:
+            with get_session_with_current_tenant() as log_session:
+                create_hook_execution_log__no_commit(
+                    db_session=log_session,
+                    hook_id=hook_id,
+                    is_success=False,
+                    error_message=outcome.error_message,
+                    status_code=outcome.status_code,
+                    duration_ms=duration_ms,
+                )
+                log_session.commit()
+        except Exception:
+            logger.exception(
+                f"Failed to persist hook execution log for hook_id={hook_id}"
+            )
+
+    # Update is_reachable separately — best-effort, non-critical.
+    # None means the value is unchanged (set by the caller to skip the no-op write).
+    # update_hook__no_commit can raise OnyxError(NOT_FOUND) if the hook was
+    # concurrently deleted, so keep this isolated from the log write above.
+    if outcome.updated_is_reachable is not None:
+        try:
+            with get_session_with_current_tenant() as reachable_session:
+                update_hook__no_commit(
+                    db_session=reachable_session,
+                    hook_id=hook_id,
+                    is_reachable=outcome.updated_is_reachable,
+                )
+                reachable_session.commit()
+        except Exception:
+            # Stays at warning level because the update is best-effort, but the
+            # exception is attached so the cause is not lost.
+            logger.warning(
+                f"Failed to update is_reachable for hook_id={hook_id}",
+                exc_info=True,
+            )
+
+
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+
+
+def _execute_hook_inner(
+    hook: Hook,
+    payload: dict[str, Any],
+    response_type: type[T],
+) -> T | HookSoftFailed:
+    """Make the HTTP call, validate the response, and return a typed model.
+
+    Args:
+        hook: The hook to call; its timeout_seconds, id, fail_strategy,
+            endpoint_url, is_reachable and api_key are read here.
+        payload: JSON body POSTed to the hook's endpoint_url.
+        response_type: Model class the response payload is validated against
+            via model_validate.
+
+    Returns:
+        The validated response_type instance on success, or HookSoftFailed when
+        the call failed and the hook's fail_strategy is not HARD.
+
+    Raises:
+        ValueError: If the hook has no endpoint_url.
+        OnyxError: HOOK_EXECUTION_FAILED when the call failed and the
+            fail_strategy is HARD; INTERNAL_ERROR if the call is marked
+            successful but no validated model was produced.
+    """
+    # Copy the hook fields used below into locals before doing any work.
+    timeout = hook.timeout_seconds
+    hook_id = hook.id
+    fail_strategy = hook.fail_strategy
+    endpoint_url = hook.endpoint_url
+    current_is_reachable: bool | None = hook.is_reachable
+
+    if not endpoint_url:
+        raise ValueError(
+            f"hook_id={hook_id} is active but has no endpoint_url — "
+            "active hooks without an endpoint_url must be rejected by _lookup_hook"
+        )
+
+    start = time.monotonic()
+    response: httpx.Response | None = None
+    exc: Exception | None = None
+    # Any Exception raised in this block (including while reading the API key)
+    # is captured in `exc` and handed to _process_response instead of propagating.
+    try:
+        api_key: str | None = (
+            hook.api_key.get_value(apply_mask=False) if hook.api_key else None
+        )
+        headers: dict[str, str] = {"Content-Type": "application/json"}
+        if api_key:
+            headers["Authorization"] = f"Bearer {api_key}"
+        with httpx.Client(
+            timeout=timeout, follow_redirects=False
+        ) as client:  # SSRF guard: never follow redirects
+            response = client.post(endpoint_url, json=payload, headers=headers)
+    except Exception as e:
+        exc = e
+    # Elapsed time is measured whether the request returned or raised.
+    duration_ms = int((time.monotonic() - start) * 1000)
+
+    outcome = _process_response(response=response, exc=exc, timeout=timeout)
+
+    # Validate the response payload against response_type.
+    # A validation failure downgrades the outcome to a failure so it is logged,
+    # is_reachable is left unchanged (server responded — just a bad payload),
+    # and fail_strategy is respected below.
+    validated_model: T | None = None
+    if outcome.is_success and outcome.response_payload is not None:
+        try:
+            validated_model = response_type.model_validate(outcome.response_payload)
+        except ValidationError as e:
+            msg = (
+                f"Hook response failed validation against {response_type.__name__}: {e}"
+            )
+            outcome = _HttpOutcome(
+                is_success=False,
+                updated_is_reachable=None,  # server responded — reachability unchanged
+                status_code=outcome.status_code,
+                error_message=msg,
+                response_payload=None,
+            )
+
+    # Skip the is_reachable write when the value would not change — avoids a
+    # no-op DB round-trip on every call when the hook is already in the expected state.
+    if outcome.updated_is_reachable == current_is_reachable:
+        outcome = outcome.model_copy(update={"updated_is_reachable": None})
+    _persist_result(hook_id=hook_id, outcome=outcome, duration_ms=duration_ms)
+
+    if not outcome.is_success:
+        if fail_strategy == HookFailStrategy.HARD:
+            raise OnyxError(
+                OnyxErrorCode.HOOK_EXECUTION_FAILED,
+                outcome.error_message or "Hook execution failed.",
+            )
+        # Any strategy other than HARD is treated as a soft failure.
+        logger.warning(
+            f"Hook execution failed (soft fail) for hook_id={hook_id}: {outcome.error_message}"
+        )
+        return HookSoftFailed()
+
+    # Defensive check: a successful outcome whose response_payload is None skips
+    # the validation step above and leaves validated_model unset.
+    if validated_model is None:
+        raise OnyxError(
+            OnyxErrorCode.INTERNAL_ERROR,
+            f"validated_model is None for successful hook call (hook_id={hook_id})",
+        )
+    return validated_model
+
+
+def _execute_hook_impl(
+    *,
+    db_session: Session,
+    hook_point: HookPoint,
+    payload: dict[str, Any],
+    response_type: type[T],
+) -> T | HookSkipped | HookSoftFailed:
+    """EE implementation — loaded by CE's execute_hook via fetch_versioned_implementation.
+
+    Looks up the hook for ``hook_point`` and, if one is found, runs it.
+
+    Returns:
+        HookSkipped when the lookup yields no hook to run, HookSoftFailed when
+        the hook failed under the SOFT fail strategy, otherwise the validated
+        response model.
+
+    Raises:
+        Whatever the hook execution raises when the fail strategy is not SOFT
+        (e.g. OnyxError on HARD failure or a misconfigured hook).
+    """
+    lookup_result = _lookup_hook(db_session, hook_point)
+    if isinstance(lookup_result, HookSkipped):
+        return lookup_result
+
+    # Capture these before the call so the error handler below does not need
+    # to read them from the hook object.
+    fail_strategy, hook_id = lookup_result.fail_strategy, lookup_result.id
+
+    try:
+        result = _execute_hook_inner(lookup_result, payload, response_type)
+    except Exception:
+        # Only SOFT hooks swallow unexpected errors; everything else re-raises.
+        if fail_strategy != HookFailStrategy.SOFT:
+            raise
+        logger.exception(
+            f"Unexpected error in hook execution (soft fail) for hook_id={hook_id}"
+        )
+        return HookSoftFailed()
+    return result
--- a/backend/ee/onyx/main.py
+++ b/backend/ee/onyx/main.py
@@ -15,6 +15,7 @@ from ee.onyx.server.enterprise_settings.api import (
    basic_router as enterprise_settings_router,
 )
 from ee.onyx.server.evals.api import router as evals_router
+from ee.onyx.server.features.hooks.api import router as hook_router
 from ee.onyx.server.license.api import router as license_router
 from ee.onyx.server.manage.standard_answer import router as standard_answer_router
 from ee.onyx.server.middleware.license_enforcement import (
@@ -138,6 +139,7 @@ def get_application() -> FastAPI:
    include_router_with_global_prefix_prepended(application, ee_oauth_router)
    include_router_with_global_prefix_prepended(application, ee_document_cc_pair_router)
    include_router_with_global_prefix_prepended(application, evals_router)
+    include_router_with_global_prefix_prepended(application, hook_router)

    # Enterprise-only global settings
    include_router_with_global_prefix_prepended(
--- a/backend/ee/onyx/server/features/init.py
+++ b/backend/ee/onyx/server/features/init.py
--- a/backend/ee/onyx/server/features/hooks/init.py
+++ b/backend/ee/onyx/server/features/hooks/init.py
--- a/backend/ee/onyx/server/features/hooks/api.py
+++ b/backend/ee/onyx/server/features/hooks/api.py
@@ -123,9 +123,8 @@ def _validate_endpoint(
    (not reachable — indicates the api_key is invalid).

    Timeout handling:
-    - ConnectTimeout: TCP handshake never completed → cannot_connect.
-    - ReadTimeout / WriteTimeout: TCP was established, server responded slowly → timeout
-      (operator should consider increasing timeout_seconds).
+    - Any httpx.TimeoutException (ConnectTimeout, ReadTimeout, WriteTimeout, PoolTimeout) →
+      timeout (operator should consider increasing timeout_seconds).
    - All other exceptions → cannot_connect.
    """
    _check_ssrf_safety(endpoint_url)
--- a/backend/ee/onyx/server/tenants/provisioning.py
+++ b/backend/ee/onyx/server/tenants/provisioning.py
@@ -99,6 +99,26 @@ async def get_or_provision_tenant(
        tenant_id = await get_available_tenant()

        if tenant_id:
+            # Run migrations to ensure the pre-provisioned tenant schema is current.
+            # Pool tenants may have been created before a new migration was deployed.
+            # Capture as a non-optional local so mypy can type the lambda correctly.
+            _tenant_id: str = tenant_id
+            loop = asyncio.get_running_loop()
+            try:
+                await loop.run_in_executor(
+                    None, lambda: run_alembic_migrations(_tenant_id)
+                )
+            except Exception:
+                # The tenant was already dequeued from the pool — roll it back so
+                # it doesn't end up orphaned (schema exists, but not assigned to anyone).
+                logger.exception(
+                    f"Migration failed for pre-provisioned tenant {_tenant_id}; rolling back"
+                )
+                try:
+                    await rollback_tenant_provisioning(_tenant_id)
+                except Exception:
+                    logger.exception(f"Failed to rollback orphaned tenant {_tenant_id}")
+                raise
            # If we have a pre-provisioned tenant, assign it to the user
            await assign_tenant_to_user(tenant_id, email, referral_source)
            logger.info(f"Assigned pre-provisioned tenant {tenant_id} to user {email}")
--- a/backend/model_server/main.py
+++ b/backend/model_server/main.py
@@ -100,6 +100,7 @@ def get_model_app() -> FastAPI:
            dsn=SENTRY_DSN,
            integrations=[StarletteIntegration(), FastApiIntegration()],
            traces_sample_rate=0.1,
+            release=__version__,
        )
        logger.info("Sentry initialized")
    else:
--- a/backend/onyx/background/celery/apps/app_base.py
+++ b/backend/onyx/background/celery/apps/app_base.py
@@ -20,6 +20,7 @@ from sentry_sdk.integrations.celery import CeleryIntegration
 from sqlalchemy import text
 from sqlalchemy.orm import Session

+from onyx import __version__
 from onyx.background.celery.apps.task_formatters import CeleryTaskColoredFormatter
 from onyx.background.celery.apps.task_formatters import CeleryTaskPlainFormatter
 from onyx.background.celery.celery_utils import celery_is_worker_primary
@@ -65,6 +66,7 @@ if SENTRY_DSN:
        dsn=SENTRY_DSN,
        integrations=[CeleryIntegration()],
        traces_sample_rate=0.1,
+        release=__version__,
    )
    logger.info("Sentry initialized")
 else:
@@ -515,7 +517,8 @@ def reset_tenant_id(


 def wait_for_vespa_or_shutdown(
-    sender: Any, **kwargs: Any  # noqa: ARG001
+    sender: Any,  # noqa: ARG001
+    **kwargs: Any,  # noqa: ARG001
 ) -> None:  # noqa: ARG001
    """Waits for Vespa to become ready subject to a timeout.
    Raises WorkerShutdown if the timeout is reached."""
--- a/backend/onyx/background/celery/apps/primary.py
+++ b/backend/onyx/background/celery/apps/primary.py
@@ -317,7 +317,6 @@ celery_app.autodiscover_tasks(
            "onyx.background.celery.tasks.docprocessing",
            "onyx.background.celery.tasks.evals",
            "onyx.background.celery.tasks.hierarchyfetching",
-            "onyx.background.celery.tasks.hooks",
            "onyx.background.celery.tasks.periodic",
            "onyx.background.celery.tasks.pruning",
            "onyx.background.celery.tasks.shared",
--- a/backend/onyx/background/celery/celery_redis.py
+++ b/backend/onyx/background/celery/celery_redis.py
@@ -1,5 +1,6 @@
 # These are helper objects for tracking the keys we need to write in redis
 import json
+import threading
 from typing import Any
 from typing import cast

@@ -7,7 +8,59 @@ from celery import Celery
 from redis import Redis

 from onyx.background.celery.configs.base import CELERY_SEPARATOR
+from onyx.configs.app_configs import REDIS_HEALTH_CHECK_INTERVAL
 from onyx.configs.constants import OnyxCeleryPriority
+from onyx.configs.constants import REDIS_SOCKET_KEEPALIVE_OPTIONS
+
+
+# Cached Redis client for the Celery broker; None until first created or after
+# it has been discarded.
+_broker_client: Redis | None = None
+# Broker URL the cached client was created from, used to detect URL changes.
+_broker_url: str | None = None
+# Guards reads and writes of the two cache variables above.
+_broker_client_lock = threading.Lock()
+
+
+def celery_get_broker_client(app: Celery) -> Redis:
+    """Return the shared Redis client for the Celery broker DB.
+
+    The client is cached at module level so every task on a worker reuses
+    one client rather than building a new one per call. It is created
+    directly from the app's broker URL.
+
+    The cached client is reused only if it was built for the current broker
+    URL and still answers a ping; otherwise it is closed and replaced.
+
+    Thread-safe via lock — safe for use in Celery thread-pool workers.
+
+    Usage:
+        r_celery = celery_get_broker_client(self.app)
+        length = celery_get_queue_length(queue, r_celery)
+    """
+    global _broker_client, _broker_url
+    with _broker_client_lock:
+        current_url = app.conf.broker_url
+        cached = _broker_client
+
+        if cached is not None:
+            if _broker_url == current_url:
+                # Same broker: keep the cached client as long as it responds.
+                try:
+                    cached.ping()
+                    return cached
+                except Exception:
+                    pass
+            # Reached when the ping failed or the broker URL changed:
+            # discard the cached client, ignoring errors while closing it.
+            try:
+                cached.close()
+            except Exception:
+                pass
+            _broker_client = None
+
+        _broker_url = current_url
+        _broker_client = Redis.from_url(
+            current_url,
+            decode_responses=False,
+            health_check_interval=REDIS_HEALTH_CHECK_INTERVAL,
+            socket_keepalive=True,
+            socket_keepalive_options=REDIS_SOCKET_KEEPALIVE_OPTIONS,
+            retry_on_timeout=True,
+        )
+        return _broker_client


 def celery_get_unacked_length(r: Redis) -> int:
--- a/backend/onyx/background/celery/tasks/beat_schedule.py
+++ b/backend/onyx/background/celery/tasks/beat_schedule.py
@@ -14,7 +14,6 @@ from onyx.configs.constants import ONYX_CLOUD_CELERY_TASK_PREFIX
 from onyx.configs.constants import OnyxCeleryPriority
 from onyx.configs.constants import OnyxCeleryQueues
 from onyx.configs.constants import OnyxCeleryTask
-from onyx.hooks.utils import HOOKS_AVAILABLE
 from shared_configs.configs import MULTI_TENANT

 # choosing 15 minutes because it roughly gives us enough time to process many tasks
@@ -362,19 +361,6 @@ if not MULTI_TENANT:

    tasks_to_schedule.extend(beat_task_templates)

-if HOOKS_AVAILABLE:
-    tasks_to_schedule.append(
-        {
-            "name": "hook-execution-log-cleanup",
-            "task": OnyxCeleryTask.HOOK_EXECUTION_LOG_CLEANUP_TASK,
-            "schedule": timedelta(days=1),
-            "options": {
-                "priority": OnyxCeleryPriority.LOW,
-                "expires": BEAT_EXPIRES_DEFAULT,
-            },
-        }
-    )
-

 def generate_cloud_tasks(
    beat_tasks: list[dict], beat_templates: list[dict], beat_multiplier: float
--- a/backend/onyx/background/celery/tasks/connector_deletion/tasks.py
+++ b/backend/onyx/background/celery/tasks/connector_deletion/tasks.py
@@ -14,6 +14,7 @@ from redis.lock import Lock as RedisLock
 from sqlalchemy.orm import Session

 from onyx.background.celery.apps.app_base import task_logger
+from onyx.background.celery.celery_redis import celery_get_broker_client
 from onyx.background.celery.celery_redis import celery_get_queue_length
 from onyx.background.celery.celery_redis import celery_get_queued_task_ids
 from onyx.configs.app_configs import JOB_TIMEOUT
@@ -132,7 +133,6 @@ def revoke_tasks_blocking_deletion(
 def check_for_connector_deletion_task(self: Task, *, tenant_id: str) -> bool | None:
    r = get_redis_client()
    r_replica = get_redis_replica_client()
-    r_celery: Redis = self.app.broker_connection().channel().client  # type: ignore

    lock_beat: RedisLock = r.lock(
        OnyxRedisLocks.CHECK_CONNECTOR_DELETION_BEAT_LOCK,
@@ -149,6 +149,7 @@ def check_for_connector_deletion_task(self: Task, *, tenant_id: str) -> bool | N
        if not r.exists(OnyxRedisSignals.BLOCK_VALIDATE_CONNECTOR_DELETION_FENCES):
            # clear fences that don't have associated celery tasks in progress
            try:
+                r_celery = celery_get_broker_client(self.app)
                validate_connector_deletion_fences(
                    tenant_id, r, r_replica, r_celery, lock_beat
                )
--- a/backend/onyx/background/celery/tasks/docfetching/tasks.py
+++ b/backend/onyx/background/celery/tasks/docfetching/tasks.py
@@ -9,6 +9,7 @@ from celery import Celery
 from celery import shared_task
 from celery import Task

+from onyx import __version__
 from onyx.background.celery.apps.app_base import task_logger
 from onyx.background.celery.memory_monitoring import emit_process_memory
 from onyx.background.celery.tasks.docprocessing.heartbeat import start_heartbeat
@@ -137,6 +138,7 @@ def _docfetching_task(
        sentry_sdk.init(
            dsn=SENTRY_DSN,
            traces_sample_rate=0.1,
+            release=__version__,
        )
        logger.info("Sentry initialized")
    else:
--- a/backend/onyx/background/celery/tasks/docprocessing/tasks.py
+++ b/backend/onyx/background/celery/tasks/docprocessing/tasks.py
@@ -22,6 +22,7 @@ from sqlalchemy.orm import Session

 from onyx.background.celery.apps.app_base import task_logger
 from onyx.background.celery.celery_redis import celery_find_task
+from onyx.background.celery.celery_redis import celery_get_broker_client
 from onyx.background.celery.celery_redis import celery_get_unacked_task_ids
 from onyx.background.celery.celery_utils import httpx_init_vespa_pool
 from onyx.background.celery.memory_monitoring import emit_process_memory
@@ -318,6 +319,11 @@ def monitor_indexing_attempt_progress(
    )

    current_db_time = get_db_current_time(db_session)
+    total_batches: int | str = (
+        coordination_status.total_batches
+        if coordination_status.total_batches is not None
+        else "?"
+    )
    if coordination_status.found:
        task_logger.info(
            f"Indexing attempt progress: "
@@ -325,7 +331,7 @@ def monitor_indexing_attempt_progress(
            f"cc_pair={attempt.connector_credential_pair_id} "
            f"search_settings={attempt.search_settings_id} "
            f"completed_batches={coordination_status.completed_batches} "
-            f"total_batches={coordination_status.total_batches or '?'} "
+            f"total_batches={total_batches} "
            f"total_docs={coordination_status.total_docs} "
            f"total_failures={coordination_status.total_failures}"
            f"elapsed={(current_db_time - attempt.time_created).seconds}"
@@ -409,7 +415,7 @@ def check_indexing_completion(
    logger.info(
        f"Indexing status: "
        f"indexing_completed={indexing_completed} "
-        f"batches_processed={batches_processed}/{batches_total or '?'} "
+        f"batches_processed={batches_processed}/{batches_total if batches_total is not None else '?'} "
        f"total_docs={coordination_status.total_docs} "
        f"total_chunks={coordination_status.total_chunks} "
        f"total_failures={coordination_status.total_failures}"
@@ -449,7 +455,7 @@ def check_indexing_completion(
            ):
                # Check if the task exists in the celery queue
                # This handles the case where Redis dies after task creation but before task execution
-                redis_celery = task.app.broker_connection().channel().client  # type: ignore
+                redis_celery = celery_get_broker_client(task.app)
                task_exists = celery_find_task(
                    attempt.celery_task_id,
                    OnyxCeleryQueues.CONNECTOR_DOC_FETCHING,
--- a/backend/onyx/background/celery/tasks/monitoring/tasks.py
+++ b/backend/onyx/background/celery/tasks/monitoring/tasks.py
@@ -1,6 +1,5 @@
 import json
 import time
-from collections.abc import Callable
 from datetime import timedelta
 from itertools import islice
 from typing import Any
@@ -19,6 +18,7 @@ from sqlalchemy import text
 from sqlalchemy.orm import Session

 from onyx.background.celery.apps.app_base import task_logger
+from onyx.background.celery.celery_redis import celery_get_broker_client
 from onyx.background.celery.celery_redis import celery_get_queue_length
 from onyx.background.celery.celery_redis import celery_get_unacked_task_ids
 from onyx.background.celery.memory_monitoring import emit_process_memory
@@ -698,31 +698,27 @@ def monitor_background_processes(self: Task, *, tenant_id: str) -> None:
        return None

    try:
-        # Get Redis client for Celery broker
-        redis_celery = self.app.broker_connection().channel().client  # type: ignore
        redis_std = get_redis_client()

-        # Define metric collection functions and their dependencies
-        metric_functions: list[Callable[[], list[Metric]]] = [
-            lambda: _collect_queue_metrics(redis_celery),
-            lambda: _collect_connector_metrics(db_session, redis_std),
-            lambda: _collect_sync_metrics(db_session, redis_std),
-        ]
+        # Collect queue metrics with broker connection
+        r_celery = celery_get_broker_client(self.app)
+        queue_metrics = _collect_queue_metrics(r_celery)

-        # Collect and log each metric
+        # Collect remaining metrics (no broker connection needed)
        with get_session_with_current_tenant() as db_session:
-            for metric_fn in metric_functions:
-                metrics = metric_fn()
-                for metric in metrics:
-                    # double check to make sure we aren't double-emitting metrics
-                    if metric.key is None or not _has_metric_been_emitted(
-                        redis_std, metric.key
-                    ):
-                        metric.log()
-                        metric.emit(tenant_id)
+            all_metrics: list[Metric] = queue_metrics
+            all_metrics.extend(_collect_connector_metrics(db_session, redis_std))
+            all_metrics.extend(_collect_sync_metrics(db_session, redis_std))

-                    if metric.key is not None:
-                        _mark_metric_as_emitted(redis_std, metric.key)
+            for metric in all_metrics:
+                if metric.key is None or not _has_metric_been_emitted(
+                    redis_std, metric.key
+                ):
+                    metric.log()
+                    metric.emit(tenant_id)
+
+                if metric.key is not None:
+                    _mark_metric_as_emitted(redis_std, metric.key)

        task_logger.info("Successfully collected background metrics")
    except SoftTimeLimitExceeded:
@@ -890,7 +886,7 @@ def monitor_celery_queues_helper(
 ) -> None:
    """A task to monitor all celery queue lengths."""

-    r_celery = task.app.broker_connection().channel().client  # type: ignore
+    r_celery = celery_get_broker_client(task.app)
    n_celery = celery_get_queue_length(OnyxCeleryQueues.PRIMARY, r_celery)
    n_docfetching = celery_get_queue_length(
        OnyxCeleryQueues.CONNECTOR_DOC_FETCHING, r_celery
@@ -1080,7 +1076,7 @@ def cloud_monitor_celery_pidbox(
    num_deleted = 0

    MAX_PIDBOX_IDLE = 24 * 3600  # 1 day in seconds
-    r_celery: Redis = self.app.broker_connection().channel().client  # type: ignore
+    r_celery = celery_get_broker_client(self.app)
    for key in r_celery.scan_iter("*.reply.celery.pidbox"):
        key_bytes = cast(bytes, key)
        key_str = key_bytes.decode("utf-8")
--- a/backend/onyx/background/celery/tasks/pruning/tasks.py
+++ b/backend/onyx/background/celery/tasks/pruning/tasks.py
@@ -17,6 +17,7 @@ from sqlalchemy.orm import Session

 from onyx.background.celery.apps.app_base import task_logger
 from onyx.background.celery.celery_redis import celery_find_task
+from onyx.background.celery.celery_redis import celery_get_broker_client
 from onyx.background.celery.celery_redis import celery_get_queue_length
 from onyx.background.celery.celery_redis import celery_get_queued_task_ids
 from onyx.background.celery.celery_redis import celery_get_unacked_task_ids
@@ -203,7 +204,6 @@ def _is_pruning_due(cc_pair: ConnectorCredentialPair) -> bool:
 def check_for_pruning(self: Task, *, tenant_id: str) -> bool | None:
    r = get_redis_client()
    r_replica = get_redis_replica_client()
-    r_celery: Redis = self.app.broker_connection().channel().client  # type: ignore

    lock_beat: RedisLock = r.lock(
        OnyxRedisLocks.CHECK_PRUNE_BEAT_LOCK,
@@ -261,6 +261,7 @@ def check_for_pruning(self: Task, *, tenant_id: str) -> bool | None:
            # tasks can be in the queue in redis, in reserved tasks (prefetched by the worker),
            # or be currently executing
            try:
+                r_celery = celery_get_broker_client(self.app)
                validate_pruning_fences(tenant_id, r, r_replica, r_celery, lock_beat)
            except Exception:
                task_logger.exception("Exception while validating pruning fences")
--- a/backend/onyx/background/celery/tasks/user_file_processing/tasks.py
+++ b/backend/onyx/background/celery/tasks/user_file_processing/tasks.py
@@ -16,6 +16,7 @@ from sqlalchemy.orm import Session

 from onyx.access.access import build_access_for_user_files
 from onyx.background.celery.apps.app_base import task_logger
+from onyx.background.celery.celery_redis import celery_get_broker_client
 from onyx.background.celery.celery_redis import celery_get_queue_length
 from onyx.background.celery.celery_utils import httpx_init_vespa_pool
 from onyx.background.celery.tasks.shared.RetryDocumentIndex import RetryDocumentIndex
@@ -105,7 +106,7 @@ def _user_file_delete_queued_key(user_file_id: str | UUID) -> str:


 def get_user_file_project_sync_queue_depth(celery_app: Celery) -> int:
-    redis_celery: Redis = celery_app.broker_connection().channel().client  # type: ignore
+    redis_celery = celery_get_broker_client(celery_app)
    return celery_get_queue_length(
        OnyxCeleryQueues.USER_FILE_PROJECT_SYNC, redis_celery
    )
@@ -238,7 +239,7 @@ def check_user_file_processing(self: Task, *, tenant_id: str) -> None:
    skipped_guard = 0
    try:
        # --- Protection 1: queue depth backpressure ---
-        r_celery = self.app.broker_connection().channel().client  # type: ignore
+        r_celery = celery_get_broker_client(self.app)
        queue_len = celery_get_queue_length(
            OnyxCeleryQueues.USER_FILE_PROCESSING, r_celery
        )
@@ -591,7 +592,7 @@ def check_for_user_file_delete(self: Task, *, tenant_id: str) -> None:
        # --- Protection 1: queue depth backpressure ---
        # NOTE: must use the broker's Redis client (not redis_client) because
        # Celery queues live on a separate Redis DB with CELERY_SEPARATOR keys.
-        r_celery: Redis = self.app.broker_connection().channel().client  # type: ignore
+        r_celery = celery_get_broker_client(self.app)
        queue_len = celery_get_queue_length(OnyxCeleryQueues.USER_FILE_DELETE, r_celery)
        if queue_len > USER_FILE_DELETE_MAX_QUEUE_DEPTH:
            task_logger.warning(
--- a/backend/onyx/chat/chat_utils.py
+++ b/backend/onyx/chat/chat_utils.py
@@ -5,7 +5,6 @@ from typing import cast
 from uuid import UUID

 from fastapi.datastructures import Headers
-from pydantic import BaseModel
 from sqlalchemy.orm import Session

 from onyx.chat.models import ChatHistoryResult
@@ -52,60 +51,6 @@ logger = setup_logger()
 IMAGE_GENERATION_TOOL_NAME = "generate_image"


-class FileContextResult(BaseModel):
-    """Result of building a file's LLM context representation."""
-
-    message: ChatMessageSimple
-    tool_metadata: FileToolMetadata
-
-
-def build_file_context(
-    tool_file_id: str,
-    filename: str,
-    file_type: ChatFileType,
-    content_text: str | None = None,
-    token_count: int = 0,
-    approx_char_count: int | None = None,
-) -> FileContextResult:
-    """Build the LLM context representation for a single file.
-
-    Centralises how files should appear in the LLM prompt
-    — the ID that FileReaderTool accepts (``UserFile.id`` for user files).
-    """
-    if file_type.use_metadata_only():
-        message_text = (
-            f"File: {filename} (id={tool_file_id})\n"
-            "Use the file_reader or python tools to access "
-            "this file's contents."
-        )
-        message = ChatMessageSimple(
-            message=message_text,
-            token_count=max(1, len(message_text) // 4),
-            message_type=MessageType.USER,
-            file_id=tool_file_id,
-        )
-    else:
-        message_text = f"File: {filename}\n{content_text or ''}\nEnd of File"
-        message = ChatMessageSimple(
-            message=message_text,
-            token_count=token_count,
-            message_type=MessageType.USER,
-            file_id=tool_file_id,
-        )
-
-    metadata = FileToolMetadata(
-        file_id=tool_file_id,
-        filename=filename,
-        approx_char_count=(
-            approx_char_count
-            if approx_char_count is not None
-            else len(content_text or "")
-        ),
-    )
-
-    return FileContextResult(message=message, tool_metadata=metadata)
-
-
 def create_chat_session_from_request(
    chat_session_request: ChatSessionCreationRequest,
    user_id: UUID | None,
@@ -593,7 +538,7 @@ def convert_chat_history(
    for idx, chat_message in enumerate(chat_history):
        if chat_message.message_type == MessageType.USER:
            # Process files attached to this message
-            text_files: list[tuple[ChatLoadedFile, FileDescriptor]] = []
+            text_files: list[ChatLoadedFile] = []
            image_files: list[ChatLoadedFile] = []

            if chat_message.files:
@@ -604,26 +549,34 @@ def convert_chat_history(
                        if loaded_file.file_type == ChatFileType.IMAGE:
                            image_files.append(loaded_file)
                        else:
-                            # Text files (DOC, PLAIN_TEXT, TABULAR) are added as separate messages
-                            text_files.append((loaded_file, file_descriptor))
+                            # Text files (DOC, PLAIN_TEXT, CSV) are added as separate messages
+                            text_files.append(loaded_file)

            # Add text files as separate messages before the user message.
            # Each message is tagged with ``file_id`` so that forgotten files
            # can be detected after context-window truncation.
-            for text_file, fd in text_files:
-                # Use user_file_id as the FileReaderTool accepts that.
-                # Fall back to the file-store path id.
-                tool_id = fd.get("user_file_id") or text_file.file_id
-                filename = text_file.filename or "unknown"
-                ctx = build_file_context(
-                    tool_file_id=tool_id,
-                    filename=filename,
-                    file_type=text_file.file_type,
-                    content_text=text_file.content_text,
-                    token_count=text_file.token_count,
+            for text_file in text_files:
+                file_text = text_file.content_text or ""
+                filename = text_file.filename
+                message = (
+                    f"File: {filename}\n{file_text}\nEnd of File"
+                    if filename
+                    else file_text
+                )
+                simple_messages.append(
+                    ChatMessageSimple(
+                        message=message,
+                        token_count=text_file.token_count,
+                        message_type=MessageType.USER,
+                        image_files=None,
+                        file_id=text_file.file_id,
+                    )
+                )
+                all_injected_file_metadata[text_file.file_id] = FileToolMetadata(
+                    file_id=text_file.file_id,
+                    filename=filename or "unknown",
+                    approx_char_count=len(file_text),
                )
-                simple_messages.append(ctx.message)
-                all_injected_file_metadata[tool_id] = ctx.tool_metadata

            # Sum token counts from image files (excluding project image files)
            image_token_count = (
--- a/backend/onyx/chat/process_message.py
+++ b/backend/onyx/chat/process_message.py
@@ -18,7 +18,6 @@ from onyx.cache.interface import CacheBackend
 from onyx.chat.chat_processing_checker import set_processing_status
 from onyx.chat.chat_state import ChatStateContainer
 from onyx.chat.chat_state import run_chat_loop_with_state_containers
-from onyx.chat.chat_utils import build_file_context
 from onyx.chat.chat_utils import convert_chat_history
 from onyx.chat.chat_utils import create_chat_history_chain
 from onyx.chat.chat_utils import create_chat_session_from_request
@@ -91,7 +90,6 @@ from onyx.llm.request_context import reset_llm_mock_response
 from onyx.llm.request_context import set_llm_mock_response
 from onyx.llm.utils import litellm_exception_to_error_msg
 from onyx.onyxbot.slack.models import SlackContext
-from onyx.server.query_and_chat.chat_utils import mime_type_to_chat_file_type
 from onyx.server.query_and_chat.models import AUTO_PLACE_AFTER_LATEST_MESSAGE
 from onyx.server.query_and_chat.models import MessageResponseIDInfo
 from onyx.server.query_and_chat.models import SendMessageRequest
@@ -119,8 +117,6 @@ from shared_configs.contextvars import get_current_tenant_id
 logger = setup_logger()
 ERROR_TYPE_CANCELLED = "cancelled"

-APPROX_CHARS_PER_TOKEN = 4
-

 class _AvailableFiles(BaseModel):
    """Separated file IDs for the FileReaderTool so it knows which loader to use."""
@@ -305,27 +301,16 @@ def extract_context_files(
    if not user_files:
        return _empty_extracted_context_files()

-    # Aggregate tokens for the file content that will be added
-    # Skip tokens for those with metadata only
-    aggregate_tokens = sum(
-        uf.token_count or 0
-        for uf in user_files
-        if not mime_type_to_chat_file_type(uf.file_type).use_metadata_only()
-    )
+    aggregate_tokens = sum(uf.token_count or 0 for uf in user_files)
    max_actual_tokens = (
        llm_max_context_window - reserved_token_count
    ) * max_llm_context_percentage

    if aggregate_tokens >= max_actual_tokens:
+        tool_metadata = []
        use_as_search_filter = not DISABLE_VECTOR_DB
        if DISABLE_VECTOR_DB:
-            tool_metadata = [_build_tool_metadata(uf) for uf in user_files]
-        else:
-            tool_metadata = [
-                _build_tool_metadata(uf)
-                for uf in user_files
-                if mime_type_to_chat_file_type(uf.file_type).use_metadata_only()
-            ]
+            tool_metadata = _build_file_tool_metadata_for_user_files(user_files)
        return ExtractedContextFiles(
            file_texts=[],
            image_files=[],
@@ -337,7 +322,7 @@ def extract_context_files(
        )

    # Files fit — load them into context
-    user_file_map = {uf.file_id: uf for uf in user_files}
+    user_file_map = {str(uf.id): uf for uf in user_files}
    in_memory_files = load_in_memory_chat_files(
        user_file_ids=[uf.id for uf in user_files],
        db_session=db_session,
@@ -346,23 +331,11 @@ def extract_context_files(
    file_texts: list[str] = []
    image_files: list[ChatLoadedFile] = []
    file_metadata: list[ContextFileMetadata] = []
-    tool_metadata: list[FileToolMetadata] = []
    total_token_count = 0

    for f in in_memory_files:
        uf = user_file_map.get(str(f.file_id))
-        filename = f.filename or f"file_{f.file_id}"
-
-        if f.file_type.use_metadata_only():
-            # Metadata-only files are not injected as full text.
-            # Only the metadata is provided, with LLM using tools
-            if not uf:
-                logger.error(
-                    f"File with id={f.file_id} in metadata-only path with no associated user file"
-                )
-                continue
-            tool_metadata.append(_build_tool_metadata(uf))
-        elif f.file_type.is_text_file():
+        if f.file_type.is_text_file():
            text_content = _extract_text_from_in_memory_file(f)
            if not text_content:
                continue
@@ -370,7 +343,7 @@ def extract_context_files(
            file_metadata.append(
                ContextFileMetadata(
                    file_id=str(f.file_id),
-                    filename=filename,
+                    filename=f.filename or f"file_{f.file_id}",
                    file_content=text_content,
                )
            )
@@ -397,22 +370,24 @@ def extract_context_files(
        total_token_count=total_token_count,
        file_metadata=file_metadata,
        uncapped_token_count=aggregate_tokens,
-        file_metadata_for_tool=tool_metadata,
    )


-def _build_tool_metadata(user_file: UserFile) -> FileToolMetadata:
-    """Build lightweight FileToolMetadata from a UserFile record.
+APPROX_CHARS_PER_TOKEN = 4

-    Delegates to ``build_file_context`` so that the file ID exposed to the
-    LLM is always consistent with what FileReaderTool expects.
-    """
-    return build_file_context(
-        tool_file_id=str(user_file.id),
-        filename=user_file.name,
-        file_type=mime_type_to_chat_file_type(user_file.file_type),
-        approx_char_count=(user_file.token_count or 0) * APPROX_CHARS_PER_TOKEN,
-    ).tool_metadata
+
+def _build_file_tool_metadata_for_user_files(
+    user_files: list[UserFile],
+) -> list[FileToolMetadata]:
+    """Convert UserFile records into FileToolMetadata entries, one per record.
+
+    The character count is an estimate: the record's token count (0 when it
+    is unset) multiplied by APPROX_CHARS_PER_TOKEN.
+    """
+    metadata: list[FileToolMetadata] = []
+    for user_file in user_files:
+        metadata.append(
+            FileToolMetadata(
+                file_id=str(user_file.id),
+                filename=user_file.name,
+                approx_char_count=(user_file.token_count or 0)
+                * APPROX_CHARS_PER_TOKEN,
+            )
+        )
+    return metadata


 def determine_search_params(
--- a/backend/onyx/configs/app_configs.py
+++ b/backend/onyx/configs/app_configs.py
@@ -805,6 +805,10 @@ MINI_CHUNK_SIZE = 150
 # This is the number of regular chunks per large chunk
 LARGE_CHUNK_RATIO = 4

+# The maximum number of chunks that can be held for 1 document processing batch
+# The purpose of this is to set an upper bound on memory usage
+MAX_CHUNKS_PER_DOC_BATCH = int(os.environ.get("MAX_CHUNKS_PER_DOC_BATCH") or 1000)
+
 # Include the document level metadata in each chunk. If the metadata is too long, then it is thrown out
 # We don't want the metadata to overwhelm the actual contents of the chunk
 SKIP_METADATA_IN_CHUNK = os.environ.get("SKIP_METADATA_IN_CHUNK", "").lower() == "true"
@@ -1075,7 +1079,6 @@ POD_NAMESPACE = os.environ.get("POD_NAMESPACE")

 DEV_MODE = os.environ.get("DEV_MODE", "").lower() == "true"

-HOOK_ENABLED = os.environ.get("HOOK_ENABLED", "").lower() == "true"

 INTEGRATION_TESTS_MODE = os.environ.get("INTEGRATION_TESTS_MODE", "").lower() == "true"

--- a/backend/onyx/configs/constants.py
+++ b/backend/onyx/configs/constants.py
@@ -212,6 +212,7 @@ class DocumentSource(str, Enum):
    PRODUCTBOARD = "productboard"
    FILE = "file"
    CODA = "coda"
+    CANVAS = "canvas"
    NOTION = "notion"
    ZULIP = "zulip"
    LINEAR = "linear"
@@ -672,6 +673,7 @@ DocumentSourceDescription: dict[DocumentSource, str] = {
    DocumentSource.SLAB: "slab data",
    DocumentSource.PRODUCTBOARD: "productboard data (boards, etc.)",
    DocumentSource.FILE: "files",
+    DocumentSource.CANVAS: "canvas lms - courses, pages, assignments, and announcements",
    DocumentSource.CODA: "coda - team workspace with docs, tables, and pages",
    DocumentSource.NOTION: "notion data - a workspace that combines note-taking, \
 project management, and collaboration tools into a single, customizable platform",
--- a/backend/onyx/connectors/canvas/access.py
+++ b/backend/onyx/connectors/canvas/access.py
@@ -0,0 +1,32 @@
+"""
+Permissioning / AccessControl logic for Canvas courses.
+
+CE stub — returns None (no permissions). The EE implementation is loaded
+at runtime via ``fetch_versioned_implementation``.
+"""
+
+from collections.abc import Callable
+from typing import cast
+
+from onyx.access.models import ExternalAccess
+from onyx.connectors.canvas.client import CanvasApiClient
+from onyx.utils.variable_functionality import fetch_versioned_implementation
+from onyx.utils.variable_functionality import global_version
+
+
+def get_course_permissions(
+    canvas_client: CanvasApiClient,
+    course_id: int,
+) -> ExternalAccess | None:
+    """Return external access info for a Canvas course, or None outside EE.
+
+    When ``global_version.is_ee_version()`` is true, the call is forwarded to
+    the callable looked up via ``fetch_versioned_implementation`` with module
+    path ``onyx.external_permissions.canvas.access`` and attribute name
+    ``get_course_permissions``. Otherwise None is returned without any lookup.
+    """
+    if global_version.is_ee_version():
+        ee_impl = cast(
+            Callable[[CanvasApiClient, int], ExternalAccess | None],
+            fetch_versioned_implementation(
+                "onyx.external_permissions.canvas.access",
+                "get_course_permissions",
+            ),
+        )
+        return ee_impl(canvas_client, course_id)
+
+    return None
--- a/backend/onyx/connectors/canvas/client.py
+++ b/backend/onyx/connectors/canvas/client.py
@@ -2,6 +2,7 @@ from __future__ import annotations

 import logging
 import re
+from collections.abc import Iterator
 from typing import Any
 from urllib.parse import urlparse

@@ -190,3 +191,22 @@ class CanvasApiClient:
        if clean_endpoint:
            final_url += "/" + clean_endpoint
        return final_url
+
+    def paginate(
+        self,
+        endpoint: str,
+        params: dict[str, Any] | None = None,
+    ) -> Iterator[list[Any]]:
+        """Iterate over result pages, yielding one list per page.
+
+        The first page is requested from ``endpoint`` with ``params``. Each
+        later page is requested from the ``next_url`` (taken from the Link
+        header) that came back alongside the previous page. Iteration stops
+        at the first empty page or as soon as no ``next_url`` is returned.
+        """
+        page, following_url = self.get(endpoint, params=params)
+        while page:
+            yield page
+            if not following_url:
+                return
+            page, following_url = self.get(full_url=following_url)
--- a/backend/onyx/connectors/canvas/connector.py
+++ b/backend/onyx/connectors/canvas/connector.py
@@ -1,17 +1,82 @@
+from datetime import datetime
+from datetime import timezone
+from typing import Any
+from typing import cast
 from typing import Literal
+from typing import NoReturn
 from typing import TypeAlias

 from pydantic import BaseModel
+from retry import retry
+from typing_extensions import override

+from onyx.access.models import ExternalAccess
+from onyx.configs.app_configs import INDEX_BATCH_SIZE
+from onyx.configs.constants import DocumentSource
+from onyx.connectors.canvas.access import get_course_permissions
+from onyx.connectors.canvas.client import CanvasApiClient
+from onyx.connectors.exceptions import ConnectorValidationError
+from onyx.connectors.exceptions import CredentialExpiredError
+from onyx.connectors.exceptions import InsufficientPermissionsError
+from onyx.connectors.exceptions import UnexpectedValidationError
+from onyx.connectors.interfaces import CheckpointedConnectorWithPermSync
+from onyx.connectors.interfaces import CheckpointOutput
+from onyx.connectors.interfaces import GenerateSlimDocumentOutput
+from onyx.connectors.interfaces import SecondsSinceUnixEpoch
+from onyx.connectors.interfaces import SlimConnectorWithPermSync
 from onyx.connectors.models import ConnectorCheckpoint
+from onyx.connectors.models import ConnectorMissingCredentialError
+from onyx.connectors.models import Document
+from onyx.connectors.models import ImageSection
+from onyx.connectors.models import TextSection
+from onyx.error_handling.exceptions import OnyxError
+from onyx.file_processing.html_utils import parse_html_page_basic
+from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
+from onyx.utils.logger import setup_logger
+
+logger = setup_logger()
+
+
+def _handle_canvas_api_error(e: OnyxError) -> NoReturn:
+    """Translate a Canvas API error into a connector exception; always raises.
+
+    Mapping by ``e.status_code``:
+      * 401 -> CredentialExpiredError
+      * 403 -> InsufficientPermissionsError
+      * 429 -> ConnectorValidationError (rate limit)
+      * 500 and above -> UnexpectedValidationError
+      * anything else -> ConnectorValidationError
+    """
+    status = e.status_code
+
+    if status == 401:
+        raise CredentialExpiredError(
+            "Canvas API token is invalid or expired (HTTP 401)."
+        )
+    if status == 403:
+        raise InsufficientPermissionsError(
+            "Canvas API token does not have sufficient permissions (HTTP 403)."
+        )
+    if status == 429:
+        raise ConnectorValidationError(
+            "Canvas rate-limit exceeded (HTTP 429). Please try again later."
+        )
+    if status >= 500:
+        raise UnexpectedValidationError(
+            f"Unexpected Canvas HTTP error (status={status}): {e}"
+        )
+
+    # Remaining statuses are reported as a generic validation failure.
+    raise ConnectorValidationError(f"Canvas API error (status={status}): {e}")


 class CanvasCourse(BaseModel):
+    """Canvas course record.
+
+    Only ``id`` is required; every other field defaults to ``None``.
+    """
+
     id: int
-    name: str
-    course_code: str
-    created_at: str
-    workflow_state: str
+    name: str | None = None
+    course_code: str | None = None
+    created_at: str | None = None
+    workflow_state: str | None = None
+
+    @classmethod
+    def from_api(cls, payload: dict[str, Any]) -> "CanvasCourse":
+        """Build a course from one raw API payload dict.
+
+        ``payload["id"]`` is read with subscript access, so a payload without
+        it raises ``KeyError``. The remaining keys are read with ``.get`` and
+        become ``None`` when absent.
+        """
+        return cls(
+            id=payload["id"],
+            name=payload.get("name"),
+            course_code=payload.get("course_code"),
+            created_at=payload.get("created_at"),
+            workflow_state=payload.get("workflow_state"),
+        )


 class CanvasPage(BaseModel):
@@ -19,10 +84,22 @@ class CanvasPage(BaseModel):
    url: str
    title: str
    body: str | None = None
-    created_at: str
-    updated_at: str
+    created_at: str | None = None
+    updated_at: str | None = None
    course_id: int

+    @classmethod
+    def from_api(cls, payload: dict[str, Any], course_id: int) -> "CanvasPage":
+        """Build a page from one raw API payload dict plus its course id.
+
+        ``page_id``, ``url`` and ``title`` are read with subscript access, so
+        a payload missing any of them raises ``KeyError``. ``body``,
+        ``created_at`` and ``updated_at`` become ``None`` when absent.
+        """
+        page_id = payload["page_id"]
+        url = payload["url"]
+        title = payload["title"]
+        return cls(
+            page_id=page_id,
+            url=url,
+            title=title,
+            body=payload.get("body"),
+            created_at=payload.get("created_at"),
+            updated_at=payload.get("updated_at"),
+            course_id=course_id,
+        )
+

 class CanvasAssignment(BaseModel):
    id: int
@@ -30,10 +107,23 @@ class CanvasAssignment(BaseModel):
    description: str | None = None
    html_url: str
    course_id: int
-    created_at: str
-    updated_at: str
+    created_at: str | None = None
+    updated_at: str | None = None
    due_at: str | None = None

+    @classmethod
+    def from_api(cls, payload: dict[str, Any], course_id: int) -> "CanvasAssignment":
+        """Build an assignment from one raw API payload dict plus its course id.
+
+        ``id``, ``name`` and ``html_url`` are read with subscript access, so a
+        payload missing any of them raises ``KeyError``. ``description``,
+        ``created_at``, ``updated_at`` and ``due_at`` become ``None`` when
+        absent.
+        """
+        assignment_id = payload["id"]
+        name = payload["name"]
+        html_url = payload["html_url"]
+        return cls(
+            id=assignment_id,
+            name=name,
+            description=payload.get("description"),
+            html_url=html_url,
+            course_id=course_id,
+            created_at=payload.get("created_at"),
+            updated_at=payload.get("updated_at"),
+            due_at=payload.get("due_at"),
+        )
+

 class CanvasAnnouncement(BaseModel):
    id: int
@@ -43,6 +133,17 @@ class CanvasAnnouncement(BaseModel):
    posted_at: str | None = None
    course_id: int

+    @classmethod
+    def from_api(cls, payload: dict[str, Any], course_id: int) -> "CanvasAnnouncement":
+        """Build an announcement from one raw API payload dict plus its course id.
+
+        ``id``, ``title`` and ``html_url`` are read with subscript access, so
+        a payload missing any of them raises ``KeyError``. ``message`` and
+        ``posted_at`` become ``None`` when absent.
+        """
+        announcement_id = payload["id"]
+        title = payload["title"]
+        html_url = payload["html_url"]
+        return cls(
+            id=announcement_id,
+            title=title,
+            message=payload.get("message"),
+            html_url=html_url,
+            posted_at=payload.get("posted_at"),
+            course_id=course_id,
+        )
+

 CanvasStage: TypeAlias = Literal["pages", "assignments", "announcements"]

@@ -72,3 +173,286 @@ class CanvasConnectorCheckpoint(ConnectorCheckpoint):
        self.current_course_index += 1
        self.stage = "pages"
        self.next_url = None
+
+
+class CanvasConnector(
+    CheckpointedConnectorWithPermSync[CanvasConnectorCheckpoint],
+    SlimConnectorWithPermSync,
+):
+    """Connector for Canvas courses: pages, assignments, and announcements.
+
+    Credential loading, settings validation, the listing helpers and the
+    document converters are implemented here. The checkpoint and
+    permission-sync entry points currently raise ``NotImplementedError``.
+    """
+
+    def __init__(
+        self,
+        canvas_base_url: str,
+        batch_size: int = INDEX_BATCH_SIZE,
+    ) -> None:
+        # Keep the bare base URL: trailing slashes and an "/api/v1" suffix
+        # are stripped.
+        self.canvas_base_url = canvas_base_url.rstrip("/").removesuffix("/api/v1")
+        self.batch_size = batch_size
+        # Set by load_credentials(); None until then.
+        self._canvas_client: CanvasApiClient | None = None
+        # course_id -> permissions, filled lazily by _get_course_permissions().
+        self._course_permissions_cache: dict[int, ExternalAccess | None] = {}
+
+    @property
+    def canvas_client(self) -> CanvasApiClient:
+        """Return the API client.
+
+        Raises:
+            ConnectorMissingCredentialError: if no client has been set yet.
+        """
+        if self._canvas_client is None:
+            raise ConnectorMissingCredentialError("Canvas")
+        return self._canvas_client
+
+    def _get_course_permissions(self, course_id: int) -> ExternalAccess | None:
+        """Get course permissions with caching."""
+        if course_id not in self._course_permissions_cache:
+            self._course_permissions_cache[course_id] = get_course_permissions(
+                canvas_client=self.canvas_client,
+                course_id=course_id,
+            )
+        return self._course_permissions_cache[course_id]
+
+    @staticmethod
+    def _parse_canvas_timestamp(value: str | None) -> datetime | None:
+        """Parse an ISO-8601 timestamp string into an aware UTC datetime.
+
+        Returns None for a missing or empty value. A trailing "Z" is rewritten
+        to "+00:00" before parsing. A value that carries no UTC offset is
+        treated as UTC, so the result does not depend on the timezone of the
+        host running the connector.
+        """
+        if not value:
+            return None
+        parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
+        if parsed.tzinfo is None:
+            # astimezone() on a naive datetime would assume system local time.
+            parsed = parsed.replace(tzinfo=timezone.utc)
+        return parsed.astimezone(timezone.utc)
+
+    @retry(tries=3, delay=1, backoff=2)
+    def _list_courses(self) -> list[CanvasCourse]:
+        """Fetch all courses accessible to the authenticated user."""
+        logger.debug("Fetching Canvas courses")
+
+        courses: list[CanvasCourse] = []
+        for page in self.canvas_client.paginate(
+            "courses", params={"per_page": "100", "state[]": "available"}
+        ):
+            courses.extend(CanvasCourse.from_api(c) for c in page)
+        return courses
+
+    @retry(tries=3, delay=1, backoff=2)
+    def _list_pages(self, course_id: int) -> list[CanvasPage]:
+        """Fetch all pages for a given course."""
+        logger.debug(f"Fetching pages for course {course_id}")
+
+        pages: list[CanvasPage] = []
+        for page in self.canvas_client.paginate(
+            f"courses/{course_id}/pages",
+            params={"per_page": "100", "include[]": "body", "published": "true"},
+        ):
+            pages.extend(CanvasPage.from_api(p, course_id=course_id) for p in page)
+        return pages
+
+    @retry(tries=3, delay=1, backoff=2)
+    def _list_assignments(self, course_id: int) -> list[CanvasAssignment]:
+        """Fetch all assignments for a given course."""
+        logger.debug(f"Fetching assignments for course {course_id}")
+
+        assignments: list[CanvasAssignment] = []
+        for page in self.canvas_client.paginate(
+            f"courses/{course_id}/assignments",
+            params={"per_page": "100", "published": "true"},
+        ):
+            assignments.extend(
+                CanvasAssignment.from_api(a, course_id=course_id) for a in page
+            )
+        return assignments
+
+    @retry(tries=3, delay=1, backoff=2)
+    def _list_announcements(self, course_id: int) -> list[CanvasAnnouncement]:
+        """Fetch all announcements for a given course."""
+        logger.debug(f"Fetching announcements for course {course_id}")
+
+        announcements: list[CanvasAnnouncement] = []
+        for page in self.canvas_client.paginate(
+            "announcements",
+            params={
+                "per_page": "100",
+                "context_codes[]": f"course_{course_id}",
+                "active_only": "true",
+            },
+        ):
+            announcements.extend(
+                CanvasAnnouncement.from_api(a, course_id=course_id) for a in page
+            )
+        return announcements
+
+    def _build_document(
+        self,
+        doc_id: str,
+        link: str,
+        text: str,
+        semantic_identifier: str,
+        doc_updated_at: datetime | None,
+        course_id: int,
+        doc_type: str,
+    ) -> Document:
+        """Build a Document with standard Canvas fields.
+
+        The document gets a single TextSection and metadata holding the
+        course id (as a string) and ``doc_type``.
+        """
+        return Document(
+            id=doc_id,
+            sections=cast(
+                list[TextSection | ImageSection],
+                [TextSection(link=link, text=text)],
+            ),
+            source=DocumentSource.CANVAS,
+            semantic_identifier=semantic_identifier,
+            doc_updated_at=doc_updated_at,
+            metadata={"course_id": str(course_id), "type": doc_type},
+        )
+
+    def _convert_page_to_document(self, page: CanvasPage) -> Document:
+        """Convert a Canvas page to a Document.
+
+        The text is the title followed by the page body with HTML stripped
+        (when the body yields any text).
+        """
+        link = f"{self.canvas_base_url}/courses/{page.course_id}/pages/{page.url}"
+
+        text_parts = [page.title]
+        body_text = parse_html_page_basic(page.body) if page.body else ""
+        if body_text:
+            text_parts.append(body_text)
+
+        return self._build_document(
+            doc_id=f"canvas-page-{page.course_id}-{page.page_id}",
+            link=link,
+            text="\n\n".join(text_parts),
+            semantic_identifier=page.title or f"Page {page.page_id}",
+            doc_updated_at=self._parse_canvas_timestamp(page.updated_at),
+            course_id=page.course_id,
+            doc_type="page",
+        )
+
+    def _convert_assignment_to_document(self, assignment: CanvasAssignment) -> Document:
+        """Convert a Canvas assignment to a Document.
+
+        The text is the name, then the HTML-stripped description (if any),
+        then a "Due: ..." line in UTC when a due date is present.
+        """
+        text_parts = [assignment.name]
+        desc_text = (
+            parse_html_page_basic(assignment.description)
+            if assignment.description
+            else ""
+        )
+        if desc_text:
+            text_parts.append(desc_text)
+
+        due_dt = self._parse_canvas_timestamp(assignment.due_at)
+        if due_dt is not None:
+            text_parts.append(f"Due: {due_dt.strftime('%B %d, %Y %H:%M UTC')}")
+
+        return self._build_document(
+            doc_id=f"canvas-assignment-{assignment.course_id}-{assignment.id}",
+            link=assignment.html_url,
+            text="\n\n".join(text_parts),
+            semantic_identifier=assignment.name or f"Assignment {assignment.id}",
+            doc_updated_at=self._parse_canvas_timestamp(assignment.updated_at),
+            course_id=assignment.course_id,
+            doc_type="assignment",
+        )
+
+    def _convert_announcement_to_document(
+        self, announcement: CanvasAnnouncement
+    ) -> Document:
+        """Convert a Canvas announcement to a Document.
+
+        ``posted_at`` is used as the document's updated-at timestamp.
+        """
+        text_parts = [announcement.title]
+        msg_text = (
+            parse_html_page_basic(announcement.message) if announcement.message else ""
+        )
+        if msg_text:
+            text_parts.append(msg_text)
+
+        return self._build_document(
+            doc_id=f"canvas-announcement-{announcement.course_id}-{announcement.id}",
+            link=announcement.html_url,
+            text="\n\n".join(text_parts),
+            semantic_identifier=announcement.title or f"Announcement {announcement.id}",
+            doc_updated_at=self._parse_canvas_timestamp(announcement.posted_at),
+            course_id=announcement.course_id,
+            doc_type="announcement",
+        )
+
+    @override
+    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
+        """Load and validate Canvas credentials.
+
+        Reads ``canvas_access_token`` from ``credentials``, builds a client
+        and issues one ``courses`` request to confirm the token works. The
+        client is stored only when that request succeeds.
+
+        Raises:
+            ConnectorMissingCredentialError: if the token is missing or empty.
+            ConnectorValidationError: if building or using the client raises
+                ``ValueError``.
+            Other connector exceptions: as mapped by
+                ``_handle_canvas_api_error`` for an ``OnyxError``.
+        """
+        access_token = credentials.get("canvas_access_token")
+        if not access_token:
+            raise ConnectorMissingCredentialError("Canvas")
+
+        try:
+            client = CanvasApiClient(
+                bearer_token=access_token,
+                canvas_base_url=self.canvas_base_url,
+            )
+            client.get("courses", params={"per_page": "1"})
+        except ValueError as e:
+            # Chain the cause so the original ValueError stays in the traceback.
+            raise ConnectorValidationError(f"Invalid Canvas base URL: {e}") from e
+        except OnyxError as e:
+            _handle_canvas_api_error(e)
+
+        self._canvas_client = client
+        return None
+
+    @override
+    def validate_connector_settings(self) -> None:
+        """Validate Canvas connector settings by testing API access.
+
+        Raises:
+            ConnectorMissingCredentialError: re-raised unchanged when no
+                client has been loaded.
+            UnexpectedValidationError: for any other unexpected exception.
+            Other connector exceptions: as mapped by
+                ``_handle_canvas_api_error`` for an ``OnyxError``.
+        """
+        try:
+            self.canvas_client.get("courses", params={"per_page": "1"})
+            logger.info("Canvas connector settings validated successfully")
+        except OnyxError as e:
+            _handle_canvas_api_error(e)
+        except ConnectorMissingCredentialError:
+            raise
+        except Exception as exc:
+            raise UnexpectedValidationError(
+                f"Unexpected error during Canvas settings validation: {exc}"
+            ) from exc
+
+    @override
+    def load_from_checkpoint(
+        self,
+        start: SecondsSinceUnixEpoch,
+        end: SecondsSinceUnixEpoch,
+        checkpoint: CanvasConnectorCheckpoint,
+    ) -> CheckpointOutput[CanvasConnectorCheckpoint]:
+        # TODO(benwu408): implemented in PR3 (checkpoint)
+        raise NotImplementedError
+
+    @override
+    def load_from_checkpoint_with_perm_sync(
+        self,
+        start: SecondsSinceUnixEpoch,
+        end: SecondsSinceUnixEpoch,
+        checkpoint: CanvasConnectorCheckpoint,
+    ) -> CheckpointOutput[CanvasConnectorCheckpoint]:
+        # TODO(benwu408): implemented in PR3 (checkpoint)
+        raise NotImplementedError
+
+    @override
+    def build_dummy_checkpoint(self) -> CanvasConnectorCheckpoint:
+        # TODO(benwu408): implemented in PR3 (checkpoint)
+        raise NotImplementedError
+
+    @override
+    def validate_checkpoint_json(
+        self, checkpoint_json: str
+    ) -> CanvasConnectorCheckpoint:
+        # TODO(benwu408): implemented in PR3 (checkpoint)
+        raise NotImplementedError
+
+    @override
+    def retrieve_all_slim_docs_perm_sync(
+        self,
+        start: SecondsSinceUnixEpoch | None = None,
+        end: SecondsSinceUnixEpoch | None = None,
+        callback: IndexingHeartbeatInterface | None = None,
+    ) -> GenerateSlimDocumentOutput:
+        # TODO(benwu408): implemented in PR4 (perm sync)
+        raise NotImplementedError
--- a/backend/onyx/connectors/discord/connector.py
+++ b/backend/onyx/connectors/discord/connector.py
@@ -11,11 +11,13 @@ from discord import Client
 from discord.channel import TextChannel
 from discord.channel import Thread
 from discord.enums import MessageType
+from discord.errors import LoginFailure
 from discord.flags import Intents
 from discord.message import Message as DiscordMessage

 from onyx.configs.app_configs import INDEX_BATCH_SIZE
 from onyx.configs.constants import DocumentSource
+from onyx.connectors.exceptions import CredentialInvalidError
 from onyx.connectors.interfaces import GenerateDocumentsOutput
 from onyx.connectors.interfaces import LoadConnector
 from onyx.connectors.interfaces import PollConnector
@@ -209,8 +211,19 @@ def _manage_async_retrieval(
        intents = Intents.default()
        intents.message_content = True
        async with Client(intents=intents) as discord_client:
-            asyncio.create_task(discord_client.start(token))
-            await discord_client.wait_until_ready()
+            start_task = asyncio.create_task(discord_client.start(token))
+            ready_task = asyncio.create_task(discord_client.wait_until_ready())
+
+            done, _ = await asyncio.wait(
+                {start_task, ready_task},
+                return_when=asyncio.FIRST_COMPLETED,
+            )
+
+            # start() runs indefinitely once connected, so it only lands
+            # in `done` when login/connection failed — propagate the error.
+            if start_task in done:
+                ready_task.cancel()
+                start_task.result()

            filtered_channels: list[TextChannel] = await _fetch_filtered_channels(
                discord_client=discord_client,
@@ -276,6 +289,19 @@ class DiscordConnector(PollConnector, LoadConnector):
        self._discord_bot_token = credentials["discord_bot_token"]
        return None

+    def validate_connector_settings(self) -> None:
+        """Check the bot token by attempting a Discord login.
+
+        Runs ``Client.login`` on a freshly created event loop. The client and
+        the loop are both closed on every path, including failure.
+
+        Raises:
+            CredentialInvalidError: if the login raises ``LoginFailure``.
+        """
+        loop = asyncio.new_event_loop()
+        try:
+            client = Client(intents=Intents.default())
+            try:
+                loop.run_until_complete(client.login(self.discord_bot_token))
+            except LoginFailure as e:
+                # Chain the cause so the original LoginFailure stays attached.
+                raise CredentialInvalidError(
+                    f"Invalid Discord bot token: {e}"
+                ) from e
+            finally:
+                loop.run_until_complete(client.close())
+        finally:
+            loop.close()
+
    def _manage_doc_batching(
        self,
        start: datetime | None = None,
--- a/backend/onyx/connectors/registry.py
+++ b/backend/onyx/connectors/registry.py
@@ -72,6 +72,10 @@ CONNECTOR_CLASS_MAP = {
        module_path="onyx.connectors.coda.connector",
        class_name="CodaConnector",
    ),
+    DocumentSource.CANVAS: ConnectorMapping(
+        module_path="onyx.connectors.canvas.connector",
+        class_name="CanvasConnector",
+    ),
    DocumentSource.NOTION: ConnectorMapping(
        module_path="onyx.connectors.notion.connector",
        class_name="NotionConnector",
--- a/backend/onyx/db/chat.py
+++ b/backend/onyx/db/chat.py
@@ -8,7 +8,6 @@ from uuid import UUID
 from fastapi import HTTPException
 from sqlalchemy import delete
 from sqlalchemy import desc
-from sqlalchemy import exists
 from sqlalchemy import func
 from sqlalchemy import nullsfirst
 from sqlalchemy import or_
@@ -132,32 +131,47 @@ def get_chat_sessions_by_user(
    if before is not None:
        stmt = stmt.where(ChatSession.time_updated < before)

-    if limit:
-        stmt = stmt.limit(limit)
-
    if project_id is not None:
        stmt = stmt.where(ChatSession.project_id == project_id)
    elif only_non_project_chats:
        stmt = stmt.where(ChatSession.project_id.is_(None))

-    if not include_failed_chats:
-        non_system_message_exists_subq = (
-            exists()
-            .where(ChatMessage.chat_session_id == ChatSession.id)
-            .where(ChatMessage.message_type != MessageType.SYSTEM)
-            .correlate(ChatSession)
-        )
-
-        # Leeway for newly created chats that don't have messages yet
-        time = datetime.now(timezone.utc) - timedelta(minutes=5)
-        recently_created = ChatSession.time_created >= time
-
-        stmt = stmt.where(or_(non_system_message_exists_subq, recently_created))
+    # When filtering out failed chats, we apply the limit in Python after
+    # filtering rather than in SQL, since the post-filter may remove rows.
+    if limit and include_failed_chats:
+        stmt = stmt.limit(limit)

    result = db_session.execute(stmt)
-    chat_sessions = result.scalars().all()
+    chat_sessions = list(result.scalars().all())

-    return list(chat_sessions)
+    if not include_failed_chats and chat_sessions:
+        # Filter out "failed" sessions (those with only SYSTEM messages)
+        # using a separate efficient query instead of a correlated EXISTS
+        # subquery, which causes full sequential scans of chat_message.
+        leeway = datetime.now(timezone.utc) - timedelta(minutes=5)
+        session_ids = [cs.id for cs in chat_sessions if cs.time_created < leeway]
+
+        if session_ids:
+            valid_session_ids_stmt = (
+                select(ChatMessage.chat_session_id)
+                .where(ChatMessage.chat_session_id.in_(session_ids))
+                .where(ChatMessage.message_type != MessageType.SYSTEM)
+                .distinct()
+            )
+            valid_session_ids = set(
+                db_session.execute(valid_session_ids_stmt).scalars().all()
+            )
+
+            chat_sessions = [
+                cs
+                for cs in chat_sessions
+                if cs.time_created >= leeway or cs.id in valid_session_ids
+            ]
+
+        if limit:
+            chat_sessions = chat_sessions[:limit]
+
+    return chat_sessions


 def delete_orphaned_search_docs(db_session: Session) -> None:
--- a/backend/onyx/db/enums.py
+++ b/backend/onyx/db/enums.py
@@ -215,6 +215,7 @@ class UserFileStatus(str, PyEnum):
    PROCESSING = "PROCESSING"
    INDEXING = "INDEXING"
    COMPLETED = "COMPLETED"
+    SKIPPED = "SKIPPED"
    FAILED = "FAILED"
    CANCELED = "CANCELED"
    DELETING = "DELETING"
--- a/backend/onyx/db/projects.py
+++ b/backend/onyx/db/projects.py
@@ -18,6 +18,7 @@ from onyx.configs.constants import FileOrigin
 from onyx.configs.constants import OnyxCeleryPriority
 from onyx.configs.constants import OnyxCeleryQueues
 from onyx.configs.constants import OnyxCeleryTask
+from onyx.db.enums import UserFileStatus
 from onyx.db.models import Project__UserFile
 from onyx.db.models import User
 from onyx.db.models import UserFile
@@ -81,6 +82,7 @@ def create_user_files(
        )
        if new_temp_id is not None:
            id_to_temp_id[str(new_id)] = new_temp_id
+        should_skip = (file.filename or "") in categorized_files.skip_indexing
        new_file = UserFile(
            id=new_id,
            user_id=user.id,
@@ -92,6 +94,7 @@ def create_user_files(
            link_url=link_url,
            content_type=file.content_type,
            file_type=file.content_type,
+            status=UserFileStatus.SKIPPED if should_skip else UserFileStatus.PROCESSING,
            last_accessed_at=datetime.datetime.now(datetime.timezone.utc),
        )
        # Persist the UserFile first to satisfy FK constraints for association table
--- a/backend/onyx/document_index/disabled.py
+++ b/backend/onyx/document_index/disabled.py
@@ -5,6 +5,7 @@ accidentally reaches the vector DB layer will fail loudly instead of timing
 out against a nonexistent Vespa/OpenSearch instance.
 """

+from collections.abc import Iterable
 from typing import Any

 from onyx.context.search.models import IndexFilters
@@ -66,7 +67,7 @@ class DisabledDocumentIndex(DocumentIndex):
    # ------------------------------------------------------------------
    def index(
        self,
-        chunks: list[DocMetadataAwareIndexChunk],  # noqa: ARG002
+        chunks: Iterable[DocMetadataAwareIndexChunk],  # noqa: ARG002
        index_batch_params: IndexBatchParams,  # noqa: ARG002
    ) -> set[DocumentInsertionRecord]:
        raise RuntimeError(VECTOR_DB_DISABLED_ERROR)
--- a/backend/onyx/document_index/interfaces.py
+++ b/backend/onyx/document_index/interfaces.py
@@ -1,4 +1,5 @@
 import abc
+from collections.abc import Iterable
 from dataclasses import dataclass
 from datetime import datetime
 from typing import Any
@@ -206,7 +207,7 @@ class Indexable(abc.ABC):
    @abc.abstractmethod
    def index(
        self,
-        chunks: list[DocMetadataAwareIndexChunk],
+        chunks: Iterable[DocMetadataAwareIndexChunk],
        index_batch_params: IndexBatchParams,
    ) -> set[DocumentInsertionRecord]:
        """
@@ -226,8 +227,8 @@ class Indexable(abc.ABC):
        it is done automatically outside of this code.

        Parameters:
-        - chunks: Document chunks with all of the information needed for indexing to the document
-                index.
+        - chunks: Document chunks with all of the information needed for
+                indexing to the document index.
        - tenant_id: The tenant id of the user whose chunks are being indexed
        - large_chunks_enabled: Whether large chunks are enabled

--- a/backend/onyx/document_index/interfaces_new.py
+++ b/backend/onyx/document_index/interfaces_new.py
@@ -1,4 +1,5 @@
 import abc
+from collections.abc import Iterable
 from typing import Self

 from pydantic import BaseModel
@@ -209,10 +210,10 @@ class Indexable(abc.ABC):
    @abc.abstractmethod
    def index(
        self,
-        chunks: list[DocMetadataAwareIndexChunk],
+        chunks: Iterable[DocMetadataAwareIndexChunk],
        indexing_metadata: IndexingMetadata,
    ) -> list[DocumentInsertionRecord]:
-        """Indexes a list of document chunks into the document index.
+        """Indexes an iterable of document chunks into the document index.

        This is often a batch operation including chunks from multiple
        documents.
--- a/backend/onyx/document_index/opensearch/client.py
+++ b/backend/onyx/document_index/opensearch/client.py
@@ -932,7 +932,7 @@ class OpenSearchIndexClient(OpenSearchClient):
    def search_for_document_ids(
        self,
        body: dict[str, Any],
-        search_type: OpenSearchSearchType = OpenSearchSearchType.DOCUMENT_IDS,
+        search_type: OpenSearchSearchType = OpenSearchSearchType.UNKNOWN,
    ) -> list[str]:
        """Searches the index and returns only document chunk IDs.

--- a/backend/onyx/document_index/opensearch/constants.py
+++ b/backend/onyx/document_index/opensearch/constants.py
@@ -60,8 +60,7 @@ class OpenSearchSearchType(str, Enum):
    KEYWORD = "keyword"
    SEMANTIC = "semantic"
    RANDOM = "random"
-    ID_RETRIEVAL = "id_retrieval"
-    DOCUMENT_IDS = "document_ids"
+    DOC_ID_RETRIEVAL = "doc_id_retrieval"
    UNKNOWN = "unknown"


--- a/backend/onyx/document_index/opensearch/opensearch_document_index.py
+++ b/backend/onyx/document_index/opensearch/opensearch_document_index.py
@@ -1,11 +1,12 @@
 import json
-from collections import defaultdict
+from collections.abc import Iterable
 from typing import Any

 import httpx
 from opensearchpy import NotFoundError

 from onyx.access.models import DocumentAccess
+from onyx.configs.app_configs import MAX_CHUNKS_PER_DOC_BATCH
 from onyx.configs.app_configs import VERIFY_CREATE_OPENSEARCH_INDEX_ON_INIT_MT
 from onyx.configs.chat_configs import NUM_RETURNED_HITS
 from onyx.configs.chat_configs import TITLE_CONTENT_RATIO
@@ -351,7 +352,7 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):

    def index(
        self,
-        chunks: list[DocMetadataAwareIndexChunk],
+        chunks: Iterable[DocMetadataAwareIndexChunk],
        index_batch_params: IndexBatchParams,
    ) -> set[OldDocumentInsertionRecord]:
        """
@@ -647,10 +648,10 @@ class OpenSearchDocumentIndex(DocumentIndex):

    def index(
        self,
-        chunks: list[DocMetadataAwareIndexChunk],
-        indexing_metadata: IndexingMetadata,  # noqa: ARG002
+        chunks: Iterable[DocMetadataAwareIndexChunk],
+        indexing_metadata: IndexingMetadata,
    ) -> list[DocumentInsertionRecord]:
-        """Indexes a list of document chunks into the document index.
+        """Indexes an iterable of document chunks into the document index.

        Groups chunks by document ID and for each document, deletes existing
        chunks and indexes the new chunks in bulk.
@@ -673,29 +674,34 @@ class OpenSearchDocumentIndex(DocumentIndex):
                document is newly indexed or had already existed and was just
                updated.
        """
-        # Group chunks by document ID.
-        doc_id_to_chunks: dict[str, list[DocMetadataAwareIndexChunk]] = defaultdict(
-            list
+        total_chunks = sum(
+            cc.new_chunk_cnt
+            for cc in indexing_metadata.doc_id_to_chunk_cnt_diff.values()
        )
-        for chunk in chunks:
-            doc_id_to_chunks[chunk.source_document.id].append(chunk)
        logger.debug(
-            f"[OpenSearchDocumentIndex] Indexing {len(chunks)} chunks from {len(doc_id_to_chunks)} "
+            f"[OpenSearchDocumentIndex] Indexing {total_chunks} chunks from {len(indexing_metadata.doc_id_to_chunk_cnt_diff)} "
            f"documents for index {self._index_name}."
        )

        document_indexing_results: list[DocumentInsertionRecord] = []
-        # Try to index per-document.
-        for _, chunks in doc_id_to_chunks.items():
+        deleted_doc_ids: set[str] = set()
+        # Buffer chunks per document as they arrive from the iterable. Flush the
+        # buffer when the document ID changes or it reaches MAX_CHUNKS_PER_DOC_BATCH.
+        current_doc_id: str | None = None
+        current_chunks: list[DocMetadataAwareIndexChunk] = []
+
+        def _flush_chunks(doc_chunks: list[DocMetadataAwareIndexChunk]) -> None:
+            assert len(doc_chunks) > 0, "doc_chunks is empty"
+
            # Create a batch of OpenSearch-formatted chunks for bulk insertion.
-            # Do this before deleting existing chunks to reduce the amount of
-            # time the document index has no content for a given document, and
-            # to reduce the chance of entering a state where we delete chunks,
-            # then some error happens, and never successfully index new chunks.
+            # Since we are doing this in batches, an error occurring midway
+            # can result in a state where chunks are deleted and not all the
+            # new chunks have been indexed.
            chunk_batch: list[DocumentChunk] = [
-                _convert_onyx_chunk_to_opensearch_document(chunk) for chunk in chunks
+                _convert_onyx_chunk_to_opensearch_document(chunk)
+                for chunk in doc_chunks
            ]
-            onyx_document: Document = chunks[0].source_document
+            onyx_document: Document = doc_chunks[0].source_document
            # First delete the doc's chunks from the index. This is so that
            # there are no dangling chunks in the index, in the event that the
            # new document's content contains fewer chunks than the previous
@@ -704,22 +710,43 @@ class OpenSearchDocumentIndex(DocumentIndex):
            # if the chunk count has actually decreased. This assumes that
            # overlapping chunks are perfectly overwritten. If we can't
            # guarantee that then we need the code as-is.
-            num_chunks_deleted = self.delete(
-                onyx_document.id, onyx_document.chunk_count
-            )
-            # If we see that chunks were deleted we assume the doc already
-            # existed.
-            document_insertion_record = DocumentInsertionRecord(
-                document_id=onyx_document.id,
-                already_existed=num_chunks_deleted > 0,
-            )
+            if onyx_document.id not in deleted_doc_ids:
+                num_chunks_deleted = self.delete(
+                    onyx_document.id, onyx_document.chunk_count
+                )
+                deleted_doc_ids.add(onyx_document.id)
+                # If we see that chunks were deleted we assume the doc already
+                # existed. We record the result before bulk_index_documents
+                # runs. If indexing raises, this entire result list is discarded
+                # by the caller's retry logic, so early recording is safe.
+                document_indexing_results.append(
+                    DocumentInsertionRecord(
+                        document_id=onyx_document.id,
+                        already_existed=num_chunks_deleted > 0,
+                    )
+                )
            # Now index. This will raise if a chunk of the same ID exists, which
            # we do not expect because we should have deleted all chunks.
            self._client.bulk_index_documents(
                documents=chunk_batch,
                tenant_state=self._tenant_state,
            )
-            document_indexing_results.append(document_insertion_record)
+
+        for chunk in chunks:
+            doc_id = chunk.source_document.id
+            if doc_id != current_doc_id:
+                if current_chunks:
+                    _flush_chunks(current_chunks)
+                current_doc_id = doc_id
+                current_chunks = [chunk]
+            elif len(current_chunks) >= MAX_CHUNKS_PER_DOC_BATCH:
+                _flush_chunks(current_chunks)
+                current_chunks = [chunk]
+            else:
+                current_chunks.append(chunk)
+
+        if current_chunks:
+            _flush_chunks(current_chunks)

        return document_indexing_results

@@ -901,7 +928,7 @@ class OpenSearchDocumentIndex(DocumentIndex):
            search_hits = self._client.search(
                body=query_body,
                search_pipeline_id=None,
-                search_type=OpenSearchSearchType.ID_RETRIEVAL,
+                search_type=OpenSearchSearchType.DOC_ID_RETRIEVAL,
            )
            inference_chunks_uncleaned: list[InferenceChunkUncleaned] = [
                _convert_retrieved_opensearch_chunk_to_inference_chunk_uncleaned(
--- a/backend/onyx/document_index/vespa/index.py
+++ b/backend/onyx/document_index/vespa/index.py
@@ -6,6 +6,7 @@ import re
 import time
 import urllib
 import zipfile
+from collections.abc import Iterable
 from dataclasses import dataclass
 from datetime import datetime
 from datetime import timedelta
@@ -461,7 +462,7 @@ class VespaIndex(DocumentIndex):

    def index(
        self,
-        chunks: list[DocMetadataAwareIndexChunk],
+        chunks: Iterable[DocMetadataAwareIndexChunk],
        index_batch_params: IndexBatchParams,
    ) -> set[OldDocumentInsertionRecord]:
        """
--- a/backend/onyx/document_index/vespa/vespa_document_index.py
+++ b/backend/onyx/document_index/vespa/vespa_document_index.py
@@ -1,6 +1,8 @@
 import concurrent.futures
 import logging
 import random
+from collections.abc import Generator
+from collections.abc import Iterable
 from typing import Any
 from uuid import UUID

@@ -8,6 +10,7 @@ import httpx
 from pydantic import BaseModel
 from retry import retry

+from onyx.configs.app_configs import MAX_CHUNKS_PER_DOC_BATCH
 from onyx.configs.app_configs import RECENCY_BIAS_MULTIPLIER
 from onyx.configs.app_configs import RERANK_COUNT
 from onyx.configs.chat_configs import DOC_TIME_DECAY
@@ -318,7 +321,7 @@ class VespaDocumentIndex(DocumentIndex):

    def index(
        self,
-        chunks: list[DocMetadataAwareIndexChunk],
+        chunks: Iterable[DocMetadataAwareIndexChunk],
        indexing_metadata: IndexingMetadata,
    ) -> list[DocumentInsertionRecord]:
        doc_id_to_chunk_cnt_diff = indexing_metadata.doc_id_to_chunk_cnt_diff
@@ -338,22 +341,31 @@ class VespaDocumentIndex(DocumentIndex):

        # Vespa has restrictions on valid characters, yet document IDs come from
        # external w.r.t. this class. We need to sanitize them.
-        cleaned_chunks: list[DocMetadataAwareIndexChunk] = [
-            clean_chunk_id_copy(chunk) for chunk in chunks
-        ]
-        assert len(cleaned_chunks) == len(
-            chunks
-        ), "Bug: Cleaned chunks and input chunks have different lengths."
+        #
+        # Instead of materializing all cleaned chunks upfront, we stream them
+        # through a generator that cleans IDs and builds the original-ID mapping
+        # incrementally as chunks flow into Vespa.
+        def _clean_and_track(
+            chunks_iter: Iterable[DocMetadataAwareIndexChunk],
+            id_map: dict[str, str],
+            seen_ids: set[str],
+        ) -> Generator[DocMetadataAwareIndexChunk, None, None]:
+            """Cleans chunk IDs and builds the original-ID mapping
+            incrementally as chunks flow through, avoiding a separate
+            materialization pass."""
+            for chunk in chunks_iter:
+                original_id = chunk.source_document.id
+                cleaned = clean_chunk_id_copy(chunk)
+                cleaned_id = cleaned.source_document.id
+                # Record cleaned ID -> original ID so the final
+                # DocumentInsertionRecord can report the ID exactly as the
+                # caller supplied it; cleaning may have altered it.
+                id_map[cleaned_id] = original_id
+                seen_ids.add(cleaned_id)
+                yield cleaned

-        # Needed so the final DocumentInsertionRecord returned can have the
-        # original document ID. cleaned_chunks might not contain IDs exactly as
-        # callers supplied them.
-        new_document_id_to_original_document_id: dict[str, str] = dict()
-        for i, cleaned_chunk in enumerate(cleaned_chunks):
-            old_chunk = chunks[i]
-            new_document_id_to_original_document_id[
-                cleaned_chunk.source_document.id
-            ] = old_chunk.source_document.id
+        new_document_id_to_original_document_id: dict[str, str] = {}
+        all_cleaned_doc_ids: set[str] = set()

        existing_docs: set[str] = set()

@@ -409,8 +421,16 @@ class VespaDocumentIndex(DocumentIndex):
                    executor=executor,
                )

-            # Insert new Vespa documents.
-            for chunk_batch in batch_generator(cleaned_chunks, BATCH_SIZE):
+            # Insert new Vespa documents, streaming through the cleaning
+            # pipeline so chunks are never fully materialized.
+            cleaned_chunks = _clean_and_track(
+                chunks,
+                new_document_id_to_original_document_id,
+                all_cleaned_doc_ids,
+            )
+            for chunk_batch in batch_generator(
+                cleaned_chunks, min(BATCH_SIZE, MAX_CHUNKS_PER_DOC_BATCH)
+            ):
                batch_index_vespa_chunks(
                    chunks=chunk_batch,
                    index_name=self._index_name,
@@ -419,10 +439,6 @@ class VespaDocumentIndex(DocumentIndex):
                    executor=executor,
                )

-        all_cleaned_doc_ids: set[str] = {
-            chunk.source_document.id for chunk in cleaned_chunks
-        }
-
        return [
            DocumentInsertionRecord(
                document_id=new_document_id_to_original_document_id[cleaned_doc_id],
--- a/backend/onyx/file_processing/extract_file_text.py
+++ b/backend/onyx/file_processing/extract_file_text.py
@@ -44,6 +44,7 @@ KNOWN_OPENPYXL_BUGS = [
    "Value must be either numerical or a string containing a wildcard",
    "File contains no valid workbook part",
    "Unable to read workbook: could not read stylesheet from None",
+    "Colors must be aRGB hex values",
 ]


--- a/backend/onyx/file_processing/file_types.py
+++ b/backend/onyx/file_processing/file_types.py
@@ -53,6 +53,11 @@ class OnyxMimeTypes:


 class OnyxFileExtensions:
+    TABULAR_EXTENSIONS = {
+        ".csv",
+        ".tsv",
+        ".xlsx",
+    }
    PLAIN_TEXT_EXTENSIONS = {
        ".txt",
        ".md",
--- a/backend/onyx/file_store/models.py
+++ b/backend/onyx/file_store/models.py
@@ -13,7 +13,7 @@ class ChatFileType(str, Enum):
    DOC = "document"
    # Plain text only contain the text
    PLAIN_TEXT = "plain_text"
-    # Tabular data files (CSV, TSV, XLSX) — metadata-only injection
+    # Tabular data files (CSV, XLSX)
    TABULAR = "tabular"

    def is_text_file(self) -> bool:
@@ -23,11 +23,6 @@ class ChatFileType(str, Enum):
            ChatFileType.TABULAR,
        )

-    def use_metadata_only(self) -> bool:
-        """File types where we can ignore the file content
-        and only use the metadata."""
-        return self in (ChatFileType.TABULAR,)
-

 class FileDescriptor(TypedDict):
    """NOTE: is a `TypedDict` so it can be used as a type hint for a JSONB column
--- a/backend/onyx/file_store/utils.py
+++ b/backend/onyx/file_store/utils.py
@@ -110,20 +110,16 @@ def load_user_file(file_id: UUID, db_session: Session) -> InMemoryChatFile:
    # check for plain text normalized version first, then use original file otherwise
    try:
        file_io = file_store.read_file(plaintext_file_name, mode="b")
-        # Metadata-only file types preserve their original type so
-        # downstream injection paths can route them correctly.
-        if chat_file_type.use_metadata_only():
-            plaintext_chat_file_type = chat_file_type
-        elif file_io is not None:
-            # if we have plaintext for image (which happens when image
-            # extraction is enabled), we use PLAIN_TEXT type
+        # Default to PLAIN_TEXT for the plaintext version; an image keeps its own type since it normally has no plaintext.
+        plaintext_chat_file_type = (
+            ChatFileType.PLAIN_TEXT
+            if chat_file_type != ChatFileType.IMAGE
+            else chat_file_type
+        )
+
+        # If a plaintext version was actually read (e.g. for an image when image extraction is enabled), use PLAIN_TEXT.
+        if file_io is not None:
            plaintext_chat_file_type = ChatFileType.PLAIN_TEXT
-        else:
-            plaintext_chat_file_type = (
-                ChatFileType.PLAIN_TEXT
-                if chat_file_type != ChatFileType.IMAGE
-                else chat_file_type
-            )

        chat_file = InMemoryChatFile(
            file_id=str(user_file.file_id),
--- a/backend/onyx/hooks/api_dependencies.py
+++ b/backend/onyx/hooks/api_dependencies.py
@@ -1,4 +1,3 @@
-from onyx.configs.app_configs import HOOK_ENABLED
 from onyx.error_handling.error_codes import OnyxErrorCode
 from onyx.error_handling.exceptions import OnyxError
 from shared_configs.configs import MULTI_TENANT
@@ -7,10 +6,7 @@ from shared_configs.configs import MULTI_TENANT
 def require_hook_enabled() -> None:
    """FastAPI dependency that gates all hook management endpoints.

-    Hooks are only available in single-tenant / self-hosted deployments with
-    HOOK_ENABLED=true explicitly set. Two layers of protection:
-      1. MULTI_TENANT check — rejects even if HOOK_ENABLED is accidentally set true
-      2. HOOK_ENABLED flag — explicit opt-in by the operator
+    Hooks are only available in single-tenant / self-hosted EE deployments.

    Use as: Depends(require_hook_enabled)
    """
@@ -19,8 +15,3 @@ def require_hook_enabled() -> None:
            OnyxErrorCode.SINGLE_TENANT_ONLY,
            "Hooks are not available in multi-tenant deployments",
        )
-    if not HOOK_ENABLED:
-        raise OnyxError(
-            OnyxErrorCode.ENV_VAR_GATED,
-            "Hooks are not enabled. Set HOOK_ENABLED=true to enable.",
-        )
--- a/backend/onyx/hooks/executor.py
+++ b/backend/onyx/hooks/executor.py
@@ -1,79 +1,22 @@
-"""Hook executor — calls a customer's external HTTP endpoint for a given hook point.
+"""CE hook executor.

-Usage (Celery tasks and FastAPI handlers):
-    result = execute_hook(
-        db_session=db_session,
-        hook_point=HookPoint.QUERY_PROCESSING,
-        payload={"query": "...", "user_email": "...", "chat_session_id": "..."},
-        response_type=QueryProcessingResponse,
-    )
+HookSkipped and HookSoftFailed are real classes kept here because
+process_message.py (CE code) uses isinstance checks against them.

-    if isinstance(result, HookSkipped):
-        # no active hook configured — continue with original behavior
-        ...
-    elif isinstance(result, HookSoftFailed):
-        # hook failed but fail strategy is SOFT — continue with original behavior
-        ...
-    else:
-        # result is a validated Pydantic model instance (response_type)
-        ...
-
-is_reachable update policy
--------------------------
-``is_reachable`` on the Hook row is updated selectively — only when the outcome
-carries meaningful signal about physical reachability:
-
-  NetworkError (DNS, connection refused)  → False  (cannot reach the server)
-  HTTP 401 / 403                          → False  (api_key revoked or invalid)
-  TimeoutException                        → None   (server may be slow, skip write)
-  Other HTTP errors (4xx / 5xx)           → None   (server responded, skip write)
-  Unknown exception                       → None   (no signal, skip write)
-  Non-JSON / non-dict response            → None   (server responded, skip write)
-  Success (2xx, valid dict)               → True   (confirmed reachable)
-
-None means "leave the current value unchanged" — no DB round-trip is made.
-
-DB session design
-----------------
-The executor uses three sessions:
-
-  1. Caller's session (db_session) — used only for the hook lookup read. All
-     needed fields are extracted from the Hook object before the HTTP call, so
-     the caller's session is not held open during the external HTTP request.
-
-  2. Log session — a separate short-lived session opened after the HTTP call
-     completes to write the HookExecutionLog row on failure. Success runs are
-     not recorded. Committed independently of everything else.
-
-  3. Reachable session — a second short-lived session to update is_reachable on
-     the Hook. Kept separate from the log session so a concurrent hook deletion
-     (which causes update_hook__no_commit to raise OnyxError(NOT_FOUND)) cannot
-     prevent the execution log from being written. This update is best-effort.
+execute_hook is the public entry point. It dispatches to _execute_hook_impl
+via fetch_versioned_implementation so that:
+  - CE: onyx.hooks.executor._execute_hook_impl → no-op, returns HookSkipped()
+  - EE: ee.onyx.hooks.executor._execute_hook_impl → real HTTP call
 """

-import json
-import time
 from typing import Any
 from typing import TypeVar

-import httpx
 from pydantic import BaseModel
-from pydantic import ValidationError
 from sqlalchemy.orm import Session

-from onyx.db.engine.sql_engine import get_session_with_current_tenant
-from onyx.db.enums import HookFailStrategy
 from onyx.db.enums import HookPoint
-from onyx.db.hook import create_hook_execution_log__no_commit
-from onyx.db.hook import get_non_deleted_hook_by_hook_point
-from onyx.db.hook import update_hook__no_commit
-from onyx.db.models import Hook
-from onyx.error_handling.error_codes import OnyxErrorCode
-from onyx.error_handling.exceptions import OnyxError
-from onyx.hooks.utils import HOOKS_AVAILABLE
-from onyx.utils.logger import setup_logger
-
-logger = setup_logger()
+from onyx.utils.variable_functionality import fetch_versioned_implementation


 class HookSkipped:
@@ -87,277 +30,15 @@ class HookSoftFailed:
 T = TypeVar("T", bound=BaseModel)


-# ---------------------------------------------------------------------------
-# Private helpers
-# ---------------------------------------------------------------------------
-
-
-class _HttpOutcome(BaseModel):
-    """Structured result of an HTTP hook call, returned by _process_response."""
-
-    is_success: bool
-    updated_is_reachable: (
-        bool | None
-    )  # True/False = write to DB, None = unchanged (skip write)
-    status_code: int | None
-    error_message: str | None
-    response_payload: dict[str, Any] | None
-
-
-def _lookup_hook(
-    db_session: Session,
-    hook_point: HookPoint,
-) -> Hook | HookSkipped:
-    """Return the active Hook or HookSkipped if hooks are unavailable/unconfigured.
-
-    No HTTP call is made and no DB writes are performed for any HookSkipped path.
-    There is nothing to log and no reachability information to update.
-    """
-    if not HOOKS_AVAILABLE:
-        return HookSkipped()
-    hook = get_non_deleted_hook_by_hook_point(
-        db_session=db_session, hook_point=hook_point
-    )
-    if hook is None or not hook.is_active:
-        return HookSkipped()
-    if not hook.endpoint_url:
-        return HookSkipped()
-    return hook
-
-
-def _process_response(
+def _execute_hook_impl(
    *,
-    response: httpx.Response | None,
-    exc: Exception | None,
-    timeout: float,
-) -> _HttpOutcome:
-    """Process the result of an HTTP call and return a structured outcome.
-
-    Called after the client.post() try/except. If post() raised, exc is set and
-    response is None. Otherwise response is set and exc is None. Handles
-    raise_for_status(), JSON decoding, and the dict shape check.
-    """
-    if exc is not None:
-        if isinstance(exc, httpx.NetworkError):
-            msg = f"Hook network error (endpoint unreachable): {exc}"
-            logger.warning(msg, exc_info=exc)
-            return _HttpOutcome(
-                is_success=False,
-                updated_is_reachable=False,
-                status_code=None,
-                error_message=msg,
-                response_payload=None,
-            )
-        if isinstance(exc, httpx.TimeoutException):
-            msg = f"Hook timed out after {timeout}s: {exc}"
-            logger.warning(msg, exc_info=exc)
-            return _HttpOutcome(
-                is_success=False,
-                updated_is_reachable=None,  # timeout doesn't indicate unreachability
-                status_code=None,
-                error_message=msg,
-                response_payload=None,
-            )
-        msg = f"Hook call failed: {exc}"
-        logger.exception(msg, exc_info=exc)
-        return _HttpOutcome(
-            is_success=False,
-            updated_is_reachable=None,  # unknown error — don't make assumptions
-            status_code=None,
-            error_message=msg,
-            response_payload=None,
-        )
-
-    if response is None:
-        raise ValueError(
-            "exactly one of response or exc must be non-None; both are None"
-        )
-    status_code = response.status_code
-
-    try:
-        response.raise_for_status()
-    except httpx.HTTPStatusError as e:
-        msg = f"Hook returned HTTP {e.response.status_code}: {e.response.text}"
-        logger.warning(msg, exc_info=e)
-        # 401/403 means the api_key has been revoked or is invalid — mark unreachable
-        # so the operator knows to update it. All other HTTP errors keep is_reachable
-        # as-is (server is up, the request just failed for application reasons).
-        auth_failed = e.response.status_code in (401, 403)
-        return _HttpOutcome(
-            is_success=False,
-            updated_is_reachable=False if auth_failed else None,
-            status_code=status_code,
-            error_message=msg,
-            response_payload=None,
-        )
-
-    try:
-        response_payload = response.json()
-    except (json.JSONDecodeError, httpx.DecodingError) as e:
-        msg = f"Hook returned non-JSON response: {e}"
-        logger.warning(msg, exc_info=e)
-        return _HttpOutcome(
-            is_success=False,
-            updated_is_reachable=None,  # server responded — reachability unchanged
-            status_code=status_code,
-            error_message=msg,
-            response_payload=None,
-        )
-
-    if not isinstance(response_payload, dict):
-        msg = f"Hook returned non-dict JSON (got {type(response_payload).__name__})"
-        logger.warning(msg)
-        return _HttpOutcome(
-            is_success=False,
-            updated_is_reachable=None,  # server responded — reachability unchanged
-            status_code=status_code,
-            error_message=msg,
-            response_payload=None,
-        )
-
-    return _HttpOutcome(
-        is_success=True,
-        updated_is_reachable=True,
-        status_code=status_code,
-        error_message=None,
-        response_payload=response_payload,
-    )
-
-
-def _persist_result(
-    *,
-    hook_id: int,
-    outcome: _HttpOutcome,
-    duration_ms: int,
-) -> None:
-    """Write the execution log on failure and optionally update is_reachable, each
-    in its own session so a failure in one does not affect the other."""
-    # Only write the execution log on failure — success runs are not recorded.
-    # Must not be skipped if the is_reachable update fails (e.g. hook concurrently
-    # deleted between the initial lookup and here).
-    if not outcome.is_success:
-        try:
-            with get_session_with_current_tenant() as log_session:
-                create_hook_execution_log__no_commit(
-                    db_session=log_session,
-                    hook_id=hook_id,
-                    is_success=False,
-                    error_message=outcome.error_message,
-                    status_code=outcome.status_code,
-                    duration_ms=duration_ms,
-                )
-                log_session.commit()
-        except Exception:
-            logger.exception(
-                f"Failed to persist hook execution log for hook_id={hook_id}"
-            )
-
-    # Update is_reachable separately — best-effort, non-critical.
-    # None means the value is unchanged (set by the caller to skip the no-op write).
-    # update_hook__no_commit can raise OnyxError(NOT_FOUND) if the hook was
-    # concurrently deleted, so keep this isolated from the log write above.
-    if outcome.updated_is_reachable is not None:
-        try:
-            with get_session_with_current_tenant() as reachable_session:
-                update_hook__no_commit(
-                    db_session=reachable_session,
-                    hook_id=hook_id,
-                    is_reachable=outcome.updated_is_reachable,
-                )
-                reachable_session.commit()
-        except Exception:
-            logger.warning(f"Failed to update is_reachable for hook_id={hook_id}")
-
-
-# ---------------------------------------------------------------------------
-# Public API
-# ---------------------------------------------------------------------------
-
-
-def _execute_hook_inner(
-    hook: Hook,
-    payload: dict[str, Any],
-    response_type: type[T],
-) -> T | HookSoftFailed:
-    """Make the HTTP call, validate the response, and return a typed model.
-
-    Raises OnyxError on HARD failure. Returns HookSoftFailed on SOFT failure.
-    """
-    timeout = hook.timeout_seconds
-    hook_id = hook.id
-    fail_strategy = hook.fail_strategy
-    endpoint_url = hook.endpoint_url
-    current_is_reachable: bool | None = hook.is_reachable
-
-    if not endpoint_url:
-        raise ValueError(
-            f"hook_id={hook_id} is active but has no endpoint_url — "
-            "active hooks without an endpoint_url must be rejected by _lookup_hook"
-        )
-
-    start = time.monotonic()
-    response: httpx.Response | None = None
-    exc: Exception | None = None
-    try:
-        api_key: str | None = (
-            hook.api_key.get_value(apply_mask=False) if hook.api_key else None
-        )
-        headers: dict[str, str] = {"Content-Type": "application/json"}
-        if api_key:
-            headers["Authorization"] = f"Bearer {api_key}"
-        with httpx.Client(
-            timeout=timeout, follow_redirects=False
-        ) as client:  # SSRF guard: never follow redirects
-            response = client.post(endpoint_url, json=payload, headers=headers)
-    except Exception as e:
-        exc = e
-    duration_ms = int((time.monotonic() - start) * 1000)
-
-    outcome = _process_response(response=response, exc=exc, timeout=timeout)
-
-    # Validate the response payload against response_type.
-    # A validation failure downgrades the outcome to a failure so it is logged,
-    # is_reachable is left unchanged (server responded — just a bad payload),
-    # and fail_strategy is respected below.
-    validated_model: T | None = None
-    if outcome.is_success and outcome.response_payload is not None:
-        try:
-            validated_model = response_type.model_validate(outcome.response_payload)
-        except ValidationError as e:
-            msg = (
-                f"Hook response failed validation against {response_type.__name__}: {e}"
-            )
-            outcome = _HttpOutcome(
-                is_success=False,
-                updated_is_reachable=None,  # server responded — reachability unchanged
-                status_code=outcome.status_code,
-                error_message=msg,
-                response_payload=None,
-            )
-
-    # Skip the is_reachable write when the value would not change — avoids a
-    # no-op DB round-trip on every call when the hook is already in the expected state.
-    if outcome.updated_is_reachable == current_is_reachable:
-        outcome = outcome.model_copy(update={"updated_is_reachable": None})
-    _persist_result(hook_id=hook_id, outcome=outcome, duration_ms=duration_ms)
-
-    if not outcome.is_success:
-        if fail_strategy == HookFailStrategy.HARD:
-            raise OnyxError(
-                OnyxErrorCode.HOOK_EXECUTION_FAILED,
-                outcome.error_message or "Hook execution failed.",
-            )
-        logger.warning(
-            f"Hook execution failed (soft fail) for hook_id={hook_id}: {outcome.error_message}"
-        )
-        return HookSoftFailed()
-
-    if validated_model is None:
-        raise OnyxError(
-            OnyxErrorCode.INTERNAL_ERROR,
-            f"validated_model is None for successful hook call (hook_id={hook_id})",
-        )
-    return validated_model
+    db_session: Session,  # noqa: ARG001
+    hook_point: HookPoint,  # noqa: ARG001
+    payload: dict[str, Any],  # noqa: ARG001
+    response_type: type[T],  # noqa: ARG001
+) -> T | HookSkipped | HookSoftFailed:
+    """CE no-op — hooks are not available without EE."""
+    return HookSkipped()


 def execute_hook(
@@ -367,25 +48,15 @@ def execute_hook(
    payload: dict[str, Any],
    response_type: type[T],
 ) -> T | HookSkipped | HookSoftFailed:
-    """Execute the hook for the given hook point synchronously.
+    """Execute the hook for the given hook point.

-    Returns HookSkipped if no active hook is configured, HookSoftFailed if the
-    hook failed with SOFT fail strategy, or a validated response model on success.
-    Raises OnyxError on HARD failure or if the hook is misconfigured.
+    Dispatches to the versioned implementation so EE gets the real executor
+    and CE gets the no-op stub, without any changes at the call site.
    """
-    hook = _lookup_hook(db_session, hook_point)
-    if isinstance(hook, HookSkipped):
-        return hook
-
-    fail_strategy = hook.fail_strategy
-    hook_id = hook.id
-
-    try:
-        return _execute_hook_inner(hook, payload, response_type)
-    except Exception:
-        if fail_strategy == HookFailStrategy.SOFT:
-            logger.exception(
-                f"Unexpected error in hook execution (soft fail) for hook_id={hook_id}"
-            )
-            return HookSoftFailed()
-        raise
+    impl = fetch_versioned_implementation("onyx.hooks.executor", "_execute_hook_impl")
+    return impl(
+        db_session=db_session,
+        hook_point=hook_point,
+        payload=payload,
+        response_type=response_type,
+    )
--- a/backend/onyx/hooks/utils.py
+++ b/backend/onyx/hooks/utils.py
@@ -1,5 +0,0 @@
-from onyx.configs.app_configs import HOOK_ENABLED
-from shared_configs.configs import MULTI_TENANT
-
-# True only when hooks are available: single-tenant deployment with HOOK_ENABLED=true.
-HOOKS_AVAILABLE: bool = HOOK_ENABLED and not MULTI_TENANT
--- a/backend/onyx/indexing/adapters/document_indexing_adapter.py
+++ b/backend/onyx/indexing/adapters/document_indexing_adapter.py
@@ -19,7 +19,8 @@ from onyx.db.document import update_docs_updated_at__no_commit
 from onyx.db.document_set import fetch_document_sets_for_documents
 from onyx.indexing.indexing_pipeline import DocumentBatchPrepareContext
 from onyx.indexing.indexing_pipeline import index_doc_batch_prepare
-from onyx.indexing.models import BuildMetadataAwareChunksResult
+from onyx.indexing.models import ChunkEnrichmentContext
+from onyx.indexing.models import DocAwareChunk
 from onyx.indexing.models import DocMetadataAwareIndexChunk
 from onyx.indexing.models import IndexChunk
 from onyx.indexing.models import UpdatableChunkData
@@ -85,14 +86,21 @@ class DocumentIndexingBatchAdapter:
        ) as transaction:
            yield transaction

-    def build_metadata_aware_chunks(
+    def prepare_enrichment(
        self,
-        chunks_with_embeddings: list[IndexChunk],
-        chunk_content_scores: list[float],
-        tenant_id: str,
        context: DocumentBatchPrepareContext,
-    ) -> BuildMetadataAwareChunksResult:
-        """Enrich chunks with access, document sets, boosts, token counts, and hierarchy."""
+        tenant_id: str,
+        chunks: list[DocAwareChunk],
+    ) -> "DocumentChunkEnricher":
+        """Do all DB lookups once and return a per-chunk enricher."""
+        updatable_ids = [doc.id for doc in context.updatable_docs]
+
+        doc_id_to_new_chunk_cnt: dict[str, int] = {
+            doc_id: 0 for doc_id in updatable_ids
+        }
+        for chunk in chunks:
+            if chunk.source_document.id in doc_id_to_new_chunk_cnt:
+                doc_id_to_new_chunk_cnt[chunk.source_document.id] += 1

        no_access = DocumentAccess.build(
            user_emails=[],
@@ -102,67 +110,30 @@ class DocumentIndexingBatchAdapter:
            is_public=False,
        )

-        updatable_ids = [doc.id for doc in context.updatable_docs]
-
-        doc_id_to_access_info = get_access_for_documents(
-            document_ids=updatable_ids, db_session=self.db_session
-        )
-        doc_id_to_document_set = {
-            document_id: document_sets
-            for document_id, document_sets in fetch_document_sets_for_documents(
+        return DocumentChunkEnricher(
+            doc_id_to_access_info=get_access_for_documents(
                document_ids=updatable_ids, db_session=self.db_session
-            )
-        }
-
-        doc_id_to_previous_chunk_cnt: dict[str, int] = {
-            document_id: chunk_count
-            for document_id, chunk_count in fetch_chunk_counts_for_documents(
-                document_ids=updatable_ids,
-                db_session=self.db_session,
-            )
-        }
-
-        doc_id_to_new_chunk_cnt: dict[str, int] = {
-            doc_id: 0 for doc_id in updatable_ids
-        }
-        for chunk in chunks_with_embeddings:
-            if chunk.source_document.id in doc_id_to_new_chunk_cnt:
-                doc_id_to_new_chunk_cnt[chunk.source_document.id] += 1
-
-        # Get ancestor hierarchy node IDs for each document
-        doc_id_to_ancestor_ids = self._get_ancestor_ids_for_documents(
-            context.updatable_docs, tenant_id
-        )
-
-        access_aware_chunks = [
-            DocMetadataAwareIndexChunk.from_index_chunk(
-                index_chunk=chunk,
-                access=doc_id_to_access_info.get(chunk.source_document.id, no_access),
-                document_sets=set(
-                    doc_id_to_document_set.get(chunk.source_document.id, [])
-                ),
-                user_project=[],
-                personas=[],
-                boost=(
-                    context.id_to_boost_map[chunk.source_document.id]
-                    if chunk.source_document.id in context.id_to_boost_map
-                    else DEFAULT_BOOST
-                ),
-                tenant_id=tenant_id,
-                aggregated_chunk_boost_factor=chunk_content_scores[chunk_num],
-                ancestor_hierarchy_node_ids=doc_id_to_ancestor_ids[
-                    chunk.source_document.id
-                ],
-            )
-            for chunk_num, chunk in enumerate(chunks_with_embeddings)
-        ]
-
-        return BuildMetadataAwareChunksResult(
-            chunks=access_aware_chunks,
-            doc_id_to_previous_chunk_cnt=doc_id_to_previous_chunk_cnt,
-            doc_id_to_new_chunk_cnt=doc_id_to_new_chunk_cnt,
-            user_file_id_to_raw_text={},
-            user_file_id_to_token_count={},
+            ),
+            doc_id_to_document_set={
+                document_id: document_sets
+                for document_id, document_sets in fetch_document_sets_for_documents(
+                    document_ids=updatable_ids, db_session=self.db_session
+                )
+            },
+            doc_id_to_ancestor_ids=self._get_ancestor_ids_for_documents(
+                context.updatable_docs, tenant_id
+            ),
+            id_to_boost_map=context.id_to_boost_map,
+            doc_id_to_previous_chunk_cnt={
+                document_id: chunk_count
+                for document_id, chunk_count in fetch_chunk_counts_for_documents(
+                    document_ids=updatable_ids,
+                    db_session=self.db_session,
+                )
+            },
+            doc_id_to_new_chunk_cnt=dict(doc_id_to_new_chunk_cnt),
+            no_access=no_access,
+            tenant_id=tenant_id,
        )

    def _get_ancestor_ids_for_documents(
@@ -203,7 +174,7 @@ class DocumentIndexingBatchAdapter:
        context: DocumentBatchPrepareContext,
        updatable_chunk_data: list[UpdatableChunkData],
        filtered_documents: list[Document],
-        result: BuildMetadataAwareChunksResult,
+        enrichment: ChunkEnrichmentContext,
    ) -> None:
        """Finalize DB updates, store plaintext, and mark docs as indexed."""
        updatable_ids = [doc.id for doc in context.updatable_docs]
@@ -227,7 +198,7 @@ class DocumentIndexingBatchAdapter:

        update_docs_chunk_count__no_commit(
            document_ids=updatable_ids,
-            doc_id_to_chunk_count=result.doc_id_to_new_chunk_cnt,
+            doc_id_to_chunk_count=enrichment.doc_id_to_new_chunk_cnt,
            db_session=self.db_session,
        )

@@ -249,3 +220,52 @@ class DocumentIndexingBatchAdapter:
        )

        self.db_session.commit()
+
+
+class DocumentChunkEnricher:
+    """Per-batch lookup tables used to enrich chunks of connector documents.
+
+    Every mapping is handed in at construction time; ``enrich_chunk`` then
+    only performs in-memory lookups keyed by the chunk's source document ID.
+    The previous/new chunk-count mappings are exposed as public attributes,
+    the remaining tables are private to this class.
+    """
+
+    def __init__(
+        self,
+        doc_id_to_access_info: dict[str, DocumentAccess],
+        doc_id_to_document_set: dict[str, list[str]],
+        doc_id_to_ancestor_ids: dict[str, list[int]],
+        id_to_boost_map: dict[str, int],
+        doc_id_to_previous_chunk_cnt: dict[str, int],
+        doc_id_to_new_chunk_cnt: dict[str, int],
+        no_access: DocumentAccess,
+        tenant_id: str,
+    ) -> None:
+        # Public: chunk counts keyed by document ID.
+        self.doc_id_to_previous_chunk_cnt = doc_id_to_previous_chunk_cnt
+        self.doc_id_to_new_chunk_cnt = doc_id_to_new_chunk_cnt
+
+        # Private: only consulted by enrich_chunk.
+        self._tenant_id = tenant_id
+        self._no_access = no_access
+        self._id_to_boost_map = id_to_boost_map
+        self._doc_id_to_ancestor_ids = doc_id_to_ancestor_ids
+        self._doc_id_to_document_set = doc_id_to_document_set
+        self._doc_id_to_access_info = doc_id_to_access_info
+
+    def enrich_chunk(
+        self, chunk: IndexChunk, score: float
+    ) -> DocMetadataAwareIndexChunk:
+        """Wrap *chunk* with the metadata stored for its source document.
+
+        Access falls back to ``no_access``, document sets to an empty set and
+        boost to ``DEFAULT_BOOST`` when the document has no entry. Ancestor
+        IDs are read with plain indexing, so a document missing from that
+        mapping raises ``KeyError``. *score* is forwarded as the chunk's
+        ``aggregated_chunk_boost_factor``.
+        """
+        doc_id = chunk.source_document.id
+        access = self._doc_id_to_access_info.get(doc_id, self._no_access)
+        document_sets = set(self._doc_id_to_document_set.get(doc_id, []))
+        boost = self._id_to_boost_map.get(doc_id, DEFAULT_BOOST)
+        ancestor_ids = self._doc_id_to_ancestor_ids[doc_id]
+
+        return DocMetadataAwareIndexChunk.from_index_chunk(
+            index_chunk=chunk,
+            access=access,
+            document_sets=document_sets,
+            user_project=[],
+            personas=[],
+            boost=boost,
+            tenant_id=self._tenant_id,
+            aggregated_chunk_boost_factor=score,
+            ancestor_hierarchy_node_ids=ancestor_ids,
+        )
--- a/backend/onyx/indexing/adapters/user_file_indexing_adapter.py
+++ b/backend/onyx/indexing/adapters/user_file_indexing_adapter.py
@@ -1,6 +1,9 @@
+from __future__ import annotations
+
 import contextlib
 import datetime
 import time
+from collections import defaultdict
 from collections.abc import Generator
 from uuid import UUID

@@ -24,7 +27,8 @@ from onyx.db.user_file import fetch_persona_ids_for_user_files
 from onyx.db.user_file import fetch_user_project_ids_for_user_files
 from onyx.file_store.utils import store_user_file_plaintext
 from onyx.indexing.indexing_pipeline import DocumentBatchPrepareContext
-from onyx.indexing.models import BuildMetadataAwareChunksResult
+from onyx.indexing.models import ChunkEnrichmentContext
+from onyx.indexing.models import DocAwareChunk
 from onyx.indexing.models import DocMetadataAwareIndexChunk
 from onyx.indexing.models import IndexChunk
 from onyx.indexing.models import UpdatableChunkData
@@ -102,13 +106,20 @@ class UserFileIndexingAdapter:
                f"Failed to acquire locks after {_NUM_LOCK_ATTEMPTS} attempts for user files: {[doc.id for doc in documents]}"
            )

-    def build_metadata_aware_chunks(
+    def prepare_enrichment(
        self,
-        chunks_with_embeddings: list[IndexChunk],
-        chunk_content_scores: list[float],
-        tenant_id: str,
        context: DocumentBatchPrepareContext,
-    ) -> BuildMetadataAwareChunksResult:
+        tenant_id: str,
+        chunks: list[DocAwareChunk],
+    ) -> UserFileChunkEnricher:
+        """Do all DB lookups and pre-compute file metadata from chunks."""
+        updatable_ids = [doc.id for doc in context.updatable_docs]
+
+        doc_id_to_new_chunk_cnt: dict[str, int] = defaultdict(int)
+        content_by_file: dict[str, list[str]] = defaultdict(list)
+        for chunk in chunks:
+            doc_id_to_new_chunk_cnt[chunk.source_document.id] += 1
+            content_by_file[chunk.source_document.id].append(chunk.content)

        no_access = DocumentAccess.build(
            user_emails=[],
@@ -118,7 +129,6 @@ class UserFileIndexingAdapter:
            is_public=False,
        )

-        updatable_ids = [doc.id for doc in context.updatable_docs]
        user_file_id_to_project_ids = fetch_user_project_ids_for_user_files(
            user_file_ids=updatable_ids,
            db_session=self.db_session,
@@ -139,17 +149,6 @@ class UserFileIndexingAdapter:
            )
        }

-        user_file_id_to_new_chunk_cnt: dict[str, int] = {
-            user_file_id: len(
-                [
-                    chunk
-                    for chunk in chunks_with_embeddings
-                    if chunk.source_document.id == user_file_id
-                ]
-            )
-            for user_file_id in updatable_ids
-        }
-
        # Initialize tokenizer used for token count calculation
        try:
            llm = get_default_llm()
@@ -164,15 +163,9 @@ class UserFileIndexingAdapter:
        user_file_id_to_raw_text: dict[str, str] = {}
        user_file_id_to_token_count: dict[str, int | None] = {}
        for user_file_id in updatable_ids:
-            user_file_chunks = [
-                chunk
-                for chunk in chunks_with_embeddings
-                if chunk.source_document.id == user_file_id
-            ]
-            if user_file_chunks:
-                combined_content = " ".join(
-                    [chunk.content for chunk in user_file_chunks]
-                )
+            contents = content_by_file.get(user_file_id)
+            if contents:
+                combined_content = " ".join(contents)
                user_file_id_to_raw_text[str(user_file_id)] = combined_content
                token_count: int = (
                    count_tokens(combined_content, llm_tokenizer)
@@ -184,28 +177,16 @@ class UserFileIndexingAdapter:
                user_file_id_to_raw_text[str(user_file_id)] = ""
                user_file_id_to_token_count[str(user_file_id)] = None

-        access_aware_chunks = [
-            DocMetadataAwareIndexChunk.from_index_chunk(
-                index_chunk=chunk,
-                access=user_file_id_to_access.get(chunk.source_document.id, no_access),
-                document_sets=set(),
-                user_project=user_file_id_to_project_ids.get(
-                    chunk.source_document.id, []
-                ),
-                personas=user_file_id_to_persona_ids.get(chunk.source_document.id, []),
-                boost=DEFAULT_BOOST,
-                tenant_id=tenant_id,
-                aggregated_chunk_boost_factor=chunk_content_scores[chunk_num],
-            )
-            for chunk_num, chunk in enumerate(chunks_with_embeddings)
-        ]
-
-        return BuildMetadataAwareChunksResult(
-            chunks=access_aware_chunks,
+        return UserFileChunkEnricher(
+            user_file_id_to_access=user_file_id_to_access,
+            user_file_id_to_project_ids=user_file_id_to_project_ids,
+            user_file_id_to_persona_ids=user_file_id_to_persona_ids,
            doc_id_to_previous_chunk_cnt=user_file_id_to_previous_chunk_cnt,
-            doc_id_to_new_chunk_cnt=user_file_id_to_new_chunk_cnt,
+            doc_id_to_new_chunk_cnt=dict(doc_id_to_new_chunk_cnt),
            user_file_id_to_raw_text=user_file_id_to_raw_text,
            user_file_id_to_token_count=user_file_id_to_token_count,
+            no_access=no_access,
+            tenant_id=tenant_id,
        )

    def _notify_assistant_owners_if_files_ready(
@@ -249,8 +230,9 @@ class UserFileIndexingAdapter:
        context: DocumentBatchPrepareContext,
        updatable_chunk_data: list[UpdatableChunkData],  # noqa: ARG002
        filtered_documents: list[Document],  # noqa: ARG002
-        result: BuildMetadataAwareChunksResult,
+        enrichment: ChunkEnrichmentContext,
    ) -> None:
+        assert isinstance(enrichment, UserFileChunkEnricher)
        user_file_ids = [doc.id for doc in context.updatable_docs]

        user_files = (
@@ -266,8 +248,10 @@ class UserFileIndexingAdapter:
            user_file.last_project_sync_at = datetime.datetime.now(
                datetime.timezone.utc
            )
-            user_file.chunk_count = result.doc_id_to_new_chunk_cnt[str(user_file.id)]
-            user_file.token_count = result.user_file_id_to_token_count[
+            user_file.chunk_count = enrichment.doc_id_to_new_chunk_cnt.get(
+                str(user_file.id), 0
+            )
+            user_file.token_count = enrichment.user_file_id_to_token_count[
                str(user_file.id)
            ]

@@ -279,8 +263,54 @@ class UserFileIndexingAdapter:
        # Store the plaintext in the file store for faster retrieval
        # NOTE: this creates its own session to avoid committing the overall
        # transaction.
-        for user_file_id, raw_text in result.user_file_id_to_raw_text.items():
+        for user_file_id, raw_text in enrichment.user_file_id_to_raw_text.items():
            store_user_file_plaintext(
                user_file_id=UUID(user_file_id),
                plaintext_content=raw_text,
            )
+
+
+class UserFileChunkEnricher:
+    """Per-batch lookup tables used to enrich chunks of user-uploaded files.
+
+    Every mapping is handed in at construction time; ``enrich_chunk`` then
+    only performs in-memory lookups keyed by the chunk's source document ID.
+    Chunk counts, raw text and token counts are exposed as public
+    attributes; the access/project/persona tables are private.
+    """
+
+    def __init__(
+        self,
+        user_file_id_to_access: dict[str, DocumentAccess],
+        user_file_id_to_project_ids: dict[str, list[int]],
+        user_file_id_to_persona_ids: dict[str, list[int]],
+        doc_id_to_previous_chunk_cnt: dict[str, int],
+        doc_id_to_new_chunk_cnt: dict[str, int],
+        user_file_id_to_raw_text: dict[str, str],
+        user_file_id_to_token_count: dict[str, int | None],
+        no_access: DocumentAccess,
+        tenant_id: str,
+    ) -> None:
+        # Public: per-file results keyed by user file ID.
+        self.doc_id_to_previous_chunk_cnt = doc_id_to_previous_chunk_cnt
+        self.doc_id_to_new_chunk_cnt = doc_id_to_new_chunk_cnt
+        self.user_file_id_to_raw_text = user_file_id_to_raw_text
+        self.user_file_id_to_token_count = user_file_id_to_token_count
+
+        # Private: only consulted by enrich_chunk.
+        self._tenant_id = tenant_id
+        self._no_access = no_access
+        self._user_file_id_to_persona_ids = user_file_id_to_persona_ids
+        self._user_file_id_to_project_ids = user_file_id_to_project_ids
+        self._user_file_id_to_access = user_file_id_to_access
+
+    def enrich_chunk(
+        self, chunk: IndexChunk, score: float
+    ) -> DocMetadataAwareIndexChunk:
+        """Wrap *chunk* with the metadata stored for its source user file.
+
+        Access falls back to ``no_access``; project and persona IDs fall back
+        to empty lists. Document sets are always empty and boost is always
+        ``DEFAULT_BOOST``. *score* is forwarded as the chunk's
+        ``aggregated_chunk_boost_factor``.
+        """
+        file_id = chunk.source_document.id
+        access = self._user_file_id_to_access.get(file_id, self._no_access)
+        project_ids = self._user_file_id_to_project_ids.get(file_id, [])
+        persona_ids = self._user_file_id_to_persona_ids.get(file_id, [])
+
+        return DocMetadataAwareIndexChunk.from_index_chunk(
+            index_chunk=chunk,
+            access=access,
+            document_sets=set(),
+            user_project=project_ids,
+            personas=persona_ids,
+            boost=DEFAULT_BOOST,
+            tenant_id=self._tenant_id,
+            aggregated_chunk_boost_factor=score,
+        )
--- a/backend/onyx/indexing/chunk_batch_store.py
+++ b/backend/onyx/indexing/chunk_batch_store.py
@@ -0,0 +1,89 @@
+import pickle
+import shutil
+import tempfile
+from collections.abc import Iterator
+from pathlib import Path
+
+from onyx.indexing.models import IndexChunk
+
+
+class ChunkBatchStore:
+    """Spills batches of embedded chunks to pickle files in a temp directory.
+
+    The directory is created on ``__enter__`` and removed on ``__exit__``,
+    so every other method must be called inside the ``with`` block::
+
+        with ChunkBatchStore() as store:
+            store.save(chunks, batch_idx=0)
+            for chunk in store.stream():
+                ...
+
+    Batch files are read back with ``pickle.load``; nothing other than files
+    written by this store should ever be placed in its directory.
+    """
+
+    _EXT = ".pkl"
+
+    def __init__(self) -> None:
+        # Stays None until the context manager is entered.
+        self._tmpdir: Path | None = None
+
+    # -- context manager -----------------------------------------------------
+
+    def __enter__(self) -> "ChunkBatchStore":
+        self._tmpdir = Path(tempfile.mkdtemp(prefix="onyx_embeddings_"))
+        return self
+
+    def __exit__(self, *_exc: object) -> None:
+        if self._tmpdir is None:
+            return
+        # Best-effort cleanup: removal errors are ignored.
+        shutil.rmtree(self._tmpdir, ignore_errors=True)
+        self._tmpdir = None
+
+    @property
+    def _dir(self) -> Path:
+        """The live temp directory; asserts the store has been entered."""
+        assert self._tmpdir is not None, "ChunkBatchStore used outside context manager"
+        return self._tmpdir
+
+    # -- storage primitives --------------------------------------------------
+
+    def save(self, chunks: list[IndexChunk], batch_idx: int) -> None:
+        """Pickle *chunks* into the file for batch number *batch_idx*."""
+        target = self._dir / f"batch_{batch_idx}{self._EXT}"
+        with target.open("wb") as fh:
+            pickle.dump(chunks, fh)
+
+    def _load(self, batch_file: Path) -> list[IndexChunk]:
+        """Unpickle and return the chunk list stored in *batch_file*."""
+        with batch_file.open("rb") as fh:
+            return pickle.load(fh)
+
+    def _batch_files(self) -> list[Path]:
+        """List the batch files, ordered by their numeric batch index."""
+
+        def batch_number(path: Path) -> int:
+            return int(path.stem.removeprefix("batch_"))
+
+        return sorted(self._dir.glob(f"batch_*{self._EXT}"), key=batch_number)
+
+    # -- higher-level operations ---------------------------------------------
+
+    def stream(self) -> Iterator[IndexChunk]:
+        """Yield every stored chunk, one batch file after another.
+
+        Each call builds a new generator that re-reads the files, so the
+        data can be iterated several times (e.g. once per document index).
+        """
+        for path in self._batch_files():
+            for chunk in self._load(path):
+                yield chunk
+
+    def scrub_failed_docs(self, failed_doc_ids: set[str]) -> None:
+        """Drop chunks whose document is in *failed_doc_ids* from every file.
+
+        A document that fails embedding in batch N may already have chunks
+        saved in earlier batches; removing them keeps the stored output
+        all-or-nothing per document. Files that contain no such chunks are
+        left untouched.
+        """
+        for path in self._batch_files():
+            loaded = self._load(path)
+            kept = [c for c in loaded if c.source_document.id not in failed_doc_ids]
+            if len(kept) == len(loaded):
+                continue
+            with path.open("wb") as fh:
+                pickle.dump(kept, fh)
--- a/backend/onyx/indexing/indexing_pipeline.py
+++ b/backend/onyx/indexing/indexing_pipeline.py
@@ -1,5 +1,8 @@
 from collections import defaultdict
 from collections.abc import Callable
+from collections.abc import Generator
+from collections.abc import Iterator
+from contextlib import contextmanager
 from typing import Protocol

 from pydantic import BaseModel
@@ -9,6 +12,7 @@ from sqlalchemy.orm import Session
 from onyx.configs.app_configs import DEFAULT_CONTEXTUAL_RAG_LLM_NAME
 from onyx.configs.app_configs import DEFAULT_CONTEXTUAL_RAG_LLM_PROVIDER
 from onyx.configs.app_configs import ENABLE_CONTEXTUAL_RAG
+from onyx.configs.app_configs import MAX_CHUNKS_PER_DOC_BATCH
 from onyx.configs.app_configs import MAX_DOCUMENT_CHARS
 from onyx.configs.app_configs import MAX_TOKENS_FOR_FULL_INCLUSION
 from onyx.configs.app_configs import USE_CHUNK_SUMMARY
@@ -43,10 +47,12 @@ from onyx.document_index.interfaces import DocumentMetadata
 from onyx.document_index.interfaces import IndexBatchParams
 from onyx.file_processing.image_summarization import summarize_image_with_error_handling
 from onyx.file_store.file_store import get_default_file_store
+from onyx.indexing.chunk_batch_store import ChunkBatchStore
 from onyx.indexing.chunker import Chunker
 from onyx.indexing.embedder import embed_chunks_with_failure_handling
 from onyx.indexing.embedder import IndexingEmbedder
 from onyx.indexing.models import DocAwareChunk
+from onyx.indexing.models import DocMetadataAwareIndexChunk
 from onyx.indexing.models import IndexingBatchAdapter
 from onyx.indexing.models import UpdatableChunkData
 from onyx.indexing.vector_db_insertion import write_chunks_to_vector_db_with_backoff
@@ -63,6 +69,7 @@ from onyx.natural_language_processing.utils import tokenizer_trim_middle
 from onyx.prompts.contextual_retrieval import CONTEXTUAL_RAG_PROMPT1
 from onyx.prompts.contextual_retrieval import CONTEXTUAL_RAG_PROMPT2
 from onyx.prompts.contextual_retrieval import DOCUMENT_SUMMARY_PROMPT
+from onyx.utils.batching import batch_generator
 from onyx.utils.logger import setup_logger
 from onyx.utils.postgres_sanitization import sanitize_documents_for_postgres
 from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
@@ -91,6 +98,20 @@ class IndexingPipelineResult(BaseModel):

    failures: list[ConnectorFailure]

+    @classmethod
+    def empty(cls, total_docs: int) -> "IndexingPipelineResult":
+        """Build a result with zero new docs, zero chunks and no failures.
+
+        *total_docs* is passed through unchanged as the ``total_docs`` field.
+        """
+        return cls(new_docs=0, total_docs=total_docs, total_chunks=0, failures=[])
+
+
+class ChunkEmbeddingResult(BaseModel):
+    # Summary of an embedding pass: which chunks succeeded and which
+    # failures were collected along the way.
+
+    # One entry per chunk that was embedded successfully.
+    successful_chunk_ids: list[tuple[int, str]]  # (chunk_id, document_id)
+    # Failures collected while embedding.
+    connector_failures: list[ConnectorFailure]
+

 class IndexingPipelineProtocol(Protocol):
    def __call__(
@@ -139,6 +160,110 @@ def _upsert_documents_in_db(
        )


+def _get_failed_doc_ids(failures: list[ConnectorFailure]) -> set[str]:
+    """Collect the document IDs referenced by *failures*.
+
+    Failures whose ``failed_document`` is falsy are ignored.
+    """
+    doc_ids: set[str] = set()
+    for failure in failures:
+        if failure.failed_document:
+            doc_ids.add(failure.failed_document.document_id)
+    return doc_ids
+
+
+def _embed_chunks_to_store(
+    chunks: list[DocAwareChunk],
+    embedder: IndexingEmbedder,
+    tenant_id: str,
+    request_id: str | None,
+    store: ChunkBatchStore,
+) -> ChunkEmbeddingResult:
+    """Embed chunks in batches, spilling each batch to *store*.
+
+    If a document fails embedding in any batch, its chunks are excluded from
+    all batches (including earlier ones already written) so that the output
+    is all-or-nothing per document.
+
+    Args:
+        chunks: Chunks to embed; processed in slices of at most
+            ``MAX_CHUNKS_PER_DOC_BATCH``.
+        embedder: Forwarded to ``embed_chunks_with_failure_handling``.
+        tenant_id: Forwarded to ``embed_chunks_with_failure_handling``.
+        request_id: Forwarded to ``embed_chunks_with_failure_handling``.
+        store: Destination for each embedded batch.
+
+    Returns:
+        A ``ChunkEmbeddingResult`` holding the ``(chunk_id, document_id)``
+        pairs of saved chunks whose document never failed, plus every
+        embedding failure collected across all batches.
+    """
+    successful_chunk_ids: list[tuple[int, str]] = []
+    all_embedding_failures: list[ConnectorFailure] = []
+    # Track failed doc IDs across all batches so that a failure in batch N
+    # causes chunks for that doc to be skipped in batch N+1 and stripped
+    # from earlier batches.
+    all_failed_doc_ids: set[str] = set()
+
+    for batch_idx, chunk_batch in enumerate(
+        batch_generator(chunks, MAX_CHUNKS_PER_DOC_BATCH)
+    ):
+        # Skip chunks belonging to documents that failed in earlier batches.
+        chunk_batch = [
+            c for c in chunk_batch if c.source_document.id not in all_failed_doc_ids
+        ]
+        # A fully filtered-out batch is never saved, so the batch indices
+        # written to the store may have gaps.
+        if not chunk_batch:
+            continue
+
+        logger.debug(f"Embedding batch {batch_idx}: {len(chunk_batch)} chunks")
+
+        chunks_with_embeddings, embedding_failures = embed_chunks_with_failure_handling(
+            chunks=chunk_batch,
+            embedder=embedder,
+            tenant_id=tenant_id,
+            request_id=request_id,
+        )
+        all_embedding_failures.extend(embedding_failures)
+        # Must be updated before the filter below so that docs failing in
+        # this very batch are dropped from it too.
+        all_failed_doc_ids.update(_get_failed_doc_ids(embedding_failures))
+
+        # Only keep successfully embedded chunks for non-failed docs.
+        chunks_with_embeddings = [
+            c
+            for c in chunks_with_embeddings
+            if c.source_document.id not in all_failed_doc_ids
+        ]
+
+        successful_chunk_ids.extend(
+            (c.chunk_id, c.source_document.id) for c in chunks_with_embeddings
+        )
+
+        store.save(chunks_with_embeddings, batch_idx)
+        # Drop the local reference now that the batch is on disk
+        # (presumably so its memory can be reclaimed before the next batch).
+        del chunks_with_embeddings
+
+    # Scrub earlier batches for docs that failed in later batches.
+    if all_failed_doc_ids:
+        store.scrub_failed_docs(all_failed_doc_ids)
+        # Mirror the scrub in the ID list: IDs recorded before a document's
+        # failure was known are removed here.
+        successful_chunk_ids = [
+            (chunk_id, doc_id)
+            for chunk_id, doc_id in successful_chunk_ids
+            if doc_id not in all_failed_doc_ids
+        ]
+
+    return ChunkEmbeddingResult(
+        successful_chunk_ids=successful_chunk_ids,
+        connector_failures=all_embedding_failures,
+    )
+
+
+@contextmanager
+def embed_and_stream(
+    chunks: list[DocAwareChunk],
+    embedder: IndexingEmbedder,
+    tenant_id: str,
+    request_id: str | None,
+) -> Generator[tuple[ChunkEmbeddingResult, ChunkBatchStore], None, None]:
+    """Context manager: embed *chunks* into a fresh store and hand both back.
+
+    Yields a ``(result, store)`` pair. The store owns the temp directory, so
+    its files are cleaned up when the ``with`` block exits and ``store`` must
+    not be used afterwards.
+
+    Usage::
+
+        with embed_and_stream(chunks, embedder, tenant_id, req_id) as (result, store):
+            for chunk in store.stream():
+                ...
+    """
+    with ChunkBatchStore() as batch_store:
+        # All embedding work finishes before control reaches the caller.
+        yield (
+            _embed_chunks_to_store(
+                chunks=chunks,
+                embedder=embedder,
+                tenant_id=tenant_id,
+                request_id=request_id,
+                store=batch_store,
+            ),
+            batch_store,
+        )
+
+
 def get_doc_ids_to_update(
    documents: list[Document], db_docs: list[DBDocument]
 ) -> list[Document]:
@@ -637,6 +762,29 @@ def add_contextual_summaries(
    return chunks


+def _verify_indexing_completeness(
+    insertion_records: list[DocumentInsertionRecord],
+    write_failures: list[ConnectorFailure],
+    embedding_failed_doc_ids: set[str],
+    updatable_ids: list[str],
+    document_index_name: str,
+) -> None:
+    """Check that the accounted-for document IDs exactly match *updatable_ids*.
+
+    A document is accounted for when it appears in an insertion record, in a
+    write failure that names a document, or in *embedding_failed_doc_ids*.
+
+    Raises:
+        RuntimeError: if that set differs from ``set(updatable_ids)``.
+    """
+    indexed_ids = {record.document_id for record in insertion_records}
+    all_returned_doc_ids = (
+        indexed_ids | _get_failed_doc_ids(write_failures) | embedding_failed_doc_ids
+    )
+    if all_returned_doc_ids == set(updatable_ids):
+        return
+
+    raise RuntimeError(
+        f"Some documents were not successfully indexed. "
+        f"Updatable IDs: {updatable_ids}, "
+        f"Returned IDs: {all_returned_doc_ids}. "
+        f"This should never happen. "
+        f"This occured for document index {document_index_name}"
+    )
+
+
@log_function_time(debug_only=True)
 def index_doc_batch(
    *,
@@ -672,12 +820,7 @@ def index_doc_batch(
    filtered_documents = filter_fnc(document_batch)
    context = adapter.prepare(filtered_documents, ignore_time_skip)
    if not context:
-        return IndexingPipelineResult(
-            new_docs=0,
-            total_docs=len(filtered_documents),
-            total_chunks=0,
-            failures=[],
-        )
+        return IndexingPipelineResult.empty(len(filtered_documents))

    # Convert documents to IndexingDocument objects with processed section
    # logger.debug("Processing image sections")
@@ -716,117 +859,99 @@ def index_doc_batch(
        )

    logger.debug("Starting embedding")
-    chunks_with_embeddings, embedding_failures = (
-        embed_chunks_with_failure_handling(
-            chunks=chunks,
-            embedder=embedder,
-            tenant_id=tenant_id,
-            request_id=request_id,
-        )
-        if chunks
-        else ([], [])
-    )
+    with embed_and_stream(chunks, embedder, tenant_id, request_id) as (
+        embedding_result,
+        chunk_store,
+    ):
+        updatable_ids = [doc.id for doc in context.updatable_docs]
+        updatable_chunk_data = [
+            UpdatableChunkData(
+                chunk_id=chunk_id,
+                document_id=document_id,
+                boost_score=1.0,
+            )
+            for chunk_id, document_id in embedding_result.successful_chunk_ids
+        ]

-    chunk_content_scores = [1.0] * len(chunks_with_embeddings)
-
-    updatable_ids = [doc.id for doc in context.updatable_docs]
-    updatable_chunk_data = [
-        UpdatableChunkData(
-            chunk_id=chunk.chunk_id,
-            document_id=chunk.source_document.id,
-            boost_score=score,
-        )
-        for chunk, score in zip(chunks_with_embeddings, chunk_content_scores)
-    ]
-
-    # Acquires a lock on the documents so that no other process can modify them
-    # NOTE: don't need to acquire till here, since this is when the actual race condition
-    # with Vespa can occur.
-    with adapter.lock_context(context.updatable_docs):
-        # we're concerned about race conditions where multiple simultaneous indexings might result
-        # in one set of metadata overwriting another one in vespa.
-        # we still write data here for the immediate and most likely correct sync, but
-        # to resolve this, an update of the last modified field at the end of this loop
-        # always triggers a final metadata sync via the celery queue
-        result = adapter.build_metadata_aware_chunks(
-            chunks_with_embeddings=chunks_with_embeddings,
-            chunk_content_scores=chunk_content_scores,
-            tenant_id=tenant_id,
-            context=context,
+        embedding_failed_doc_ids = _get_failed_doc_ids(
+            embedding_result.connector_failures
        )

-        short_descriptor_list = [chunk.to_short_descriptor() for chunk in result.chunks]
-        short_descriptor_log = str(short_descriptor_list)[:1024]
-        logger.debug(f"Indexing the following chunks: {short_descriptor_log}")
+        # Filter to only successfully embedded chunks so
+        # doc_id_to_new_chunk_cnt reflects what's actually written to Vespa.
+        embedded_chunks = [
+            c for c in chunks if c.source_document.id not in embedding_failed_doc_ids
+        ]

-        primary_doc_idx_insertion_records: list[DocumentInsertionRecord] | None = None
-        primary_doc_idx_vector_db_write_failures: list[ConnectorFailure] | None = None
-        for document_index in document_indices:
-            # A document will not be spread across different batches, so all the
-            # documents with chunks in this set, are fully represented by the chunks
-            # in this set
-            (
-                insertion_records,
-                vector_db_write_failures,
-            ) = write_chunks_to_vector_db_with_backoff(
-                document_index=document_index,
-                chunks=result.chunks,
-                index_batch_params=IndexBatchParams(
-                    doc_id_to_previous_chunk_cnt=result.doc_id_to_previous_chunk_cnt,
-                    doc_id_to_new_chunk_cnt=result.doc_id_to_new_chunk_cnt,
-                    tenant_id=tenant_id,
-                    large_chunks_enabled=chunker.enable_large_chunks,
-                ),
+        # Acquires a lock on the documents so that no other process can modify
+        # them.  Not needed until here, since this is when the actual race
+        # condition with vector db can occur.
+        with adapter.lock_context(context.updatable_docs):
+            enricher = adapter.prepare_enrichment(
+                context=context,
+                tenant_id=tenant_id,
+                chunks=embedded_chunks,
            )

-            all_returned_doc_ids: set[str] = (
-                {record.document_id for record in insertion_records}
-                .union(
-                    {
-                        record.failed_document.document_id
-                        for record in vector_db_write_failures
-                        if record.failed_document
-                    }
-                )
-                .union(
-                    {
-                        record.failed_document.document_id
-                        for record in embedding_failures
-                        if record.failed_document
-                    }
-                )
+            index_batch_params = IndexBatchParams(
+                doc_id_to_previous_chunk_cnt=enricher.doc_id_to_previous_chunk_cnt,
+                doc_id_to_new_chunk_cnt=enricher.doc_id_to_new_chunk_cnt,
+                tenant_id=tenant_id,
+                large_chunks_enabled=chunker.enable_large_chunks,
            )
-            if all_returned_doc_ids != set(updatable_ids):
-                raise RuntimeError(
-                    f"Some documents were not successfully indexed. "
-                    f"Updatable IDs: {updatable_ids}, "
-                    f"Returned IDs: {all_returned_doc_ids}. "
-                    "This should never happen."
-                    f"This occured for document index {document_index.__class__.__name__}"
-                )
-            # We treat the first document index we got as the primary one used
-            # for reporting the state of indexing.
-            if primary_doc_idx_insertion_records is None:
-                primary_doc_idx_insertion_records = insertion_records
-            if primary_doc_idx_vector_db_write_failures is None:
-                primary_doc_idx_vector_db_write_failures = vector_db_write_failures

-        adapter.post_index(
-            context=context,
-            updatable_chunk_data=updatable_chunk_data,
-            filtered_documents=filtered_documents,
-            result=result,
-        )
+            primary_doc_idx_insertion_records: list[DocumentInsertionRecord] | None = (
+                None
+            )
+            primary_doc_idx_vector_db_write_failures: list[ConnectorFailure] | None = (
+                None
+            )
+
+            for document_index in document_indices:
+
+                def _enriched_stream() -> Iterator[DocMetadataAwareIndexChunk]:
+                    for chunk in chunk_store.stream():
+                        yield enricher.enrich_chunk(chunk, 1.0)
+
+                insertion_records, write_failures = (
+                    write_chunks_to_vector_db_with_backoff(
+                        document_index=document_index,
+                        make_chunks=_enriched_stream,
+                        index_batch_params=index_batch_params,
+                    )
+                )
+
+                _verify_indexing_completeness(
+                    insertion_records=insertion_records,
+                    write_failures=write_failures,
+                    embedding_failed_doc_ids=embedding_failed_doc_ids,
+                    updatable_ids=updatable_ids,
+                    document_index_name=document_index.__class__.__name__,
+                )
+                # We treat the first document index we got as the primary one used
+                # for reporting the state of indexing.
+                if primary_doc_idx_insertion_records is None:
+                    primary_doc_idx_insertion_records = insertion_records
+                if primary_doc_idx_vector_db_write_failures is None:
+                    primary_doc_idx_vector_db_write_failures = write_failures
+
+            adapter.post_index(
+                context=context,
+                updatable_chunk_data=updatable_chunk_data,
+                filtered_documents=filtered_documents,
+                enrichment=enricher,
+            )

    assert primary_doc_idx_insertion_records is not None
    assert primary_doc_idx_vector_db_write_failures is not None
    return IndexingPipelineResult(
-        new_docs=len(
-            [r for r in primary_doc_idx_insertion_records if not r.already_existed]
+        new_docs=sum(
+            1 for r in primary_doc_idx_insertion_records if not r.already_existed
        ),
        total_docs=len(filtered_documents),
-        total_chunks=len(chunks_with_embeddings),
-        failures=primary_doc_idx_vector_db_write_failures + embedding_failures,
+        total_chunks=len(embedding_result.successful_chunk_ids),
+        failures=primary_doc_idx_vector_db_write_failures
+        + embedding_result.connector_failures,
    )


--- a/backend/onyx/indexing/models.py
+++ b/backend/onyx/indexing/models.py
@@ -235,12 +235,16 @@ class UpdatableChunkData(BaseModel):
    boost_score: float


-class BuildMetadataAwareChunksResult(BaseModel):
-    chunks: list[DocMetadataAwareIndexChunk]
+class ChunkEnrichmentContext(Protocol):
+    """Returned by prepare_enrichment. Holds pre-computed metadata lookups
+    and provides per-chunk enrichment."""
+
    doc_id_to_previous_chunk_cnt: dict[str, int]
    doc_id_to_new_chunk_cnt: dict[str, int]
-    user_file_id_to_raw_text: dict[str, str]
-    user_file_id_to_token_count: dict[str, int | None]
+
+    def enrich_chunk(
+        self, chunk: IndexChunk, score: float
+    ) -> DocMetadataAwareIndexChunk: ...


 class IndexingBatchAdapter(Protocol):
@@ -254,18 +258,24 @@ class IndexingBatchAdapter(Protocol):
    ) -> Generator[TransactionalContext, None, None]:
        """Provide a transaction/row-lock context for critical updates."""

-    def build_metadata_aware_chunks(
+    def prepare_enrichment(
        self,
-        chunks_with_embeddings: list[IndexChunk],
-        chunk_content_scores: list[float],
-        tenant_id: str,
        context: "DocumentBatchPrepareContext",
-    ) -> BuildMetadataAwareChunksResult: ...
+        tenant_id: str,
+        chunks: list[DocAwareChunk],
+    ) -> ChunkEnrichmentContext:
+        """Prepare per-chunk enrichment data (access, document sets, boost, etc.).
+
+        Precondition: ``chunks`` have already been through the embedding step
+        (i.e. they are ``IndexChunk`` instances with populated embeddings,
+        passed here as the base ``DocAwareChunk`` type).
+        """
+        ...

    def post_index(
        self,
        context: "DocumentBatchPrepareContext",
        updatable_chunk_data: list[UpdatableChunkData],
        filtered_documents: list[Document],
-        result: BuildMetadataAwareChunksResult,
+        enrichment: ChunkEnrichmentContext,
    ) -> None: ...
--- a/backend/onyx/indexing/vector_db_insertion.py
+++ b/backend/onyx/indexing/vector_db_insertion.py
@@ -1,6 +1,9 @@
 import time
-from collections import defaultdict
+from collections.abc import Callable
+from collections.abc import Iterable
 from http import HTTPStatus
+from itertools import chain
+from itertools import groupby

 import httpx

@@ -28,22 +31,22 @@ def _log_insufficient_storage_error(e: Exception) -> None:

 def write_chunks_to_vector_db_with_backoff(
    document_index: DocumentIndex,
-    chunks: list[DocMetadataAwareIndexChunk],
+    make_chunks: Callable[[], Iterable[DocMetadataAwareIndexChunk]],
    index_batch_params: IndexBatchParams,
 ) -> tuple[list[DocumentInsertionRecord], list[ConnectorFailure]]:
    """Tries to insert all chunks in one large batch. If that batch fails for any reason,
    goes document by document to isolate the failure(s).

    IMPORTANT: must pass in whole documents at a time not individual chunks, since the
-    vector DB interface assumes that all chunks for a single document are present.
+    vector DB interface assumes that all chunks for a single document are present. The
+    chunks of each document must also be contiguous (not interleaved across documents).
    """
-
    # first try to write the chunks to the vector db
    try:
        return (
            list(
                document_index.index(
-                    chunks=chunks,
+                    chunks=make_chunks(),
                    index_batch_params=index_batch_params,
                )
            ),
@@ -60,14 +63,23 @@ def write_chunks_to_vector_db_with_backoff(
        # wait a couple seconds just to give the vector db a chance to recover
        time.sleep(2)

-    # try writing each doc one by one
-    chunks_for_docs: dict[str, list[DocMetadataAwareIndexChunk]] = defaultdict(list)
-    for chunk in chunks:
-        chunks_for_docs[chunk.source_document.id].append(chunk)
-
    insertion_records: list[DocumentInsertionRecord] = []
    failures: list[ConnectorFailure] = []
-    for doc_id, chunks_for_doc in chunks_for_docs.items():
+
+    def key(chunk: DocMetadataAwareIndexChunk) -> str:
+        return chunk.source_document.id
+
+    seen_doc_ids: set[str] = set()
+    for doc_id, chunks_for_doc in groupby(make_chunks(), key=key):
+        if doc_id in seen_doc_ids:
+            raise RuntimeError(
+                f"Doc chunks are not arriving in order. Current doc_id={doc_id}, seen_doc_ids={list(seen_doc_ids)}"
+            )
+        seen_doc_ids.add(doc_id)
+
+        first_chunk = next(chunks_for_doc)
+        chunks_for_doc = chain([first_chunk], chunks_for_doc)
+
        try:
            insertion_records.extend(
                document_index.index(
@@ -87,9 +99,7 @@ def write_chunks_to_vector_db_with_backoff(
                ConnectorFailure(
                    failed_document=DocumentFailure(
                        document_id=doc_id,
-                        document_link=(
-                            chunks_for_doc[0].get_link() if chunks_for_doc else None
-                        ),
+                        document_link=first_chunk.get_link(),
                    ),
                    failure_message=str(e),
                    exception=e,
--- a/backend/onyx/llm/multi_llm.py
+++ b/backend/onyx/llm/multi_llm.py
@@ -185,6 +185,21 @@ def _messages_contain_tool_content(messages: list[dict[str, Any]]) -> bool:
    return False


+def _prompt_contains_tool_call_history(prompt: LanguageModelInput) -> bool:
+    """Check if the prompt contains any assistant messages with tool_calls.
+
+    When Anthropic's extended thinking is enabled, the API requires every
+    assistant message to start with a thinking block before any tool_use
+    blocks.  Since we don't preserve thinking_blocks (they carry
+    cryptographic signatures that can't be reconstructed), we must skip
+    the thinking param whenever history contains prior tool-calling turns.
+
+    Returns:
+        True if any message in ``prompt`` is an ``AssistantMessage`` with a
+        truthy ``tool_calls``; False otherwise.
+    """
+    # NOTE(review): function-scope import, presumably to avoid a circular
+    # import with onyx.llm.models -- confirm.
+    from onyx.llm.models import AssistantMessage
+
+    # A non-list prompt is wrapped in a list so both shapes share one check.
+    msgs = prompt if isinstance(prompt, list) else [prompt]
+    return any(isinstance(msg, AssistantMessage) and msg.tool_calls for msg in msgs)
+
+
 def _is_vertex_model_rejecting_output_config(model_name: str) -> bool:
    normalized_model_name = model_name.lower()
    return any(
@@ -466,7 +481,20 @@ class LitellmLLM(LLM):
                    reasoning_effort
                )

-                if budget_tokens is not None:
+                # Anthropic requires every assistant message with tool_use
+                # blocks to start with a thinking block that carries a
+                # cryptographic signature.  We don't preserve those blocks
+                # across turns, so skip thinking when the history already
+                # contains tool-calling assistant messages.  LiteLLM's
+                # modify_params workaround doesn't cover all providers
+                # (notably Bedrock).
+                can_enable_thinking = (
+                    budget_tokens is not None
+                    and not _prompt_contains_tool_call_history(prompt)
+                )
+
+                if can_enable_thinking:
+                    assert budget_tokens is not None  # mypy
                    if max_tokens is not None:
                        # Anthropic has a weird rule where max token has to be at least as much as budget tokens if set
                        # and the minimum budget tokens is 1024
--- a/backend/onyx/main.py
+++ b/backend/onyx/main.py
@@ -77,7 +77,6 @@ from onyx.server.features.default_assistant.api import (
 )
 from onyx.server.features.document_set.api import router as document_set_router
 from onyx.server.features.hierarchy.api import router as hierarchy_router
-from onyx.server.features.hooks.api import router as hook_router
 from onyx.server.features.input_prompt.api import (
    admin_router as admin_input_prompt_router,
 )
@@ -439,6 +438,7 @@ def get_application(lifespan_override: Lifespan | None = None) -> FastAPI:
            dsn=SENTRY_DSN,
            integrations=[StarletteIntegration(), FastApiIntegration()],
            traces_sample_rate=0.1,
+            release=__version__,
        )
        logger.info("Sentry initialized")
    else:
@@ -454,7 +454,6 @@ def get_application(lifespan_override: Lifespan | None = None) -> FastAPI:

    register_onyx_exception_handlers(application)

-    include_router_with_global_prefix_prepended(application, hook_router)
    include_router_with_global_prefix_prepended(application, password_router)
    include_router_with_global_prefix_prepended(application, chat_router)
    include_router_with_global_prefix_prepended(application, query_router)
--- a/backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/package-lock.json
+++ b/backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/package-lock.json
@@ -3844,9 +3844,9 @@
      }
    },
    "node_modules/@ts-morph/common/node_modules/brace-expansion": {
-      "version": "5.0.3",
-      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.3.tgz",
-      "integrity": "sha512-fy6KJm2RawA5RcHkLa1z/ScpBeA762UF9KmZQxwIbDtRJrgLzM10depAiEQ+CXYcoiqW1/m96OAAoke2nE9EeA==",
+      "version": "5.0.5",
+      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.5.tgz",
+      "integrity": "sha512-VZznLgtwhn+Mact9tfiwx64fA9erHH/MCXEUfB/0bX/6Fz6ny5EGTXYltMocqg4xFAQZtnO3DHWWXi8RiuN7cQ==",
      "license": "MIT",
      "dependencies": {
        "balanced-match": "^4.0.2"
@@ -4224,9 +4224,9 @@
      }
    },
    "node_modules/@typescript-eslint/typescript-estree/node_modules/brace-expansion": {
-      "version": "2.0.2",
-      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz",
-      "integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==",
+      "version": "2.0.3",
+      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.3.tgz",
+      "integrity": "sha512-MCV/fYJEbqx68aE58kv2cA/kiky1G8vux3OR6/jbS+jIMe/6fJWa0DTzJU7dqijOWYwHi1t29FlfYI9uytqlpA==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
@@ -5007,9 +5007,9 @@
      }
    },
    "node_modules/brace-expansion": {
-      "version": "1.1.12",
-      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
-      "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
+      "version": "1.1.13",
+      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.13.tgz",
+      "integrity": "sha512-9ZLprWS6EENmhEOpjCYW2c8VkmOvckIJZfkr7rBW6dObmfgJ/L1GpSYW5Hpo9lDz4D1+n0Ckz8rU7FwHDQiG/w==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
--- a/backend/onyx/server/features/projects/projects_file_utils.py
+++ b/backend/onyx/server/features/projects/projects_file_utils.py
@@ -83,17 +83,9 @@ class CategorizedFiles(BaseModel):
    model_config = ConfigDict(arbitrary_types_allowed=True)


-# Extensions that bypass the token-count threshold on upload.
-_TOKEN_THRESHOLD_EXEMPT_EXTENSIONS: set[str] = {
-    ".csv",
-    ".tsv",
-    ".xlsx",
-}
-
-
 def _skip_token_threshold(extension: str) -> bool:
    """Return True if this file extension should bypass the token limit."""
-    return extension.lower() in _TOKEN_THRESHOLD_EXEMPT_EXTENSIONS
+    return extension.lower() in OnyxFileExtensions.TABULAR_EXTENSIONS


 def _apply_long_side_cap(width: int, height: int, cap: int) -> tuple[int, int]:
--- a/backend/onyx/server/metrics/indexing_pipeline.py
+++ b/backend/onyx/server/metrics/indexing_pipeline.py
@@ -12,7 +12,6 @@ stale, which is fine for monitoring dashboards.
 import json
 import threading
 import time
-from collections.abc import Callable
 from datetime import datetime
 from datetime import timezone
 from typing import Any
@@ -104,25 +103,23 @@ class _CachedCollector(Collector):


 class QueueDepthCollector(_CachedCollector):
-    """Reads Celery queue lengths from the broker Redis on each scrape.
-
-    Uses a Redis client factory (callable) rather than a stored client
-    reference so the connection is always fresh from Celery's pool.
-    """
+    """Reads Celery queue lengths from the broker Redis on each scrape."""

    def __init__(self, cache_ttl: float = _DEFAULT_CACHE_TTL) -> None:
        super().__init__(cache_ttl)
-        self._get_redis: Callable[[], Redis] | None = None
+        self._celery_app: Any | None = None

-    def set_redis_factory(self, factory: Callable[[], Redis]) -> None:
-        """Set a callable that returns a broker Redis client on demand."""
-        self._get_redis = factory
+    def set_celery_app(self, app: Any) -> None:
+        """Set the Celery app for broker Redis access."""
+        self._celery_app = app

    def _collect_fresh(self) -> list[GaugeMetricFamily]:
-        if self._get_redis is None:
+        if self._celery_app is None:
            return []

-        redis_client = self._get_redis()
+        from onyx.background.celery.celery_redis import celery_get_broker_client
+
+        redis_client = celery_get_broker_client(self._celery_app)

        depth = GaugeMetricFamily(
            "onyx_queue_depth",
@@ -404,17 +401,19 @@ class RedisHealthCollector(_CachedCollector):

    def __init__(self, cache_ttl: float = _DEFAULT_CACHE_TTL) -> None:
        super().__init__(cache_ttl)
-        self._get_redis: Callable[[], Redis] | None = None
+        self._celery_app: Any | None = None

-    def set_redis_factory(self, factory: Callable[[], Redis]) -> None:
-        """Set a callable that returns a broker Redis client on demand."""
-        self._get_redis = factory
+    def set_celery_app(self, app: Any) -> None:
+        """Set the Celery app for broker Redis access."""
+        self._celery_app = app

    def _collect_fresh(self) -> list[GaugeMetricFamily]:
-        if self._get_redis is None:
+        if self._celery_app is None:
            return []

-        redis_client = self._get_redis()
+        from onyx.background.celery.celery_redis import celery_get_broker_client
+
+        redis_client = celery_get_broker_client(self._celery_app)

        memory_used = GaugeMetricFamily(
            "onyx_redis_memory_used_bytes",
--- a/backend/onyx/server/metrics/indexing_pipeline_setup.py
+++ b/backend/onyx/server/metrics/indexing_pipeline_setup.py
@@ -3,12 +3,8 @@
 Called once by the monitoring celery worker after Redis and DB are ready.
 """

-from collections.abc import Callable
-from typing import Any
-
 from celery import Celery
 from prometheus_client.registry import REGISTRY
-from redis import Redis

 from onyx.server.metrics.indexing_pipeline import ConnectorHealthCollector
 from onyx.server.metrics.indexing_pipeline import IndexAttemptCollector
@@ -21,7 +17,7 @@ from onyx.utils.logger import setup_logger
 logger = setup_logger()

 # Module-level singletons — these are lightweight objects (no connections or DB
-# state) until configure() / set_redis_factory() is called. Keeping them at
+# state) until configure() / set_celery_app() is called. Keeping them at
 # module level ensures they survive the lifetime of the worker process and are
 # only registered with the Prometheus registry once.
 _queue_collector = QueueDepthCollector()
@@ -32,72 +28,15 @@ _worker_health_collector = WorkerHealthCollector()
 _heartbeat_monitor: WorkerHeartbeatMonitor | None = None


-def _make_broker_redis_factory(celery_app: Celery) -> Callable[[], Redis]:
-    """Create a factory that returns a cached broker Redis client.
-
-    Reuses a single connection across scrapes to avoid leaking connections.
-    Reconnects automatically if the cached connection becomes stale.
-    """
-    _cached_client: list[Redis | None] = [None]
-    # Keep a reference to the Kombu Connection so we can close it on
-    # reconnect (the raw Redis client outlives the Kombu wrapper).
-    _cached_kombu_conn: list[Any] = [None]
-
-    def _close_client(client: Redis) -> None:
-        """Best-effort close of a Redis client."""
-        try:
-            client.close()
-        except Exception:
-            logger.debug("Failed to close stale Redis client", exc_info=True)
-
-    def _close_kombu_conn() -> None:
-        """Best-effort close of the cached Kombu Connection."""
-        conn = _cached_kombu_conn[0]
-        if conn is not None:
-            try:
-                conn.close()
-            except Exception:
-                logger.debug("Failed to close Kombu connection", exc_info=True)
-            _cached_kombu_conn[0] = None
-
-    def _get_broker_redis() -> Redis:
-        client = _cached_client[0]
-        if client is not None:
-            try:
-                client.ping()
-                return client
-            except Exception:
-                logger.debug("Cached Redis client stale, reconnecting")
-                _close_client(client)
-                _cached_client[0] = None
-                _close_kombu_conn()
-
-        # Get a fresh Redis client from the broker connection.
-        # We hold this client long-term (cached above) rather than using a
-        # context manager, because we need it to persist across scrapes.
-        # The caching logic above ensures we only ever hold one connection,
-        # and we close it explicitly on reconnect.
-        conn = celery_app.broker_connection()
-        # kombu's Channel exposes .client at runtime (the underlying Redis
-        # client) but the type stubs don't declare it.
-        new_client: Redis = conn.channel().client  # type: ignore[attr-defined]
-        _cached_client[0] = new_client
-        _cached_kombu_conn[0] = conn
-        return new_client
-
-    return _get_broker_redis
-
-
 def setup_indexing_pipeline_metrics(celery_app: Celery) -> None:
    """Register all indexing pipeline collectors with the default registry.

    Args:
-        celery_app: The Celery application instance. Used to obtain a fresh
+        celery_app: The Celery application instance. Used to obtain a
            broker Redis client on each scrape for queue depth metrics.
    """
-    redis_factory = _make_broker_redis_factory(celery_app)
-    _queue_collector.set_redis_factory(redis_factory)
-    _redis_health_collector.set_redis_factory(redis_factory)
+    _queue_collector.set_celery_app(celery_app)
+    _redis_health_collector.set_celery_app(celery_app)

    # Start the heartbeat monitor daemon thread — uses a single persistent
    # connection to receive worker-heartbeat events.
--- a/backend/onyx/server/settings/api.py
+++ b/backend/onyx/server/settings/api.py
@@ -21,7 +21,6 @@ from onyx.db.notification import get_notifications
 from onyx.db.notification import update_notification_last_shown
 from onyx.error_handling.error_codes import OnyxErrorCode
 from onyx.error_handling.exceptions import OnyxError
-from onyx.hooks.utils import HOOKS_AVAILABLE
 from onyx.key_value_store.factory import get_kv_store
 from onyx.key_value_store.interface import KvKeyNotFoundError
 from onyx.server.features.build.utils import is_onyx_craft_enabled
@@ -38,6 +37,7 @@ from onyx.utils.logger import setup_logger
 from onyx.utils.variable_functionality import (
    fetch_versioned_implementation_with_fallback,
 )
+from shared_configs.configs import MULTI_TENANT

 logger = setup_logger()

@@ -98,7 +98,7 @@ def fetch_settings(
        needs_reindexing=needs_reindexing,
        onyx_craft_enabled=onyx_craft_enabled_for_user,
        vector_db_enabled=not DISABLE_VECTOR_DB,
-        hooks_enabled=HOOKS_AVAILABLE,
+        hooks_enabled=not MULTI_TENANT,
        version=onyx_version,
        max_allowed_upload_size_mb=MAX_ALLOWED_UPLOAD_SIZE_MB,
        default_user_file_max_upload_size_mb=min(
--- a/backend/onyx/server/settings/models.py
+++ b/backend/onyx/server/settings/models.py
@@ -116,7 +116,7 @@ class UserSettings(Settings):
    # False when DISABLE_VECTOR_DB is set — connectors, RAG search, and
    # document sets are unavailable.
    vector_db_enabled: bool = True
-    # True when hooks are available: single-tenant deployment with HOOK_ENABLED=true.
+    # True when hooks are available: single-tenant EE deployments only.
    hooks_enabled: bool = False
    # Application version, read from the ONYX_VERSION env var at startup.
    version: str | None = None
--- a/backend/onyx/tools/tool_implementations/file_reader/file_reader_tool.py
+++ b/backend/onyx/tools/tool_implementations/file_reader/file_reader_tool.py
@@ -1,3 +1,4 @@
+import io
 import json
 from typing import Any
 from typing import cast
@@ -9,6 +10,7 @@ from typing_extensions import override
 from onyx.chat.emitter import Emitter
 from onyx.configs.app_configs import DISABLE_VECTOR_DB
 from onyx.db.engine.sql_engine import get_session_with_current_tenant
+from onyx.file_processing.extract_file_text import extract_file_text
 from onyx.file_store.models import ChatFileType
 from onyx.file_store.models import InMemoryChatFile
 from onyx.file_store.utils import load_chat_file_by_id
@@ -172,7 +174,10 @@ class FileReaderTool(Tool[FileReaderToolOverrideKwargs]):
        # Only PLAIN_TEXT and TABULAR are guaranteed to contain actual text bytes.
        # DOC type in a loaded file means plaintext extraction failed and the
        # content is the original binary (e.g. raw PDF/DOCX bytes).
-        if chat_file.file_type not in (ChatFileType.PLAIN_TEXT, ChatFileType.TABULAR):
+        if chat_file.file_type not in (
+            ChatFileType.PLAIN_TEXT,
+            ChatFileType.TABULAR,
+        ):
            raise ToolCallException(
                message=f"File {file_id} is not a text file (type={chat_file.file_type})",
                llm_facing_message=(
@@ -181,7 +186,19 @@ class FileReaderTool(Tool[FileReaderToolOverrideKwargs]):
            )

        try:
-            full_text = chat_file.content.decode("utf-8", errors="replace")
+            if chat_file.file_type == ChatFileType.PLAIN_TEXT:
+                full_text = chat_file.content.decode("utf-8", errors="replace")
+            else:
+                full_text = (
+                    extract_file_text(
+                        file=io.BytesIO(chat_file.content),
+                        file_name=chat_file.filename or "",
+                        break_on_unprocessable=False,
+                    )
+                    or ""
+                )
+        except ToolCallException:
+            raise
        except Exception:
            raise ToolCallException(
                message=f"Failed to decode file {file_id}",
--- a/backend/requirements/default.txt
+++ b/backend/requirements/default.txt
@@ -187,7 +187,7 @@ coloredlogs==15.0.1
    # via onnxruntime
 courlan==1.3.2
    # via trafilatura
-cryptography==46.0.5
+cryptography==46.0.6
    # via
    #   authlib
    #   google-auth
@@ -449,7 +449,7 @@ kombu==5.5.4
    # via celery
 kubernetes==31.0.0
    # via onyx
-langchain-core==1.2.11
+langchain-core==1.2.22
    # via onyx
 langdetect==1.0.9
    # via unstructured
@@ -735,7 +735,7 @@ pyee==13.0.0
    # via playwright
 pygithub==2.5.0
    # via onyx
-pygments==2.19.2
+pygments==2.20.0
    # via rich
 pyjwt==2.12.0
    # via
--- a/backend/requirements/dev.txt
+++ b/backend/requirements/dev.txt
@@ -97,7 +97,7 @@ comm==0.2.3
    # via ipykernel
 contourpy==1.3.3
    # via matplotlib
-cryptography==46.0.5
+cryptography==46.0.6
    # via
    #   google-auth
    #   pyjwt
@@ -263,7 +263,7 @@ oauthlib==3.2.2
    # via
    #   kubernetes
    #   requests-oauthlib
-onyx-devtools==0.7.1
+onyx-devtools==0.7.2
    # via onyx
 openai==2.14.0
    # via
@@ -349,7 +349,7 @@ pydantic-core==2.33.2
    # via pydantic
 pydantic-settings==2.12.0
    # via mcp
-pygments==2.19.2
+pygments==2.20.0
    # via
    #   ipython
    #   ipython-pygments-lexers
--- a/backend/requirements/ee.txt
+++ b/backend/requirements/ee.txt
@@ -76,7 +76,7 @@ colorama==0.4.6 ; sys_platform == 'win32'
    # via
    #   click
    #   tqdm
-cryptography==46.0.5
+cryptography==46.0.6
    # via
    #   google-auth
    #   pyjwt
--- a/backend/requirements/model_server.txt
+++ b/backend/requirements/model_server.txt
@@ -92,7 +92,7 @@ colorama==0.4.6 ; sys_platform == 'win32'
    # via
    #   click
    #   tqdm
-cryptography==46.0.5
+cryptography==46.0.6
    # via
    #   google-auth
    #   pyjwt
--- a/backend/scripts/run_industryrag_bench_questions.py
+++ b/backend/scripts/run_industryrag_bench_questions.py
@@ -5,6 +5,7 @@ import asyncio
 import json
 import logging
 import sys
+import time
 from dataclasses import asdict
 from dataclasses import dataclass
 from pathlib import Path
@@ -27,6 +28,9 @@ INTERNAL_SEARCH_TOOL_NAME = "internal_search"
 INTERNAL_SEARCH_IN_CODE_TOOL_ID = "SearchTool"
 MAX_REQUEST_ATTEMPTS = 5
 RETRIABLE_STATUS_CODES = {429, 500, 502, 503, 504}
+QUESTION_TIMEOUT_SECONDS = 300
+QUESTION_RETRY_PAUSE_SECONDS = 30
+MAX_QUESTION_ATTEMPTS = 3


@dataclass(frozen=True)
@@ -109,6 +113,27 @@ def normalize_api_base(api_base: str) -> str:
    return f"{normalized}/api"


+def load_completed_question_ids(output_file: Path) -> set[str]:
+    """Collect the question IDs already recorded in a JSONL answers file.
+
+    Args:
+        output_file: Path of the answers file; each non-blank line is
+            expected to be one JSON object.
+
+    Returns:
+        The set of non-empty string ``question_id`` values found. An empty
+        set is returned when the file does not exist.
+
+    Blank lines, lines that are not valid JSON, and JSON values that are
+    not objects are ignored rather than treated as fatal.
+    """
+    if not output_file.exists():
+        return set()
+
+    completed_ids: set[str] = set()
+    with output_file.open("r", encoding="utf-8") as file:
+        for line in file:
+            stripped = line.strip()
+            if not stripped:
+                continue
+            try:
+                record = json.loads(stripped)
+            except json.JSONDecodeError:
+                continue
+            # Valid JSON is not necessarily an object (e.g. a list or a
+            # number); only mappings can carry a "question_id" key.
+            if not isinstance(record, dict):
+                continue
+            question_id = record.get("question_id")
+            if isinstance(question_id, str) and question_id:
+                completed_ids.add(question_id)
+
+    return completed_ids
+
+
 def load_questions(questions_file: Path) -> list[QuestionRecord]:
    if not questions_file.exists():
        raise FileNotFoundError(f"Questions file not found: {questions_file}")
@@ -348,6 +373,7 @@ async def generate_answers(
    api_base: str,
    api_key: str,
    parallelism: int,
+    skipped: int,
 ) -> None:
    if parallelism < 1:
        raise ValueError("`--parallelism` must be at least 1.")
@@ -382,58 +408,178 @@ async def generate_answers(
            write_lock = asyncio.Lock()
            completed = 0
            successful = 0
+            stuck_count = 0
            failed_questions: list[FailedQuestionRecord] = []
-            total = len(questions)
+            remaining_count = len(questions)
+            overall_total = remaining_count + skipped
+            question_durations: list[float] = []
+            run_start_time = time.monotonic()
+
+            def print_progress() -> None:
+                """Redraw the one-line progress bar on stderr.
+
+                Reads the enclosing run's counters (completed, successful,
+                stuck, skipped) and the recorded per-question durations; the
+                line starts with a carriage return so each call overwrites
+                the previous one.
+                """
+                if question_durations:
+                    avg_time = sum(question_durations) / len(question_durations)
+                else:
+                    avg_time = 0.0
+                elapsed = time.monotonic() - run_start_time
+                # Remaining work is spread across the parallel workers.
+                eta = avg_time * (remaining_count - completed) / max(parallelism, 1)
+
+                done = skipped + completed
+                bar_width = 30
+                if overall_total:
+                    filled = int(bar_width * done / overall_total)
+                    pct = done / overall_total * 100
+                else:
+                    # Nothing to do at all: show a full bar.
+                    filled = bar_width
+                    pct = 100.0
+                bar = "█" * filled + "░" * (bar_width - filled)
+
+                segments = [
+                    f"\r{bar} {pct:5.1f}% "
+                    f"[{done}/{overall_total}] "
+                    f"avg {avg_time:.1f}s/q "
+                    f"elapsed {elapsed:.0f}s "
+                    f"ETA {eta:.0f}s "
+                    f"(ok:{successful} fail:{len(failed_questions)}"
+                ]
+                if stuck_count:
+                    segments.append(f" stuck:{stuck_count}")
+                if skipped:
+                    segments.append(f" skip:{skipped}")
+                segments.append(")")
+
+                sys.stderr.write("".join(segments))
+                sys.stderr.flush()
+
+            print_progress()

            async def process_question(question_record: QuestionRecord) -> None:
                nonlocal completed
                nonlocal successful
+                nonlocal stuck_count

-                try:
-                    async with semaphore:
-                        result = await submit_question(
-                            session=session,
-                            api_base=api_base,
-                            headers=headers,
-                            internal_search_tool_id=internal_search_tool_id,
-                            question_record=question_record,
+                last_error: Exception | None = None
+                for attempt in range(1, MAX_QUESTION_ATTEMPTS + 1):
+                    q_start = time.monotonic()
+                    try:
+                        async with semaphore:
+                            result = await asyncio.wait_for(
+                                submit_question(
+                                    session=session,
+                                    api_base=api_base,
+                                    headers=headers,
+                                    internal_search_tool_id=internal_search_tool_id,
+                                    question_record=question_record,
+                                ),
+                                timeout=QUESTION_TIMEOUT_SECONDS,
+                            )
+                    except asyncio.TimeoutError:
+                        async with progress_lock:
+                            stuck_count += 1
+                            logger.warning(
+                                "Question %s timed out after %ss (attempt %s/%s, "
+                                "total stuck: %s) — retrying in %ss",
+                                question_record.question_id,
+                                QUESTION_TIMEOUT_SECONDS,
+                                attempt,
+                                MAX_QUESTION_ATTEMPTS,
+                                stuck_count,
+                                QUESTION_RETRY_PAUSE_SECONDS,
+                            )
+                            print_progress()
+                        last_error = TimeoutError(
+                            f"Timed out after {QUESTION_TIMEOUT_SECONDS}s "
+                            f"on attempt {attempt}/{MAX_QUESTION_ATTEMPTS}"
                        )
-                except Exception as exc:
+                        await asyncio.sleep(QUESTION_RETRY_PAUSE_SECONDS)
+                        continue
+                    except Exception as exc:
+                        duration = time.monotonic() - q_start
+                        async with progress_lock:
+                            completed += 1
+                            question_durations.append(duration)
+                            failed_questions.append(
+                                FailedQuestionRecord(
+                                    question_id=question_record.question_id,
+                                    error=str(exc),
+                                )
+                            )
+                            logger.exception(
+                                "Failed question %s (%s/%s)",
+                                question_record.question_id,
+                                completed,
+                                remaining_count,
+                            )
+                            print_progress()
+                        return
+
+                    duration = time.monotonic() - q_start
+
+                    async with write_lock:
+                        file.write(json.dumps(asdict(result), ensure_ascii=False))
+                        file.write("\n")
+                        file.flush()
+
                    async with progress_lock:
                        completed += 1
-                        failed_questions.append(
-                            FailedQuestionRecord(
-                                question_id=question_record.question_id,
-                                error=str(exc),
-                            )
-                        )
-                        logger.exception(
-                            "Failed question %s (%s/%s)",
-                            question_record.question_id,
-                            completed,
-                            total,
-                        )
+                        successful += 1
+                        question_durations.append(duration)
+                        print_progress()
                    return

-                async with write_lock:
-                    file.write(json.dumps(asdict(result), ensure_ascii=False))
-                    file.write("\n")
-                    file.flush()
-
+                # All attempts exhausted due to timeouts
                async with progress_lock:
                    completed += 1
-                    successful += 1
-                    logger.info("Processed %s/%s questions", completed, total)
+                    failed_questions.append(
+                        FailedQuestionRecord(
+                            question_id=question_record.question_id,
+                            error=str(last_error),
+                        )
+                    )
+                    logger.error(
+                        "Question %s failed after %s timeout attempts (%s/%s)",
+                        question_record.question_id,
+                        MAX_QUESTION_ATTEMPTS,
+                        completed,
+                        remaining_count,
+                    )
+                    print_progress()

            await asyncio.gather(
                *(process_question(question_record) for question_record in questions)
            )

+            # Final newline after progress bar
+            sys.stderr.write("\n")
+            sys.stderr.flush()
+
+            total_elapsed = time.monotonic() - run_start_time
+            avg_time = (
+                sum(question_durations) / len(question_durations)
+                if question_durations
+                else 0.0
+            )
+            stuck_suffix = f", {stuck_count} stuck timeouts" if stuck_count else ""
+            resume_suffix = (
+                f" — {skipped} previously completed, "
+                f"{skipped + successful}/{overall_total} overall"
+                if skipped
+                else ""
+            )
+            logger.info(
+                "Done: %s/%s successful in %.1fs (avg %.1fs/question%s)%s",
+                successful,
+                remaining_count,
+                total_elapsed,
+                avg_time,
+                stuck_suffix,
+                resume_suffix,
+            )
+
            if failed_questions:
                logger.warning(
-                    "Completed with %s failed questions and %s successful questions.",
+                    "%s questions failed:",
                    len(failed_questions),
-                    successful,
                )
                for failed_question in failed_questions:
                    logger.warning(
@@ -453,7 +599,30 @@ def main() -> None:
            raise ValueError("`--max-questions` must be at least 1 when provided.")
        questions = questions[: args.max_questions]

-    logger.info("Loaded %s questions from %s", len(questions), args.questions_file)
+    completed_ids = load_completed_question_ids(args.output_file)
+    logger.info(
+        "Found %s already-answered question IDs in %s",
+        len(completed_ids),
+        args.output_file,
+    )
+    total_before_filter = len(questions)
+    questions = [q for q in questions if q.question_id not in completed_ids]
+    skipped = total_before_filter - len(questions)
+
+    if skipped:
+        logger.info(
+            "Resuming: %s/%s already answered, %s remaining",
+            skipped,
+            total_before_filter,
+            len(questions),
+        )
+    else:
+        logger.info("Loaded %s questions from %s", len(questions), args.questions_file)
+
+    if not questions:
+        logger.info("All questions already answered. Nothing to do.")
+        return
+
    logger.info("Writing answers to %s", args.output_file)

    asyncio.run(
@@ -463,6 +632,7 @@ def main() -> None:
            api_base=api_base,
            api_key=args.api_key,
            parallelism=args.parallelism,
+            skipped=skipped,
        )
    )

--- a/backend/tests/external_dependency_unit/celery/test_persona_file_sync.py
+++ b/backend/tests/external_dependency_unit/celery/test_persona_file_sync.py
@@ -129,6 +129,10 @@ def _patch_task_app(task: Any, mock_app: MagicMock) -> Generator[None, None, Non
            return_value=mock_app,
        ),
        patch(_PATCH_QUEUE_DEPTH, return_value=0),
+        patch(
+            "onyx.background.celery.tasks.user_file_processing.tasks.celery_get_broker_client",
+            return_value=MagicMock(),
+        ),
    ):
        yield

--- a/backend/tests/external_dependency_unit/celery/test_user_file_delete_queue.py
+++ b/backend/tests/external_dependency_unit/celery/test_user_file_delete_queue.py
@@ -88,10 +88,22 @@ def _patch_task_app(task: Any, mock_app: MagicMock) -> Generator[None, None, Non
    the actual task instance.  We patch ``app`` on that instance's class
    (a unique Celery-generated Task subclass) so the mock is scoped to this
    task only.
+
+    Also patches ``celery_get_broker_client`` so the mock app doesn't need
+    a real broker URL.
    """
    task_instance = task.run.__self__
-    with patch.object(
-        type(task_instance), "app", new_callable=PropertyMock, return_value=mock_app
+    with (
+        patch.object(
+            type(task_instance),
+            "app",
+            new_callable=PropertyMock,
+            return_value=mock_app,
+        ),
+        patch(
+            "onyx.background.celery.tasks.user_file_processing.tasks.celery_get_broker_client",
+            return_value=MagicMock(),
+        ),
    ):
        yield

--- a/backend/tests/external_dependency_unit/celery/test_user_file_indexing_adapter.py
+++ b/backend/tests/external_dependency_unit/celery/test_user_file_indexing_adapter.py
@@ -1,7 +1,7 @@
 """
 External dependency unit tests for UserFileIndexingAdapter metadata writing.

-Validates that build_metadata_aware_chunks produces DocMetadataAwareIndexChunk
+Validates that prepare_enrichment produces DocMetadataAwareIndexChunk
 objects with both `user_project` and `personas` fields populated correctly
 based on actual DB associations.

@@ -127,7 +127,7 @@ def _make_index_chunk(user_file: UserFile) -> IndexChunk:


 class TestAdapterWritesBothMetadataFields:
-    """build_metadata_aware_chunks must populate user_project AND personas."""
+    """prepare_enrichment must populate user_project AND personas."""

    @patch(
        "onyx.indexing.adapters.user_file_indexing_adapter.get_default_llm",
@@ -153,15 +153,13 @@ class TestAdapterWritesBothMetadataFields:
        doc = chunk.source_document
        context = DocumentBatchPrepareContext(updatable_docs=[doc], id_to_boost_map={})

-        result = adapter.build_metadata_aware_chunks(
-            chunks_with_embeddings=[chunk],
-            chunk_content_scores=[1.0],
-            tenant_id=TEST_TENANT_ID,
+        enricher = adapter.prepare_enrichment(
            context=context,
+            tenant_id=TEST_TENANT_ID,
+            chunks=[chunk],
        )
+        aware_chunk = enricher.enrich_chunk(chunk, 1.0)

-        assert len(result.chunks) == 1
-        aware_chunk = result.chunks[0]
        assert persona.id in aware_chunk.personas
        assert aware_chunk.user_project == []

@@ -190,15 +188,13 @@ class TestAdapterWritesBothMetadataFields:
            updatable_docs=[chunk.source_document], id_to_boost_map={}
        )

-        result = adapter.build_metadata_aware_chunks(
-            chunks_with_embeddings=[chunk],
-            chunk_content_scores=[1.0],
-            tenant_id=TEST_TENANT_ID,
+        enricher = adapter.prepare_enrichment(
            context=context,
+            tenant_id=TEST_TENANT_ID,
+            chunks=[chunk],
        )
+        aware_chunk = enricher.enrich_chunk(chunk, 1.0)

-        assert len(result.chunks) == 1
-        aware_chunk = result.chunks[0]
        assert project.id in aware_chunk.user_project
        assert aware_chunk.personas == []

@@ -229,14 +225,13 @@ class TestAdapterWritesBothMetadataFields:
            updatable_docs=[chunk.source_document], id_to_boost_map={}
        )

-        result = adapter.build_metadata_aware_chunks(
-            chunks_with_embeddings=[chunk],
-            chunk_content_scores=[1.0],
-            tenant_id=TEST_TENANT_ID,
+        enricher = adapter.prepare_enrichment(
            context=context,
+            tenant_id=TEST_TENANT_ID,
+            chunks=[chunk],
        )
+        aware_chunk = enricher.enrich_chunk(chunk, 1.0)

-        aware_chunk = result.chunks[0]
        assert persona.id in aware_chunk.personas
        assert project.id in aware_chunk.user_project

@@ -261,14 +256,13 @@ class TestAdapterWritesBothMetadataFields:
            updatable_docs=[chunk.source_document], id_to_boost_map={}
        )

-        result = adapter.build_metadata_aware_chunks(
-            chunks_with_embeddings=[chunk],
-            chunk_content_scores=[1.0],
-            tenant_id=TEST_TENANT_ID,
+        enricher = adapter.prepare_enrichment(
            context=context,
+            tenant_id=TEST_TENANT_ID,
+            chunks=[chunk],
        )
+        aware_chunk = enricher.enrich_chunk(chunk, 1.0)

-        aware_chunk = result.chunks[0]
        assert aware_chunk.personas == []
        assert aware_chunk.user_project == []

@@ -300,12 +294,11 @@ class TestAdapterWritesBothMetadataFields:
            updatable_docs=[chunk.source_document], id_to_boost_map={}
        )

-        result = adapter.build_metadata_aware_chunks(
-            chunks_with_embeddings=[chunk],
-            chunk_content_scores=[1.0],
-            tenant_id=TEST_TENANT_ID,
+        enricher = adapter.prepare_enrichment(
            context=context,
+            tenant_id=TEST_TENANT_ID,
+            chunks=[chunk],
        )
+        aware_chunk = enricher.enrich_chunk(chunk, 1.0)

-        aware_chunk = result.chunks[0]
        assert set(aware_chunk.personas) == {persona_a.id, persona_b.id}
--- a/backend/tests/external_dependency_unit/celery/test_user_file_processing_queue.py
+++ b/backend/tests/external_dependency_unit/celery/test_user_file_processing_queue.py
@@ -90,8 +90,17 @@ def _patch_task_app(task: Any, mock_app: MagicMock) -> Generator[None, None, Non
    task only.
    """
    task_instance = task.run.__self__
-    with patch.object(
-        type(task_instance), "app", new_callable=PropertyMock, return_value=mock_app
+    with (
+        patch.object(
+            type(task_instance),
+            "app",
+            new_callable=PropertyMock,
+            return_value=mock_app,
+        ),
+        patch(
+            "onyx.background.celery.tasks.user_file_processing.tasks.celery_get_broker_client",
+            return_value=MagicMock(),
+        ),
    ):
        yield

--- a/backend/tests/external_dependency_unit/document_index/test_document_index.py
+++ b/backend/tests/external_dependency_unit/document_index/test_document_index.py
@@ -6,6 +6,7 @@ These tests assume Vespa and OpenSearch are running.
 import time
 import uuid
 from collections.abc import Generator
+from collections.abc import Iterator

 import httpx
 import pytest
@@ -21,6 +22,7 @@ from onyx.document_index.opensearch.opensearch_document_index import (
 )
 from onyx.document_index.vespa.index import VespaIndex
 from onyx.document_index.vespa.vespa_document_index import VespaDocumentIndex
+from onyx.indexing.models import DocMetadataAwareIndexChunk
 from tests.external_dependency_unit.constants import TEST_TENANT_ID
 from tests.external_dependency_unit.document_index.conftest import EMBEDDING_DIM
 from tests.external_dependency_unit.document_index.conftest import make_chunk
@@ -201,3 +203,25 @@ class TestDocumentIndexNew:
            assert len(result_map) == 2
            assert result_map[existing_doc] is True
            assert result_map[new_doc] is False
+
+    def test_index_accepts_generator(
+        self,
+        document_indices: list[DocumentIndexNew],
+        tenant_context: None,  # noqa: ARG002
+    ) -> None:
+        """index() must work when chunks are supplied lazily by a generator."""
+        chunk_count = 3
+        for index_under_test in document_indices:
+            doc_id = f"test_gen_{uuid.uuid4().hex[:8]}"
+            metadata = make_indexing_metadata(
+                [doc_id], old_counts=[0], new_counts=[chunk_count]
+            )
+
+            # A generator expression: single-pass, with no len() or indexing.
+            lazy_chunks: Iterator[DocMetadataAwareIndexChunk] = (
+                make_chunk(doc_id, chunk_id=idx) for idx in range(chunk_count)
+            )
+
+            results = index_under_test.index(
+                chunks=lazy_chunks, indexing_metadata=metadata
+            )
+
+            assert len(results) == 1
+            first_result = results[0]
+            assert first_result.document_id == doc_id
+            assert first_result.already_existed is False
--- a/backend/tests/external_dependency_unit/document_index/test_document_index_old.py
+++ b/backend/tests/external_dependency_unit/document_index/test_document_index_old.py
@@ -5,6 +5,7 @@ These tests assume Vespa and OpenSearch are running.

 import time
 from collections.abc import Generator
+from collections.abc import Iterator

 import pytest

@@ -166,3 +167,29 @@ class TestDocumentIndexOld:
                batch_retrieval=True,
            )
            assert len(inference_chunks) == 0
+
+    def test_index_accepts_generator(
+        self,
+        document_indices: list[DocumentIndex],
+        tenant_context: None,  # noqa: ARG002
+    ) -> None:
+        """index() must work when chunks are supplied lazily by a generator."""
+        doc_id = "test_doc_gen"
+        chunk_count = 3
+        for index_under_test in document_indices:
+            # A generator expression: single-pass, with no len() or indexing.
+            lazy_chunks: Iterator[DocMetadataAwareIndexChunk] = (
+                make_chunk(doc_id, chunk_id=idx) for idx in range(chunk_count)
+            )
+
+            batch_params = IndexBatchParams(
+                doc_id_to_previous_chunk_cnt={doc_id: 0},
+                doc_id_to_new_chunk_cnt={doc_id: chunk_count},
+                tenant_id=get_current_tenant_id(),
+                large_chunks_enabled=False,
+            )
+
+            results = index_under_test.index(lazy_chunks, batch_params)
+
+            assert len(results) == 1
+            only_record = results.pop()
+            assert only_record.document_id == doc_id
+            assert only_record.already_existed is False
--- a/backend/tests/integration/tests/streaming_endpoints/test_chat_file_attachment.py
+++ b/backend/tests/integration/tests/streaming_endpoints/test_chat_file_attachment.py
@@ -1,3 +1,9 @@
+import mimetypes
+from typing import Any
+
+import requests
+
+from tests.integration.common_utils.constants import API_SERVER_URL
 from tests.integration.common_utils.managers.chat import ChatSessionManager
 from tests.integration.common_utils.managers.file import FileManager
 from tests.integration.common_utils.managers.llm_provider import LLMProviderManager
@@ -79,3 +85,90 @@ def test_send_message_with_text_file_attachment(admin_user: DATestUser) -> None:
    assert (
        "third line" in response.full_message.lower()
    ), "Chat response should contain the contents of the file"
+
+
+def _set_token_threshold(admin_user: DATestUser, threshold_k: int) -> None:
+    """PUT a new ``file_token_count_threshold_k`` to the admin settings API.
+
+    Raises through ``raise_for_status()`` if the server answers with an
+    error status.
+    """
+    settings_url = f"{API_SERVER_URL}/admin/settings"
+    payload = {"file_token_count_threshold_k": threshold_k}
+    requests.put(
+        settings_url,
+        json=payload,
+        headers=admin_user.headers,
+    ).raise_for_status()
+
+
+def _upload_raw(
+    filename: str,
+    content: bytes,
+    user: DATestUser,
+) -> dict[str, Any]:
+    """POST one file to the project upload endpoint and return the JSON body.
+
+    The MIME type is guessed from ``filename`` and falls back to
+    ``application/octet-stream``. The tests in this module read the
+    ``user_files`` and ``rejected_files`` keys of the returned body.
+    """
+    guessed_type = mimetypes.guess_type(filename)[0]
+    content_type = guessed_type or "application/octet-stream"
+
+    # Work on a copy so the caller's headers are not mutated. Content-Type is
+    # dropped, presumably so `requests` can set its own multipart header.
+    upload_headers = user.headers.copy()
+    upload_headers.pop("Content-Type", None)
+
+    multipart_files = [("files", (filename, content, content_type))]
+    response = requests.post(
+        f"{API_SERVER_URL}/user/projects/file/upload",
+        files=multipart_files,
+        headers=upload_headers,
+    )
+    response.raise_for_status()
+    return response.json()
+
+
+def test_csv_over_token_threshold_uploaded_not_indexed(
+    admin_user: DATestUser,
+) -> None:
+    """A CSV above the token threshold is accepted but marked SKIPPED."""
+    _set_token_threshold(admin_user, threshold_k=1)
+    try:
+        # 20 rows of 100 "x " pairs, intended to land well over the 1K threshold.
+        row = "x " * 100 + "\n"
+        csv_bytes = (row * 20).encode()
+        body = _upload_raw("large.csv", csv_bytes, admin_user)
+
+        accepted = body["user_files"]
+        assert len(accepted) == 1, "CSV should be accepted"
+        assert len(body["rejected_files"]) == 0, "CSV should not be rejected"
+
+        uploaded = accepted[0]
+        assert (
+            uploaded["status"] == "SKIPPED"
+        ), "CSV over threshold should be SKIPPED (uploaded but not indexed)"
+        assert uploaded["chunk_count"] is None, "Skipped file should have no chunks"
+    finally:
+        # Restore the threshold; assumes 200 is the default — TODO confirm.
+        _set_token_threshold(admin_user, threshold_k=200)
+
+
+def test_csv_under_token_threshold_uploaded_and_indexed(
+    admin_user: DATestUser,
+) -> None:
+    """A CSV below the token threshold is accepted and left in PROCESSING."""
+    _set_token_threshold(admin_user, threshold_k=200)
+    try:
+        csv_bytes = "col1,col2\na,b\n".encode()
+        body = _upload_raw("small.csv", csv_bytes, admin_user)
+
+        accepted = body["user_files"]
+        assert len(accepted) == 1, "CSV should be accepted"
+        assert len(body["rejected_files"]) == 0, "CSV should not be rejected"
+        assert (
+            accepted[0]["status"] == "PROCESSING"
+        ), "CSV under threshold should be PROCESSING (queued for indexing)"
+    finally:
+        # Leave the threshold at the same value the test started with.
+        _set_token_threshold(admin_user, threshold_k=200)
+
+
+def test_txt_over_token_threshold_rejected(
+    admin_user: DATestUser,
+) -> None:
+    """A .txt file above the token threshold is rejected outright."""
+    _set_token_threshold(admin_user, threshold_k=1)
+    try:
+        # Same oversized payload as the CSV case; .txt is expected not to be
+        # exempt from the threshold, so the upload should be refused.
+        row = "x " * 100 + "\n"
+        txt_bytes = (row * 20).encode()
+        body = _upload_raw("big.txt", txt_bytes, admin_user)
+
+        assert len(body["user_files"]) == 0, "File should not be accepted"
+        rejected = body["rejected_files"]
+        assert len(rejected) == 1, "File should be rejected"
+        rejection_reason = rejected[0]["reason"].lower()
+        assert "token limit" in rejection_reason
+    finally:
+        # Restore the threshold; assumes 200 is the default — TODO confirm.
+        _set_token_threshold(admin_user, threshold_k=200)
--- a/backend/tests/unit/ee/onyx/hooks/__init__.py
+++ b/backend/tests/unit/ee/onyx/hooks/__init__.py
--- a/backend/tests/unit/ee/onyx/hooks/test_executor.py
+++ b/backend/tests/unit/ee/onyx/hooks/test_executor.py
@@ -9,11 +9,11 @@ import httpx
 import pytest
 from pydantic import BaseModel

+from ee.onyx.hooks.executor import _execute_hook_impl as execute_hook
 from onyx.db.enums import HookFailStrategy
 from onyx.db.enums import HookPoint
 from onyx.error_handling.error_codes import OnyxErrorCode
 from onyx.error_handling.exceptions import OnyxError
-from onyx.hooks.executor import execute_hook
 from onyx.hooks.executor import HookSkipped
 from onyx.hooks.executor import HookSoftFailed
 from onyx.hooks.points.query_processing import QueryProcessingResponse
@@ -118,28 +118,30 @@ def db_session() -> MagicMock:


@pytest.mark.parametrize(
-    "hooks_available,hook",
+    "multi_tenant,hook",
    [
-        # HOOKS_AVAILABLE=False exits before the DB lookup — hook is irrelevant.
-        pytest.param(False, None, id="hooks_not_available"),
-        pytest.param(True, None, id="hook_not_found"),
-        pytest.param(True, _make_hook(is_active=False), id="hook_inactive"),
-        pytest.param(True, _make_hook(endpoint_url=None), id="no_endpoint_url"),
+        # MULTI_TENANT=True exits before the DB lookup — hook is irrelevant.
+        pytest.param(True, None, id="multi_tenant"),
+        pytest.param(False, None, id="hook_not_found"),
+        pytest.param(False, _make_hook(is_active=False), id="hook_inactive"),
+        pytest.param(False, _make_hook(endpoint_url=None), id="no_endpoint_url"),
    ],
 )
 def test_early_exit_returns_skipped_with_no_db_writes(
    db_session: MagicMock,
-    hooks_available: bool,
+    multi_tenant: bool,
    hook: MagicMock | None,
 ) -> None:
    with (
-        patch("onyx.hooks.executor.HOOKS_AVAILABLE", hooks_available),
+        patch("ee.onyx.hooks.executor.MULTI_TENANT", multi_tenant),
        patch(
-            "onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
+            "ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
            return_value=hook,
        ),
-        patch("onyx.hooks.executor.update_hook__no_commit") as mock_update,
-        patch("onyx.hooks.executor.create_hook_execution_log__no_commit") as mock_log,
+        patch("ee.onyx.hooks.executor.update_hook__no_commit") as mock_update,
+        patch(
+            "ee.onyx.hooks.executor.create_hook_execution_log__no_commit"
+        ) as mock_log,
    ):
        result = execute_hook(
            db_session=db_session,
@@ -164,14 +166,16 @@ def test_success_returns_validated_model_and_sets_reachable(
    hook = _make_hook()

    with (
-        patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
+        patch("ee.onyx.hooks.executor.MULTI_TENANT", False),
        patch(
-            "onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
+            "ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
            return_value=hook,
        ),
-        patch("onyx.hooks.executor.get_session_with_current_tenant"),
-        patch("onyx.hooks.executor.update_hook__no_commit") as mock_update,
-        patch("onyx.hooks.executor.create_hook_execution_log__no_commit") as mock_log,
+        patch("ee.onyx.hooks.executor.get_session_with_current_tenant"),
+        patch("ee.onyx.hooks.executor.update_hook__no_commit") as mock_update,
+        patch(
+            "ee.onyx.hooks.executor.create_hook_execution_log__no_commit"
+        ) as mock_log,
        patch("httpx.Client") as mock_client_cls,
    ):
        _setup_client(mock_client_cls, response=_make_response())
@@ -195,14 +199,14 @@ def test_success_skips_reachable_write_when_already_true(db_session: MagicMock)
    hook = _make_hook(is_reachable=True)

    with (
-        patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
+        patch("ee.onyx.hooks.executor.MULTI_TENANT", False),
        patch(
-            "onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
+            "ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
            return_value=hook,
        ),
-        patch("onyx.hooks.executor.get_session_with_current_tenant"),
-        patch("onyx.hooks.executor.update_hook__no_commit") as mock_update,
-        patch("onyx.hooks.executor.create_hook_execution_log__no_commit"),
+        patch("ee.onyx.hooks.executor.get_session_with_current_tenant"),
+        patch("ee.onyx.hooks.executor.update_hook__no_commit") as mock_update,
+        patch("ee.onyx.hooks.executor.create_hook_execution_log__no_commit"),
        patch("httpx.Client") as mock_client_cls,
    ):
        _setup_client(mock_client_cls, response=_make_response())
@@ -224,14 +228,16 @@ def test_non_dict_json_response_is_a_failure(db_session: MagicMock) -> None:
    hook = _make_hook(fail_strategy=HookFailStrategy.SOFT)

    with (
-        patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
+        patch("ee.onyx.hooks.executor.MULTI_TENANT", False),
        patch(
-            "onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
+            "ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
            return_value=hook,
        ),
-        patch("onyx.hooks.executor.get_session_with_current_tenant"),
-        patch("onyx.hooks.executor.update_hook__no_commit") as mock_update,
-        patch("onyx.hooks.executor.create_hook_execution_log__no_commit") as mock_log,
+        patch("ee.onyx.hooks.executor.get_session_with_current_tenant"),
+        patch("ee.onyx.hooks.executor.update_hook__no_commit") as mock_update,
+        patch(
+            "ee.onyx.hooks.executor.create_hook_execution_log__no_commit"
+        ) as mock_log,
        patch("httpx.Client") as mock_client_cls,
    ):
        _setup_client(
@@ -258,14 +264,16 @@ def test_json_decode_failure_is_a_failure(db_session: MagicMock) -> None:
    hook = _make_hook(fail_strategy=HookFailStrategy.SOFT)

    with (
-        patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
+        patch("ee.onyx.hooks.executor.MULTI_TENANT", False),
        patch(
-            "onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
+            "ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
            return_value=hook,
        ),
-        patch("onyx.hooks.executor.get_session_with_current_tenant"),
-        patch("onyx.hooks.executor.update_hook__no_commit") as mock_update,
-        patch("onyx.hooks.executor.create_hook_execution_log__no_commit") as mock_log,
+        patch("ee.onyx.hooks.executor.get_session_with_current_tenant"),
+        patch("ee.onyx.hooks.executor.update_hook__no_commit") as mock_update,
+        patch(
+            "ee.onyx.hooks.executor.create_hook_execution_log__no_commit"
+        ) as mock_log,
        patch("httpx.Client") as mock_client_cls,
    ):
        _setup_client(
@@ -384,14 +392,14 @@ def test_http_failure_paths(
    hook = _make_hook(fail_strategy=fail_strategy)

    with (
-        patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
+        patch("ee.onyx.hooks.executor.MULTI_TENANT", False),
        patch(
-            "onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
+            "ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
            return_value=hook,
        ),
-        patch("onyx.hooks.executor.get_session_with_current_tenant"),
-        patch("onyx.hooks.executor.update_hook__no_commit") as mock_update,
-        patch("onyx.hooks.executor.create_hook_execution_log__no_commit"),
+        patch("ee.onyx.hooks.executor.get_session_with_current_tenant"),
+        patch("ee.onyx.hooks.executor.update_hook__no_commit") as mock_update,
+        patch("ee.onyx.hooks.executor.create_hook_execution_log__no_commit"),
        patch("httpx.Client") as mock_client_cls,
    ):
        _setup_client(mock_client_cls, side_effect=exception)
@@ -443,14 +451,14 @@ def test_authorization_header(
    hook = _make_hook(api_key=api_key)

    with (
-        patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
+        patch("ee.onyx.hooks.executor.MULTI_TENANT", False),
        patch(
-            "onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
+            "ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
            return_value=hook,
        ),
-        patch("onyx.hooks.executor.get_session_with_current_tenant"),
-        patch("onyx.hooks.executor.update_hook__no_commit"),
-        patch("onyx.hooks.executor.create_hook_execution_log__no_commit"),
+        patch("ee.onyx.hooks.executor.get_session_with_current_tenant"),
+        patch("ee.onyx.hooks.executor.update_hook__no_commit"),
+        patch("ee.onyx.hooks.executor.create_hook_execution_log__no_commit"),
        patch("httpx.Client") as mock_client_cls,
    ):
        mock_client = _setup_client(mock_client_cls, response=_make_response())
@@ -489,13 +497,13 @@ def test_persist_session_failure_is_swallowed(
    hook = _make_hook(fail_strategy=HookFailStrategy.HARD)

    with (
-        patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
+        patch("ee.onyx.hooks.executor.MULTI_TENANT", False),
        patch(
-            "onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
+            "ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
            return_value=hook,
        ),
        patch(
-            "onyx.hooks.executor.get_session_with_current_tenant",
+            "ee.onyx.hooks.executor.get_session_with_current_tenant",
            side_effect=RuntimeError("DB unavailable"),
        ),
        patch("httpx.Client") as mock_client_cls,
@@ -556,14 +564,16 @@ def test_response_validation_failure_respects_fail_strategy(
    hook = _make_hook(fail_strategy=fail_strategy)

    with (
-        patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
+        patch("ee.onyx.hooks.executor.MULTI_TENANT", False),
        patch(
-            "onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
+            "ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
            return_value=hook,
        ),
-        patch("onyx.hooks.executor.get_session_with_current_tenant"),
-        patch("onyx.hooks.executor.update_hook__no_commit") as mock_update,
-        patch("onyx.hooks.executor.create_hook_execution_log__no_commit") as mock_log,
+        patch("ee.onyx.hooks.executor.get_session_with_current_tenant"),
+        patch("ee.onyx.hooks.executor.update_hook__no_commit") as mock_update,
+        patch(
+            "ee.onyx.hooks.executor.create_hook_execution_log__no_commit"
+        ) as mock_log,
        patch("httpx.Client") as mock_client_cls,
    ):
        # Response payload is missing required_field → ValidationError
@@ -619,13 +629,13 @@ def test_unexpected_exception_in_inner_respects_fail_strategy(
    hook = _make_hook(fail_strategy=fail_strategy)

    with (
-        patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
+        patch("ee.onyx.hooks.executor.MULTI_TENANT", False),
        patch(
-            "onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
+            "ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
            return_value=hook,
        ),
        patch(
-            "onyx.hooks.executor._execute_hook_inner",
+            "ee.onyx.hooks.executor._execute_hook_inner",
            side_effect=ValueError("unexpected bug"),
        ),
    ):
@@ -658,17 +668,19 @@ def test_is_reachable_failure_does_not_prevent_log(db_session: MagicMock) -> Non
    hook = _make_hook(fail_strategy=HookFailStrategy.SOFT)

    with (
-        patch("onyx.hooks.executor.HOOKS_AVAILABLE", True),
+        patch("ee.onyx.hooks.executor.MULTI_TENANT", False),
        patch(
-            "onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
+            "ee.onyx.hooks.executor.get_non_deleted_hook_by_hook_point",
            return_value=hook,
        ),
-        patch("onyx.hooks.executor.get_session_with_current_tenant"),
+        patch("ee.onyx.hooks.executor.get_session_with_current_tenant"),
        patch(
-            "onyx.hooks.executor.update_hook__no_commit",
+            "ee.onyx.hooks.executor.update_hook__no_commit",
            side_effect=OnyxError(OnyxErrorCode.NOT_FOUND, "hook deleted"),
        ),
-        patch("onyx.hooks.executor.create_hook_execution_log__no_commit") as mock_log,
+        patch(
+            "ee.onyx.hooks.executor.create_hook_execution_log__no_commit"
+        ) as mock_log,
        patch("httpx.Client") as mock_client_cls,
    ):
        _setup_client(mock_client_cls, side_effect=httpx.ConnectError("refused"))
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Danelegend	de0f42f6cc	refactor(files): Port csv type to tabular (#9785 )	2026-04-01 03:37:13 +00:00
Raunak Bhagat	7ecefdc90f	refactor(opal): split Card sizeVariant into padding + rounding (#9823 )	2026-04-01 03:32:08 +00:00
Danelegend	21fc013893	feat(file-upload): Upload files exceeding tokens but skip indexing (#9751 )	2026-04-01 02:14:51 +00:00
Justin Tahara	a1c3a68ba4	fix(perf): optimize chat sessions query to prevent DB cascading failures (#9802 )	2026-04-01 01:28:37 +00:00
Evan Lohn	4fb175ae65	fix: install early exit (#9818 )	2026-04-01 01:09:05 +00:00
Evan Lohn	800ad326df	fix: discord token validation (#9817 )	2026-04-01 01:08:38 +00:00
Bo-Onyx	6b920e8a3e	feat(hook): refactor under ee (#9776 )	2026-04-01 01:07:55 +00:00
Justin Tahara	ef3760796d	feat(rds): Adding IO Metrics Alarms (#9789 )	2026-04-01 01:07:45 +00:00
Jessica Singh	fa5b90df92	fix(connectors): fix reindex on paused file connectors (#9812 )	2026-03-31 23:10:09 +00:00
Evan Lohn	53953ac4fa	chore: fix indexing log2 (#9811 )	2026-03-31 21:02:54 +00:00
Yuhong Sun	26bb5c990c	chore: Rag script for benchmark/regression (#9781 )	2026-03-31 20:46:17 +00:00
Evan Lohn	27b4ed301f	chore: fix batch logging (#9808 )	2026-03-31 20:10:33 +00:00
Jessica Singh	93ec270ccc	feat(voice): VAD auto-stop only when auto-send is enabled (#9809 )	2026-03-31 19:31:31 +00:00
Raunak Bhagat	9e2d6c8a1d	refactor(admin): code-interpreter (#9790 )	2026-03-31 19:08:55 +00:00
Nikolas Garza	fc934214d0	perf(swr): add SWR_KEYS registry and skip revalidation for stable hooks (#9695 )	2026-03-31 19:07:42 +00:00
Raunak Bhagat	48fc45a0cd	refactor(admin): web-search (#9761 )	2026-03-31 19:04:18 +00:00
Jessica Singh	009266e53e	fix(llm): when multiple providers are same type ensure name is prioritized when default (#9777 )	2026-03-31 19:03:38 +00:00
Raunak Bhagat	ffb9df7308	refactor(admin): LLM Config (#9806 )	2026-03-31 19:03:17 +00:00
Raunak Bhagat	b0f5e0b8d9	refactor(admin): image-generation (#9769 )	2026-03-31 18:13:23 +00:00
acaprau	43aea5d614	chore(opensearch): Add Grafana dashboard for retrieval (#9657 ) Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>	2026-03-31 16:56:40 +00:00
Bo-Onyx	593d82f431	feat(hook): hook status and logs (#9770 )	2026-03-31 16:10:12 +00:00
Ben Wu	adf5691b5f	feat(canvas 2/4): Canvas Connector data fetching (#9386 )	2026-03-31 03:07:05 +00:00
Nikolas Garza	c1a8a5bd83	fix(tenants): run migrations on pool tenants before assigning to new users (#9788 )	2026-03-31 01:24:01 +00:00
Justin Tahara	8fd486da99	feat(rds): Add Freeable Memory alert (#9787 )	2026-03-31 00:59:30 +00:00
Raunak Bhagat	4bda4d3637	refactor: migrate away from `cards/Select` (#9771 )	2026-03-31 00:27:01 +00:00
Justin Tahara	13c25eadad	feat(rds): Adding CPU Alerts (#9784 )	2026-03-31 00:22:15 +00:00
Justin Tahara	1f244e6388	feat(eks): Adding Cloudwatch logging (#9783 )	2026-03-30 23:52:44 +00:00
Nikolas Garza	18b0416d30	feat(sentry): enable frontend source map uploads in cloud CI (#9775 )	2026-03-30 23:42:57 +00:00
Nikolas Garza	4bc0bc1efb	feat(helm): add Grafana dashboard provisioning (#9725 )	2026-03-30 23:42:32 +00:00
Justin Tahara	1555217061	feat(rds): Adding RDS Snapshosts (#9779 )	2026-03-30 23:17:08 +00:00
Nikolas Garza	d177a833f0	feat(sentry): add release tracking to backend and frontend (#9773 )	2026-03-30 22:35:38 +00:00
Jamison Lahman	086997d3c5	chore(types): fix IconButton size props (#9772 )	2026-03-30 21:40:25 +00:00
dependabot[bot]	dccec78397	chore(deps): bump helm/chart-testing-action from b5eebdd9998021f29756c53432f48dab66394810 to 2e2940618cb426dce2999631d543b53cdcfc8527 (#9764 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2026-03-30 14:41:01 -07:00
Jamison Lahman	0123133621	chore(fe): polish Query History table (#9767 )	2026-03-30 21:30:13 +00:00
dependabot[bot]	0b9d154a73	chore(deps): bump runs-on/cache from 50350ad4242587b6c8c2baa2e740b1bc11285ff4 to a5f51d6f3fece787d03b7b4e981c82538a0654ed (#9763 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2026-03-30 13:54:43 -07:00
dependabot[bot]	6e65e55bf5	chore(deps): bump actions/cache from 5.0.3 to 5.0.4 (#9765 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2026-03-30 13:46:53 -07:00
Raunak Bhagat	3f9e208759	feat(opal): `SelectCard` + `CardHeaderLayout` (#9760 )	2026-03-30 19:54:54 +00:00
dependabot[bot]	fb8edda14a	chore(deps): bump pygments from 2.19.2 to 2.20.0 (#9757 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Jamison Lahman <jamison@lahman.dev>	2026-03-30 18:30:18 +00:00
Jamison Lahman	58decd8a6b	chore(gha): prefer `ci-protected` env (#9728 )	2026-03-30 17:20:54 +00:00
Danelegend	e97204d9cc	feat(indexing): Batch chunks during doc processing (#9468 )	2026-03-30 11:49:36 +00:00
Danelegend	44ab02c94f	refactor(indexing): Refactor indexing vector db abstraction (#9653 )	2026-03-30 09:57:16 +00:00
Danelegend	a98cc30f25	refactor(indexing): Change adapters to support iterables (#9469 )	2026-03-30 01:43:10 +00:00
Danelegend	a709dcb8fa	feat(indexing): Max chunk processing (#9400 )	2026-03-30 00:10:24 +00:00
Raunak Bhagat	a3dfe6aa1b	refactor(opal): unify Interactive color system (#9717 )	2026-03-28 00:40:23 +00:00
Nikolas Garza	23e4d55fb1	perf(swr): convert raw-fetch hooks to SWR to eliminate duplicate requests (#9694 )	2026-03-28 00:26:20 +00:00
Jamison Lahman	470cc85f83	feat(cli): `onyx-cli serve` over SSH (#9726 )	2026-03-27 23:46:14 +00:00
Justin Tahara	64d9be5a41	fix(openpyxl): Colors must be aRGB hex values (#9727 )	2026-03-27 23:14:36 +00:00
roshan	71a5b469b0	feat(widget): add citation badges to chat widget (#9714 )	2026-03-27 22:39:46 +00:00
Evan Lohn	462eb0697f	fix: Anthropic litellm thinking workaround (#9713 )	2026-03-27 21:03:05 +00:00
dependabot[bot]	b708dc8796	chore(deps): bump langchain-core from 1.2.11 to 1.2.22 (#9720 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Jamison Lahman <jamison@lahman.dev>	2026-03-27 20:50:19 +00:00
dependabot[bot]	c9e2c32f55	chore(deps): bump cryptography from 46.0.5 to 46.0.6 (#9721 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Jamison Lahman <jamison@lahman.dev>	2026-03-27 20:48:59 +00:00
Jamison Lahman	d725df62e7	feat(cli): `--version` and `validate-config` warn if backend version is incompatible (#9715 )	2026-03-27 13:13:16 -07:00
Jamison Lahman	d1460972b6	fix(cli): `onyx-cli --version` interpolation (#9712 )	2026-03-27 19:22:31 +00:00
Jamison Lahman	706872f0b7	chore(deps): upgrade go deps (#9711 )	2026-03-27 12:24:25 -07:00
Jamison Lahman	ed3856be2b	chore(release): build all CLI wheels before publishing (#9710 )	2026-03-27 19:04:02 +00:00
Jamison Lahman	6326c7f0b9	chore(gha): fix git error after helm release migration to alpine base image (#9709 )	2026-03-27 11:21:34 -07:00
Jamison Lahman	40420fc4e6	chore(gha): helm release upstream nits (#9708 )	2026-03-27 11:10:41 -07:00
Nikolas Garza	1a2b6a66cc	fix(celery): use broker connection pool to prevent Redis connection leak (#9682 )	2026-03-27 17:53:49 +00:00
Jamison Lahman	d1b1529ccf	chore(gha): fix helm release after image update (#9707 )	2026-03-27 10:37:43 -07:00
Bo-Onyx	fedd9c76e5	feat(hook): admin page create or edit hook (#9690 )	2026-03-27 17:10:14 +00:00
Jamison Lahman	0b34b40b79	chore(gha): pin helm release docker image (#9706 )	2026-03-27 10:16:41 -07:00
Yuhong Sun	fe82ddb1b9	Update README.md (#9703 )	2026-03-27 10:03:56 -07:00
Jamison Lahman	32d3d70525	chore(playwright): deflake `settings_pages.spec.ts` (#9684 )	2026-03-27 15:54:23 +00:00
Jamison Lahman	40b9e10890	chore(devtools): upgrade `ods`: 0.7.1->0.7.2 (#9701 )	2026-03-27 08:17:42 -07:00
dependabot[bot]	e21b204b8a	chore(deps): bump brace-expansion in /backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web (#9698 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2026-03-27 08:10:15 -07:00
Jamison Lahman	2f672b3a4f	fix(fe): Popover content doesnt overflow on small screens (#9612 )	2026-03-27 08:07:52 -07:00
Nikolas Garza	cf19d0df4f	feat(helm): add Prometheus metrics ports and Services for celery workers (#9630 )	2026-03-27 08:03:48 +00:00
Danelegend	86a6a4c134	refactor(indexing): Vespa & Opensearch index function use Iterable (#9384 )	2026-03-27 04:36:59 +00:00