fix: remove unused Content import in NonAdminStep

refactor(fe): move onboarding types to interfaces/onboarding.ts
Move shared onboarding type definitions (OnboardingStep, OnboardingState, OnboardingActions, etc.) from sections/onboarding/types.ts to interfaces/onboarding.ts. Update all 20 import sites to use the new path.
2026-03-05 15:45:46 +00:00 · 2026-03-03 10:55:05 -08:00 · 2026-03-03 10:51:57 -08:00 · 2026-03-03 10:49:34 -08:00 · 2026-03-03 10:47:16 -08:00 · 2026-03-03 10:38:52 -08:00
429 changed files with 3276 additions and 11893 deletions
--- a/.github/workflows/nightly-llm-provider-chat.yml
+++ b/.github/workflows/nightly-llm-provider-chat.yml
@@ -15,7 +15,6 @@ permissions:
 jobs:
  provider-chat-test:
    uses: ./.github/workflows/reusable-nightly-llm-provider-chat.yml
-    secrets: inherit
    permissions:
      contents: read
      id-token: write
--- a/.github/workflows/pr-integration-tests.yml
+++ b/.github/workflows/pr-integration-tests.yml
@@ -335,6 +335,7 @@ jobs:
          # TODO(Nik): https://linear.app/onyx-app/issue/ENG-1/update-test-infra-to-use-test-license
          LICENSE_ENFORCEMENT_ENABLED=false
          CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS=0.001
+          USE_LIGHTWEIGHT_BACKGROUND_WORKER=false
          EOF
          fi

@@ -470,13 +471,13 @@ jobs:
          path: ${{ github.workspace }}/docker-compose.log
      # ------------------------------------------------------------

-  onyx-lite-tests:
+  no-vectordb-tests:
    needs: [build-backend-image, build-integration-image]
    runs-on:
      [
        runs-on,
        runner=4cpu-linux-arm64,
-        "run-id=${{ github.run_id }}-onyx-lite-tests",
+        "run-id=${{ github.run_id }}-no-vectordb-tests",
        "extras=ecr-cache",
      ]
    timeout-minutes: 45
@@ -494,12 +495,13 @@ jobs:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

-      - name: Create .env file for Onyx Lite Docker Compose
+      - name: Create .env file for no-vectordb Docker Compose
        env:
          ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
          RUN_ID: ${{ github.run_id }}
        run: |
          cat <<EOF > deployment/docker_compose/.env
+          COMPOSE_PROFILES=s3-filestore
          ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
          LICENSE_ENFORCEMENT_ENABLED=false
          AUTH_TYPE=basic
@@ -507,23 +509,28 @@ jobs:
          POSTGRES_USE_NULL_POOL=true
          REQUIRE_EMAIL_VERIFICATION=false
          DISABLE_TELEMETRY=true
+          DISABLE_VECTOR_DB=true
          ONYX_BACKEND_IMAGE=${ECR_CACHE}:integration-test-backend-test-${RUN_ID}
          INTEGRATION_TESTS_MODE=true
+          USE_LIGHTWEIGHT_BACKGROUND_WORKER=true
          EOF

-      # Start only the services needed for Onyx Lite (Postgres + API server)
-      - name: Start Docker containers (onyx-lite)
+      # Start only the services needed for no-vectordb mode (no Vespa, no model servers)
+      - name: Start Docker containers (no-vectordb)
        run: |
          cd deployment/docker_compose
-          docker compose -f docker-compose.yml -f docker-compose.onyx-lite.yml -f docker-compose.dev.yml up \
+          docker compose -f docker-compose.yml -f docker-compose.no-vectordb.yml -f docker-compose.dev.yml up \
            relational_db \
+            cache \
+            minio \
            api_server \
+            background \
            -d
-        id: start_docker_onyx_lite
+        id: start_docker_no_vectordb

      - name: Wait for services to be ready
        run: |
-          echo "Starting wait-for-service script (onyx-lite)..."
+          echo "Starting wait-for-service script (no-vectordb)..."
          start_time=$(date +%s)
          timeout=300
          while true; do
@@ -545,14 +552,14 @@ jobs:
            sleep 5
          done

-      - name: Run Onyx Lite Integration Tests
+      - name: Run No-VectorDB Integration Tests
        uses: nick-fields/retry@ce71cc2ab81d554ebbe88c79ab5975992d79ba08 # ratchet:nick-fields/retry@v3
        with:
          timeout_minutes: 20
          max_attempts: 3
          retry_wait_seconds: 10
          command: |
-            echo "Running onyx-lite integration tests..."
+            echo "Running no-vectordb integration tests..."
            docker run --rm --network onyx_default \
              --name test-runner \
              -e POSTGRES_HOST=relational_db \
@@ -563,38 +570,39 @@ jobs:
              -e DB_READONLY_PASSWORD=password \
              -e POSTGRES_POOL_PRE_PING=true \
              -e POSTGRES_USE_NULL_POOL=true \
+              -e REDIS_HOST=cache \
              -e API_SERVER_HOST=api_server \
              -e OPENAI_API_KEY=${OPENAI_API_KEY} \
              -e TEST_WEB_HOSTNAME=test-runner \
              ${{ env.RUNS_ON_ECR_CACHE }}:integration-test-${{ github.run_id }} \
              /app/tests/integration/tests/no_vectordb

-      - name: Dump API server logs (onyx-lite)
+      - name: Dump API server logs (no-vectordb)
        if: always()
        run: |
          cd deployment/docker_compose
-          docker compose -f docker-compose.yml -f docker-compose.onyx-lite.yml -f docker-compose.dev.yml \
-            logs --no-color api_server > $GITHUB_WORKSPACE/api_server_onyx_lite.log || true
+          docker compose -f docker-compose.yml -f docker-compose.no-vectordb.yml -f docker-compose.dev.yml \
+            logs --no-color api_server > $GITHUB_WORKSPACE/api_server_no_vectordb.log || true

-      - name: Dump all-container logs (onyx-lite)
+      - name: Dump all-container logs (no-vectordb)
        if: always()
        run: |
          cd deployment/docker_compose
-          docker compose -f docker-compose.yml -f docker-compose.onyx-lite.yml -f docker-compose.dev.yml \
-            logs --no-color > $GITHUB_WORKSPACE/docker-compose-onyx-lite.log || true
+          docker compose -f docker-compose.yml -f docker-compose.no-vectordb.yml -f docker-compose.dev.yml \
+            logs --no-color > $GITHUB_WORKSPACE/docker-compose-no-vectordb.log || true

-      - name: Upload logs (onyx-lite)
+      - name: Upload logs (no-vectordb)
        if: always()
        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
        with:
-          name: docker-all-logs-onyx-lite
-          path: ${{ github.workspace }}/docker-compose-onyx-lite.log
+          name: docker-all-logs-no-vectordb
+          path: ${{ github.workspace }}/docker-compose-no-vectordb.log

-      - name: Stop Docker containers (onyx-lite)
+      - name: Stop Docker containers (no-vectordb)
        if: always()
        run: |
          cd deployment/docker_compose
-          docker compose -f docker-compose.yml -f docker-compose.onyx-lite.yml -f docker-compose.dev.yml down -v
+          docker compose -f docker-compose.yml -f docker-compose.no-vectordb.yml -f docker-compose.dev.yml down -v

  multitenant-tests:
    needs:
@@ -736,7 +744,7 @@ jobs:
    # NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
    runs-on: ubuntu-slim
    timeout-minutes: 45
-    needs: [integration-tests, onyx-lite-tests, multitenant-tests]
+    needs: [integration-tests, no-vectordb-tests, multitenant-tests]
    if: ${{ always() }}
    steps:
      - name: Check job status
--- a/.github/workflows/pr-playwright-tests.yml
+++ b/.github/workflows/pr-playwright-tests.yml
@@ -268,11 +268,10 @@ jobs:
          persist-credentials: false

      - name: Setup node
-        # zizmor: ignore[cache-poisoning] ephemeral runners; no release artifacts
        uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # ratchet:actions/setup-node@v4
        with:
          node-version: 22
-          cache: "npm" # zizmor: ignore[cache-poisoning]
+          cache: "npm"
          cache-dependency-path: ./web/package-lock.json

      - name: Install node dependencies
@@ -280,7 +279,6 @@ jobs:
        run: npm ci

      - name: Cache playwright cache
-        # zizmor: ignore[cache-poisoning] ephemeral runners; no release artifacts
        uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
        with:
          path: ~/.cache/ms-playwright
@@ -592,108 +590,6 @@ jobs:
          name: docker-logs-${{ matrix.project }}-${{ github.run_id }}
          path: ${{ github.workspace }}/docker-compose.log

-  playwright-tests-lite:
-    needs: [build-web-image, build-backend-image]
-    name: Playwright Tests (lite)
-    runs-on:
-      - runs-on
-      - runner=4cpu-linux-arm64
-      - "run-id=${{ github.run_id }}-playwright-tests-lite"
-      - "extras=ecr-cache"
-    timeout-minutes: 30
-    steps:
-      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
-
-      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # ratchet:actions/checkout@v6
-        with:
-          persist-credentials: false
-
-      - name: Setup node
-        # zizmor: ignore[cache-poisoning] ephemeral runners; no release artifacts
-        uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # ratchet:actions/setup-node@v4
-        with:
-          node-version: 22
-          cache: "npm" # zizmor: ignore[cache-poisoning]
-          cache-dependency-path: ./web/package-lock.json
-
-      - name: Install node dependencies
-        working-directory: ./web
-        run: npm ci
-
-      - name: Cache playwright cache
-        # zizmor: ignore[cache-poisoning] ephemeral runners; no release artifacts
-        uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
-        with:
-          path: ~/.cache/ms-playwright
-          key: ${{ runner.os }}-playwright-npm-${{ hashFiles('web/package-lock.json') }}
-          restore-keys: |
-            ${{ runner.os }}-playwright-npm-
-
-      - name: Install playwright browsers
-        working-directory: ./web
-        run: npx playwright install --with-deps
-
-      - name: Create .env file for Docker Compose
-        env:
-          OPENAI_API_KEY_VALUE: ${{ env.OPENAI_API_KEY }}
-          ECR_CACHE: ${{ env.RUNS_ON_ECR_CACHE }}
-          RUN_ID: ${{ github.run_id }}
-        run: |
-          cat <<EOF > deployment/docker_compose/.env
-          ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
-          LICENSE_ENFORCEMENT_ENABLED=false
-          AUTH_TYPE=basic
-          INTEGRATION_TESTS_MODE=true
-          GEN_AI_API_KEY=${OPENAI_API_KEY_VALUE}
-          MOCK_LLM_RESPONSE=true
-          REQUIRE_EMAIL_VERIFICATION=false
-          DISABLE_TELEMETRY=true
-          ONYX_BACKEND_IMAGE=${ECR_CACHE}:playwright-test-backend-${RUN_ID}
-          ONYX_WEB_SERVER_IMAGE=${ECR_CACHE}:playwright-test-web-${RUN_ID}
-          EOF
-
-      # needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
-      # https://docs.docker.com/docker-hub/usage/
-      - name: Login to Docker Hub
-        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # ratchet:docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKER_USERNAME }}
-          password: ${{ secrets.DOCKER_TOKEN }}
-
-      - name: Start Docker containers (lite)
-        run: |
-          cd deployment/docker_compose
-          docker compose -f docker-compose.yml -f docker-compose.onyx-lite.yml -f docker-compose.dev.yml up -d
-        id: start_docker
-
-      - name: Run Playwright tests (lite)
-        working-directory: ./web
-        run: npx playwright test --project lite
-
-      - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
-        if: always()
-        with:
-          name: playwright-test-results-lite-${{ github.run_id }}
-          path: ./web/output/playwright/
-          retention-days: 30
-
-      - name: Save Docker logs
-        if: success() || failure()
-        env:
-          WORKSPACE: ${{ github.workspace }}
-        run: |
-          cd deployment/docker_compose
-          docker compose logs > docker-compose.log
-          mv docker-compose.log ${WORKSPACE}/docker-compose.log
-
-      - name: Upload logs
-        if: success() || failure()
-        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
-        with:
-          name: docker-logs-lite-${{ github.run_id }}
-          path: ${{ github.workspace }}/docker-compose.log
-
  # Post a single combined visual regression comment after all matrix jobs finish
  visual-regression-comment:
    needs: [playwright-tests]
@@ -790,7 +686,7 @@ jobs:
    # NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
    runs-on: ubuntu-slim
    timeout-minutes: 45
-    needs: [playwright-tests, playwright-tests-lite]
+    needs: [playwright-tests]
    if: ${{ always() }}
    steps:
      - name: Check job status
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -40,7 +40,19 @@
      }
    },
    {
-      "name": "Celery",
+      "name": "Celery (lightweight mode)",
+      "configurations": [
+        "Celery primary",
+        "Celery background",
+        "Celery beat"
+      ],
+      "presentation": {
+        "group": "1"
+      },
+      "stopAll": true
+    },
+    {
+      "name": "Celery (standard mode)",
      "configurations": [
        "Celery primary",
        "Celery light",
@@ -241,6 +253,35 @@
      },
      "consoleTitle": "Celery light Console"
    },
+    {
+      "name": "Celery background",
+      "type": "debugpy",
+      "request": "launch",
+      "module": "celery",
+      "cwd": "${workspaceFolder}/backend",
+      "envFile": "${workspaceFolder}/.vscode/.env",
+      "env": {
+        "LOG_LEVEL": "INFO",
+        "PYTHONUNBUFFERED": "1",
+        "PYTHONPATH": "."
+      },
+      "args": [
+        "-A",
+        "onyx.background.celery.versioned_apps.background",
+        "worker",
+        "--pool=threads",
+        "--concurrency=20",
+        "--prefetch-multiplier=4",
+        "--loglevel=INFO",
+        "--hostname=background@%n",
+        "-Q",
+        "vespa_metadata_sync,connector_deletion,doc_permissions_upsert,checkpoint_cleanup,index_attempt_cleanup,docprocessing,connector_doc_fetching,connector_pruning,connector_doc_permissions_sync,connector_external_group_sync,csv_generation,kg_processing,monitoring,user_file_processing,user_file_project_sync,user_file_delete,opensearch_migration"
+      ],
+      "presentation": {
+        "group": "2"
+      },
+      "consoleTitle": "Celery background Console"
+    },
    {
      "name": "Celery heavy",
      "type": "debugpy",
@@ -485,6 +526,21 @@
        "group": "3"
      }
    },
+    {
+      "name": "Clear and Restart OpenSearch Container",
+      // VS Code requires a "type" on every launch configuration; "node" is only a placeholder and has no effect on the bash script.
+      "type": "node",
+      "request": "launch",
+      "runtimeExecutable": "bash",
+      "runtimeArgs": [
+        "${workspaceFolder}/backend/scripts/restart_opensearch_container.sh"
+      ],
+      "cwd": "${workspaceFolder}",
+      "console": "integratedTerminal",
+      "presentation": {
+        "group": "3"
+      }
+    },
    {
      "name": "Eval CLI",
      "type": "debugpy",
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -86,6 +86,37 @@ Onyx uses Celery for asynchronous task processing with multiple specialized work
     - Monitoring tasks (every 5 minutes)
     - Cleanup tasks (hourly)

+#### Worker Deployment Modes
+
+Onyx supports two deployment modes for background workers, controlled by the `USE_LIGHTWEIGHT_BACKGROUND_WORKER` environment variable:
+
+**Lightweight Mode** (default, `USE_LIGHTWEIGHT_BACKGROUND_WORKER=true`):
+
+- Runs a single consolidated `background` worker that handles all background tasks:
+  - Light worker tasks (Vespa operations, permissions sync, deletion)
+  - Document processing (indexing pipeline)
+  - Document fetching (connector data retrieval)
+  - Pruning operations (from `heavy` worker)
+  - Knowledge graph processing (from `kg_processing` worker)
+  - Monitoring tasks (from `monitoring` worker)
+  - User file processing (from `user_file_processing` worker)
+- Lower resource footprint (fewer worker processes)
+- Suitable for smaller deployments or development environments
+- Default concurrency: 20 threads (raised to handle the combined workload)
+
+**Standard Mode** (`USE_LIGHTWEIGHT_BACKGROUND_WORKER=false`):
+
+- Runs separate specialized workers as documented above (light, docprocessing, docfetching, heavy, kg_processing, monitoring, user_file_processing)
+- Better isolation and scalability
+- Can scale individual workers independently based on workload
+- Suitable for production deployments with higher load
+
+The deployment mode affects:
+
+- **Backend**: Worker processes spawned by supervisord or dev scripts
+- **Helm**: Which Kubernetes deployments are created
+- **Dev Environment**: Which workers `dev_run_background_jobs.py` spawns
+
 #### Key Features

 - **Thread-based Workers**: All workers use thread pools (not processes) for stability
@@ -586,45 +617,6 @@ Keep it high level. You can reference certain files or functions though.

 Before writing your plan, make sure to do research. Explore the relevant sections in the codebase.

-## Error Handling
-
-**Always raise `OnyxError` from `onyx.error_handling.exceptions` instead of `HTTPException`.
-Never hardcode status codes or use `starlette.status` / `fastapi.status` constants directly.**
-
-A global FastAPI exception handler converts `OnyxError` into a JSON response with the standard
-`{"error_code": "...", "message": "..."}` shape. This eliminates boilerplate and keeps error
-handling consistent across the entire backend.
-
-```python
-from onyx.error_handling.error_codes import OnyxErrorCode
-from onyx.error_handling.exceptions import OnyxError
-
-# ✅ Good
-raise OnyxError(OnyxErrorCode.NOT_FOUND, "Session not found")
-
-# ✅ Good — no extra message needed
-raise OnyxError(OnyxErrorCode.UNAUTHENTICATED)
-
-# ✅ Good — upstream service with dynamic status code
-raise OnyxError(OnyxErrorCode.BAD_GATEWAY, detail, status_code_override=upstream_status)
-
-# ❌ Bad — using HTTPException directly
-raise HTTPException(status_code=404, detail="Session not found")
-
-# ❌ Bad — starlette constant
-raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Access denied")
-```
-
-Available error codes are defined in `backend/onyx/error_handling/error_codes.py`. If a new error
-category is needed, add it there first — do not invent ad-hoc codes.
-
-**Upstream service errors:** When forwarding errors from an upstream service where the HTTP
-status code is dynamic (comes from the upstream response), use `status_code_override`:
-
-```python
-raise OnyxError(OnyxErrorCode.BAD_GATEWAY, detail, status_code_override=e.response.status_code)
-```
-
 ## Best Practices

 In addition to the other content in this file, best practices for contributing
--- a/backend/ee/onyx/background/celery/apps/background.py
+++ b/backend/ee/onyx/background/celery/apps/background.py
@@ -0,0 +1,15 @@
+from onyx.background.celery.apps import app_base
+from onyx.background.celery.apps.background import celery_app
+
+
+celery_app.autodiscover_tasks(
+    app_base.filter_task_modules(
+        [
+            "ee.onyx.background.celery.tasks.doc_permission_syncing",
+            "ee.onyx.background.celery.tasks.external_group_syncing",
+            "ee.onyx.background.celery.tasks.cleanup",
+            "ee.onyx.background.celery.tasks.tenant_provisioning",
+            "ee.onyx.background.celery.tasks.query_history",
+        ]
+    )
+)
--- a/backend/ee/onyx/db/license.py
+++ b/backend/ee/onyx/db/license.py
@@ -11,10 +11,11 @@ from ee.onyx.server.license.models import LicenseMetadata
 from ee.onyx.server.license.models import LicensePayload
 from ee.onyx.server.license.models import LicenseSource
 from onyx.auth.schemas import UserRole
-from onyx.cache.factory import get_cache_backend
 from onyx.configs.constants import ANONYMOUS_USER_EMAIL
 from onyx.db.models import License
 from onyx.db.models import User
+from onyx.redis.redis_pool import get_redis_client
+from onyx.redis.redis_pool import get_redis_replica_client
 from onyx.utils.logger import setup_logger
 from shared_configs.configs import MULTI_TENANT
 from shared_configs.contextvars import get_current_tenant_id
@@ -141,7 +142,7 @@ def get_used_seats(tenant_id: str | None = None) -> int:

 def get_cached_license_metadata(tenant_id: str | None = None) -> LicenseMetadata | None:
    """
-    Get license metadata from cache.
+    Get license metadata from Redis cache.

    Args:
        tenant_id: Tenant ID (for multi-tenant deployments)
@@ -149,34 +150,38 @@ def get_cached_license_metadata(tenant_id: str | None = None) -> LicenseMetadata
    Returns:
        LicenseMetadata if cached, None otherwise
    """
-    cache = get_cache_backend(tenant_id=tenant_id)
-    cached = cache.get(LICENSE_METADATA_KEY)
-    if not cached:
-        return None
+    tenant = tenant_id or get_current_tenant_id()
+    redis_client = get_redis_replica_client(tenant_id=tenant)

-    try:
-        cached_str = (
-            cached.decode("utf-8") if isinstance(cached, bytes) else str(cached)
-        )
-        return LicenseMetadata.model_validate_json(cached_str)
-    except Exception as e:
-        logger.warning(f"Failed to parse cached license metadata: {e}")
-        return None
+    cached = redis_client.get(LICENSE_METADATA_KEY)
+    if cached:
+        try:
+            cached_str: str
+            if isinstance(cached, bytes):
+                cached_str = cached.decode("utf-8")
+            else:
+                cached_str = str(cached)
+            return LicenseMetadata.model_validate_json(cached_str)
+        except Exception as e:
+            logger.warning(f"Failed to parse cached license metadata: {e}")
+            return None
+    return None


 def invalidate_license_cache(tenant_id: str | None = None) -> None:
    """
    Invalidate the license metadata cache (not the license itself).

-    Deletes the cached LicenseMetadata. The actual license in the database
-    is not affected. Delete is idempotent — if the key doesn't exist, this
-    is a no-op.
+    This deletes the cached LicenseMetadata from Redis. The actual license
+    in the database is not affected. Redis delete is idempotent - if the
+    key doesn't exist, this is a no-op.

    Args:
        tenant_id: Tenant ID (for multi-tenant deployments)
    """
-    cache = get_cache_backend(tenant_id=tenant_id)
-    cache.delete(LICENSE_METADATA_KEY)
+    tenant = tenant_id or get_current_tenant_id()
+    redis_client = get_redis_client(tenant_id=tenant)
+    redis_client.delete(LICENSE_METADATA_KEY)
    logger.info("License cache invalidated")


@@ -187,7 +192,7 @@ def update_license_cache(
    tenant_id: str | None = None,
 ) -> LicenseMetadata:
    """
-    Update the cache with license metadata.
+    Update the Redis cache with license metadata.

    We cache all license statuses (ACTIVE, GRACE_PERIOD, GATED_ACCESS) because:
    1. Frontend needs status to show appropriate UI/banners
@@ -206,7 +211,7 @@ def update_license_cache(
    from ee.onyx.utils.license import get_license_status

    tenant = tenant_id or get_current_tenant_id()
-    cache = get_cache_backend(tenant_id=tenant_id)
+    redis_client = get_redis_client(tenant_id=tenant)

    used_seats = get_used_seats(tenant)
    status = get_license_status(payload, grace_period_end)
@@ -225,7 +230,7 @@ def update_license_cache(
        stripe_subscription_id=payload.stripe_subscription_id,
    )

-    cache.set(
+    redis_client.set(
        LICENSE_METADATA_KEY,
        metadata.model_dump_json(),
        ex=LICENSE_CACHE_TTL_SECONDS,
--- a/backend/ee/onyx/db/user_group.py
+++ b/backend/ee/onyx/db/user_group.py
@@ -15,7 +15,6 @@ from sqlalchemy.orm import Session
 from ee.onyx.server.user_group.models import SetCuratorRequest
 from ee.onyx.server.user_group.models import UserGroupCreate
 from ee.onyx.server.user_group.models import UserGroupUpdate
-from onyx.configs.app_configs import DISABLE_VECTOR_DB
 from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
 from onyx.db.enums import AccessType
 from onyx.db.enums import ConnectorCredentialPairStatus
@@ -472,9 +471,7 @@ def _add_user_group__cc_pair_relationships__no_commit(

 def insert_user_group(db_session: Session, user_group: UserGroupCreate) -> UserGroup:
    db_user_group = UserGroup(
-        name=user_group.name,
-        time_last_modified_by_user=func.now(),
-        is_up_to_date=DISABLE_VECTOR_DB,
+        name=user_group.name, time_last_modified_by_user=func.now()
    )
    db_session.add(db_user_group)
    db_session.flush()  # give the group an ID
@@ -777,7 +774,8 @@ def update_user_group(
            cc_pair_ids=user_group_update.cc_pair_ids,
        )

-    if cc_pairs_updated and not DISABLE_VECTOR_DB:
+    # The group only needs to be re-synced with Vespa when its cc_pairs have been updated
+    if cc_pairs_updated:
        db_user_group.is_up_to_date = False

    removed_users = db_session.scalars(
--- a/backend/ee/onyx/main.py
+++ b/backend/ee/onyx/main.py
@@ -4,6 +4,7 @@ from contextlib import asynccontextmanager
 from fastapi import FastAPI
 from httpx_oauth.clients.google import GoogleOAuth2

+from ee.onyx.configs.app_configs import LICENSE_ENFORCEMENT_ENABLED
 from ee.onyx.server.analytics.api import router as analytics_router
 from ee.onyx.server.auth_check import check_ee_router_auth
 from ee.onyx.server.billing.api import router as billing_router
@@ -152,9 +153,12 @@ def get_application() -> FastAPI:
    # License management
    include_router_with_global_prefix_prepended(application, license_router)

-    # Unified billing API - always registered in EE.
-    # Each endpoint is protected by the `current_admin_user` dependency (admin auth).
-    include_router_with_global_prefix_prepended(application, billing_router)
+    # Unified billing API - available when license system is enabled
+    # Works for both self-hosted and cloud deployments
+    # TODO(ENG-3533): Once frontend migrates to /admin/billing/*, this becomes the
+    # primary billing API and /tenants/* billing endpoints can be removed
+    if LICENSE_ENFORCEMENT_ENABLED:
+        include_router_with_global_prefix_prepended(application, billing_router)

    if MULTI_TENANT:
        # Tenant management
--- a/backend/ee/onyx/server/billing/api.py
+++ b/backend/ee/onyx/server/billing/api.py
@@ -26,6 +26,7 @@ import asyncio
 import httpx
 from fastapi import APIRouter
 from fastapi import Depends
+from fastapi import HTTPException
 from pydantic import BaseModel
 from sqlalchemy.orm import Session

@@ -41,6 +42,7 @@ from ee.onyx.server.billing.models import SeatUpdateRequest
 from ee.onyx.server.billing.models import SeatUpdateResponse
 from ee.onyx.server.billing.models import StripePublishableKeyResponse
 from ee.onyx.server.billing.models import SubscriptionStatusResponse
+from ee.onyx.server.billing.service import BillingServiceError
 from ee.onyx.server.billing.service import (
    create_checkout_session as create_checkout_service,
 )
@@ -56,8 +58,6 @@ from onyx.configs.app_configs import STRIPE_PUBLISHABLE_KEY_OVERRIDE
 from onyx.configs.app_configs import STRIPE_PUBLISHABLE_KEY_URL
 from onyx.configs.app_configs import WEB_DOMAIN
 from onyx.db.engine.sql_engine import get_session
-from onyx.error_handling.error_codes import OnyxErrorCode
-from onyx.error_handling.exceptions import OnyxError
 from onyx.redis.redis_pool import get_shared_redis_client
 from onyx.utils.logger import setup_logger
 from shared_configs.configs import MULTI_TENANT
@@ -169,23 +169,26 @@ async def create_checkout_session(
    if seats is not None:
        used_seats = get_used_seats(tenant_id)
        if seats < used_seats:
-            raise OnyxError(
-                OnyxErrorCode.VALIDATION_ERROR,
-                f"Cannot subscribe with fewer seats than current usage. "
+            raise HTTPException(
+                status_code=400,
+                detail=f"Cannot subscribe with fewer seats than current usage. "
                f"You have {used_seats} active users/integrations but requested {seats} seats.",
            )

    # Build redirect URL for after checkout completion
    redirect_url = f"{WEB_DOMAIN}/admin/billing?checkout=success"

-    return await create_checkout_service(
-        billing_period=billing_period,
-        seats=seats,
-        email=email,
-        license_data=license_data,
-        redirect_url=redirect_url,
-        tenant_id=tenant_id,
-    )
+    try:
+        return await create_checkout_service(
+            billing_period=billing_period,
+            seats=seats,
+            email=email,
+            license_data=license_data,
+            redirect_url=redirect_url,
+            tenant_id=tenant_id,
+        )
+    except BillingServiceError as e:
+        raise HTTPException(status_code=e.status_code, detail=e.message)


@router.post("/create-customer-portal-session")
@@ -203,15 +206,18 @@ async def create_customer_portal_session(

    # Self-hosted requires license
    if not MULTI_TENANT and not license_data:
-        raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, "No license found")
+        raise HTTPException(status_code=400, detail="No license found")

    return_url = request.return_url if request else f"{WEB_DOMAIN}/admin/billing"

-    return await create_portal_service(
-        license_data=license_data,
-        return_url=return_url,
-        tenant_id=tenant_id,
-    )
+    try:
+        return await create_portal_service(
+            license_data=license_data,
+            return_url=return_url,
+            tenant_id=tenant_id,
+        )
+    except BillingServiceError as e:
+        raise HTTPException(status_code=e.status_code, detail=e.message)


@router.get("/billing-information")
@@ -234,9 +240,9 @@ async def get_billing_information(

    # Check circuit breaker (self-hosted only)
    if _is_billing_circuit_open():
-        raise OnyxError(
-            OnyxErrorCode.SERVICE_UNAVAILABLE,
-            "Stripe connection temporarily disabled. Click 'Connect to Stripe' to retry.",
+        raise HTTPException(
+            status_code=503,
+            detail="Stripe connection temporarily disabled. Click 'Connect to Stripe' to retry.",
        )

    try:
@@ -244,11 +250,11 @@ async def get_billing_information(
            license_data=license_data,
            tenant_id=tenant_id,
        )
-    except OnyxError as e:
+    except BillingServiceError as e:
        # Open circuit breaker on connection failures (self-hosted only)
        if e.status_code in (502, 503, 504):
            _open_billing_circuit()
-        raise
+        raise HTTPException(status_code=e.status_code, detail=e.message)


@router.post("/seats/update")
@@ -268,25 +274,31 @@ async def update_seats(

    # Self-hosted requires license
    if not MULTI_TENANT and not license_data:
-        raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, "No license found")
+        raise HTTPException(status_code=400, detail="No license found")

    # Validate that new seat count is not less than current used seats
    used_seats = get_used_seats(tenant_id)
    if request.new_seat_count < used_seats:
-        raise OnyxError(
-            OnyxErrorCode.VALIDATION_ERROR,
-            f"Cannot reduce seats below current usage. "
+        raise HTTPException(
+            status_code=400,
+            detail=f"Cannot reduce seats below current usage. "
            f"You have {used_seats} active users/integrations but requested {request.new_seat_count} seats.",
        )

-    # Note: Don't store license here - the control plane may still be processing
-    # the subscription update. The frontend should call /license/claim after a
-    # short delay to get the freshly generated license.
-    return await update_seat_service(
-        new_seat_count=request.new_seat_count,
-        license_data=license_data,
-        tenant_id=tenant_id,
-    )
+    try:
+        result = await update_seat_service(
+            new_seat_count=request.new_seat_count,
+            license_data=license_data,
+            tenant_id=tenant_id,
+        )
+
+        # Note: Don't store license here - the control plane may still be processing
+        # the subscription update. The frontend should call /license/claim after a
+        # short delay to get the freshly generated license.
+
+        return result
+    except BillingServiceError as e:
+        raise HTTPException(status_code=e.status_code, detail=e.message)


@router.get("/stripe-publishable-key")
@@ -317,18 +329,18 @@ async def get_stripe_publishable_key() -> StripePublishableKeyResponse:
        if STRIPE_PUBLISHABLE_KEY_OVERRIDE:
            key = STRIPE_PUBLISHABLE_KEY_OVERRIDE.strip()
            if not key.startswith("pk_"):
-                raise OnyxError(
-                    OnyxErrorCode.INTERNAL_ERROR,
-                    "Invalid Stripe publishable key format",
+                raise HTTPException(
+                    status_code=500,
+                    detail="Invalid Stripe publishable key format",
                )
            _stripe_publishable_key_cache = key
            return StripePublishableKeyResponse(publishable_key=key)

        # Fall back to S3 bucket
        if not STRIPE_PUBLISHABLE_KEY_URL:
-            raise OnyxError(
-                OnyxErrorCode.INTERNAL_ERROR,
-                "Stripe publishable key is not configured",
+            raise HTTPException(
+                status_code=500,
+                detail="Stripe publishable key is not configured",
            )

        try:
@@ -339,17 +351,17 @@ async def get_stripe_publishable_key() -> StripePublishableKeyResponse:

                # Validate key format
                if not key.startswith("pk_"):
-                    raise OnyxError(
-                        OnyxErrorCode.INTERNAL_ERROR,
-                        "Invalid Stripe publishable key format",
+                    raise HTTPException(
+                        status_code=500,
+                        detail="Invalid Stripe publishable key format",
                    )

                _stripe_publishable_key_cache = key
                return StripePublishableKeyResponse(publishable_key=key)
        except httpx.HTTPError:
-            raise OnyxError(
-                OnyxErrorCode.INTERNAL_ERROR,
-                "Failed to fetch Stripe publishable key",
+            raise HTTPException(
+                status_code=500,
+                detail="Failed to fetch Stripe publishable key",
            )


--- a/backend/ee/onyx/server/billing/service.py
+++ b/backend/ee/onyx/server/billing/service.py
@@ -22,8 +22,6 @@ from ee.onyx.server.billing.models import SeatUpdateResponse
 from ee.onyx.server.billing.models import SubscriptionStatusResponse
 from ee.onyx.server.tenants.access import generate_data_plane_token
 from onyx.configs.app_configs import CONTROL_PLANE_API_BASE_URL
-from onyx.error_handling.error_codes import OnyxErrorCode
-from onyx.error_handling.exceptions import OnyxError
 from onyx.utils.logger import setup_logger
 from shared_configs.configs import MULTI_TENANT

@@ -33,6 +31,15 @@ logger = setup_logger()
 _REQUEST_TIMEOUT = 30.0


+class BillingServiceError(Exception):
+    """Billing request failure carrying a message and the HTTP status to report."""
+
+    def __init__(self, message: str, status_code: int = 500):
+        self.message = message  # failure detail; callers pass it on as the HTTP detail
+        self.status_code = status_code  # HTTP status to surface; defaults to 500
+        super().__init__(self.message)
+
+
 def _get_proxy_headers(license_data: str | None) -> dict[str, str]:
    """Build headers for proxy requests (self-hosted).

@@ -94,7 +101,7 @@ async def _make_billing_request(
        Response JSON as dict

    Raises:
-        OnyxError: If request fails
+        BillingServiceError: If request fails
    """

    base_url = _get_base_url()
@@ -121,17 +128,11 @@ async def _make_billing_request(
        except Exception:
            pass
        logger.error(f"{error_message}: {e.response.status_code} - {detail}")
-        raise OnyxError(
-            OnyxErrorCode.BAD_GATEWAY,
-            detail,
-            status_code_override=e.response.status_code,
-        )
+        raise BillingServiceError(detail, e.response.status_code)

    except httpx.RequestError:
        logger.exception("Failed to connect to billing service")
-        raise OnyxError(
-            OnyxErrorCode.BAD_GATEWAY, "Failed to connect to billing service"
-        )
+        raise BillingServiceError("Failed to connect to billing service", 502)


 async def create_checkout_session(
--- a/backend/ee/onyx/server/enterprise_settings/api.py
+++ b/backend/ee/onyx/server/enterprise_settings/api.py
@@ -223,15 +223,6 @@ def get_active_scim_token(
    token = dal.get_active_token()
    if not token:
        raise HTTPException(status_code=404, detail="No active SCIM token")
-
-    # Derive the IdP domain from the first synced user as a heuristic.
-    idp_domain: str | None = None
-    mappings, _total = dal.list_user_mappings(start_index=1, count=1)
-    if mappings:
-        user = dal.get_user(mappings[0].user_id)
-        if user and "@" in user.email:
-            idp_domain = user.email.rsplit("@", 1)[1]
-
    return ScimTokenResponse(
        id=token.id,
        name=token.name,
@@ -239,7 +230,6 @@ def get_active_scim_token(
        is_active=token.is_active,
        created_at=token.created_at,
        last_used_at=token.last_used_at,
-        idp_domain=idp_domain,
    )


--- a/backend/ee/onyx/server/license/api.py
+++ b/backend/ee/onyx/server/license/api.py
@@ -14,6 +14,7 @@ import requests
 from fastapi import APIRouter
 from fastapi import Depends
 from fastapi import File
+from fastapi import HTTPException
 from fastapi import UploadFile
 from sqlalchemy.orm import Session

@@ -34,8 +35,6 @@ from ee.onyx.server.license.models import SeatUsageResponse
 from ee.onyx.utils.license import verify_license_signature
 from onyx.auth.users import User
 from onyx.db.engine.sql_engine import get_session
-from onyx.error_handling.error_codes import OnyxErrorCode
-from onyx.error_handling.exceptions import OnyxError
 from onyx.utils.logger import setup_logger
 from shared_configs.configs import MULTI_TENANT

@@ -128,9 +127,9 @@ async def claim_license(
    2. Without session_id: Re-claim using existing license for auth
    """
    if MULTI_TENANT:
-        raise OnyxError(
-            OnyxErrorCode.VALIDATION_ERROR,
-            "License claiming is only available for self-hosted deployments",
+        raise HTTPException(
+            status_code=400,
+            detail="License claiming is only available for self-hosted deployments",
        )

    try:
@@ -147,16 +146,15 @@ async def claim_license(
            # Re-claim using existing license for auth
            metadata = get_license_metadata(db_session)
            if not metadata or not metadata.tenant_id:
-                raise OnyxError(
-                    OnyxErrorCode.VALIDATION_ERROR,
-                    "No license found. Provide session_id after checkout.",
+                raise HTTPException(
+                    status_code=400,
+                    detail="No license found. Provide session_id after checkout.",
                )

            license_row = get_license(db_session)
            if not license_row or not license_row.license_data:
-                raise OnyxError(
-                    OnyxErrorCode.VALIDATION_ERROR,
-                    "No license found in database",
+                raise HTTPException(
+                    status_code=400, detail="No license found in database"
                )

            url = f"{CLOUD_DATA_PLANE_URL}/proxy/license/{metadata.tenant_id}"
@@ -175,7 +173,7 @@ async def claim_license(
        license_data = data.get("license")

        if not license_data:
-            raise OnyxError(OnyxErrorCode.NOT_FOUND, "No license in response")
+            raise HTTPException(status_code=404, detail="No license in response")

        # Verify signature before persisting
        payload = verify_license_signature(license_data)
@@ -201,14 +199,12 @@ async def claim_license(
            detail = error_data.get("detail", detail)
        except Exception:
            pass
-        raise OnyxError(
-            OnyxErrorCode.BAD_GATEWAY, detail, status_code_override=status_code
-        )
+        raise HTTPException(status_code=status_code, detail=detail)
    except ValueError as e:
-        raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, str(e))
+        raise HTTPException(status_code=400, detail=str(e))
    except requests.RequestException:
-        raise OnyxError(
-            OnyxErrorCode.BAD_GATEWAY, "Failed to connect to license server"
+        raise HTTPException(
+            status_code=502, detail="Failed to connect to license server"
        )


@@ -225,9 +221,9 @@ async def upload_license(
    The license file must be cryptographically signed by Onyx.
    """
    if MULTI_TENANT:
-        raise OnyxError(
-            OnyxErrorCode.VALIDATION_ERROR,
-            "License upload is only available for self-hosted deployments",
+        raise HTTPException(
+            status_code=400,
+            detail="License upload is only available for self-hosted deployments",
        )

    try:
@@ -238,14 +234,14 @@ async def upload_license(
        # Remove any stray whitespace/newlines from user input
        license_data = license_data.strip()
    except UnicodeDecodeError:
-        raise OnyxError(OnyxErrorCode.INVALID_INPUT, "Invalid license file format")
+        raise HTTPException(status_code=400, detail="Invalid license file format")

    # Verify cryptographic signature - this is the only validation needed
    # The license's tenant_id identifies the customer in control plane, not locally
    try:
        payload = verify_license_signature(license_data)
    except ValueError as e:
-        raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, str(e))
+        raise HTTPException(status_code=400, detail=str(e))

    # Persist to DB and update cache
    upsert_license(db_session, license_data)
@@ -301,9 +297,9 @@ async def delete_license(
    Admin only - removes license from database and invalidates cache.
    """
    if MULTI_TENANT:
-        raise OnyxError(
-            OnyxErrorCode.VALIDATION_ERROR,
-            "License deletion is only available for self-hosted deployments",
+        raise HTTPException(
+            status_code=400,
+            detail="License deletion is only available for self-hosted deployments",
        )

    try:
--- a/backend/ee/onyx/server/middleware/license_enforcement.py
+++ b/backend/ee/onyx/server/middleware/license_enforcement.py
@@ -46,6 +46,7 @@ from fastapi import FastAPI
 from fastapi import Request
 from fastapi import Response
 from fastapi.responses import JSONResponse
+from redis.exceptions import RedisError
 from sqlalchemy.exc import SQLAlchemyError

 from ee.onyx.configs.app_configs import LICENSE_ENFORCEMENT_ENABLED
@@ -55,7 +56,6 @@ from ee.onyx.configs.license_enforcement_config import (
 )
 from ee.onyx.db.license import get_cached_license_metadata
 from ee.onyx.db.license import refresh_license_cache
-from onyx.cache.interface import CACHE_TRANSIENT_ERRORS
 from onyx.db.engine.sql_engine import get_session_with_current_tenant
 from onyx.server.settings.models import ApplicationStatus
 from shared_configs.contextvars import get_current_tenant_id
@@ -164,9 +164,9 @@ def add_license_enforcement_middleware(
                    "[license_enforcement] No license, allowing community features"
                )
                is_gated = False
-        except CACHE_TRANSIENT_ERRORS as e:
+        except RedisError as e:
            logger.warning(f"Failed to check license metadata: {e}")
-            # Fail open - don't block users due to cache connectivity issues
+            # Fail open - don't block users due to Redis connectivity issues
            is_gated = False

        if is_gated:
--- a/backend/ee/onyx/server/scim/models.py
+++ b/backend/ee/onyx/server/scim/models.py
@@ -365,7 +365,6 @@ class ScimTokenResponse(BaseModel):
    is_active: bool
    created_at: datetime
    last_used_at: datetime | None = None
-    idp_domain: str | None = None


 class ScimTokenCreatedResponse(ScimTokenResponse):
--- a/backend/ee/onyx/server/settings/api.py
+++ b/backend/ee/onyx/server/settings/api.py
@@ -6,7 +6,6 @@ from sqlalchemy.exc import SQLAlchemyError
 from ee.onyx.configs.app_configs import LICENSE_ENFORCEMENT_ENABLED
 from ee.onyx.db.license import get_cached_license_metadata
 from ee.onyx.db.license import refresh_license_cache
-from onyx.cache.interface import CACHE_TRANSIENT_ERRORS
 from onyx.configs.app_configs import ENTERPRISE_EDITION_ENABLED
 from onyx.db.engine.sql_engine import get_session_with_current_tenant
 from onyx.server.settings.models import ApplicationStatus
@@ -126,7 +125,7 @@ def apply_license_status_to_settings(settings: Settings) -> Settings:
                # syncing) means indexed data may need protection.
                settings.application_status = _BLOCKING_STATUS
            settings.ee_features_enabled = False
-    except CACHE_TRANSIENT_ERRORS as e:
+    except RedisError as e:
        logger.warning(f"Failed to check license metadata for settings: {e}")
        # Fail closed - disable EE features if we can't verify license
        settings.ee_features_enabled = False
--- a/backend/ee/onyx/server/tenants/billing_api.py
+++ b/backend/ee/onyx/server/tenants/billing_api.py
@@ -21,6 +21,7 @@ import asyncio
 import httpx
 from fastapi import APIRouter
 from fastapi import Depends
+from fastapi import HTTPException

 from ee.onyx.auth.users import current_admin_user
 from ee.onyx.server.tenants.access import control_plane_dep
@@ -42,8 +43,6 @@ from onyx.auth.users import User
 from onyx.configs.app_configs import STRIPE_PUBLISHABLE_KEY_OVERRIDE
 from onyx.configs.app_configs import STRIPE_PUBLISHABLE_KEY_URL
 from onyx.configs.app_configs import WEB_DOMAIN
-from onyx.error_handling.error_codes import OnyxErrorCode
-from onyx.error_handling.exceptions import OnyxError
 from onyx.utils.logger import setup_logger
 from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
 from shared_configs.contextvars import get_current_tenant_id
@@ -117,14 +116,13 @@ async def create_customer_portal_session(
     try:
         portal_url = fetch_customer_portal_session(tenant_id, return_url)
         return {"stripe_customer_portal_url": portal_url}
-    except OnyxError:
-        raise
     except Exception:
         logger.exception("Failed to create customer portal session")
-        raise OnyxError(
-            OnyxErrorCode.INTERNAL_ERROR,
-            "Failed to create customer portal session",
-        )
+        # Keep the response generic: the full exception is logged above and its
+        # text may contain internal details that must not reach the client.
+        raise HTTPException(
+            status_code=500,
+            detail="Failed to create customer portal session",
+        )


@router.post("/create-checkout-session")
@@ -140,14 +134,13 @@ async def create_checkout_session(
     try:
         checkout_url = fetch_stripe_checkout_session(tenant_id, billing_period, seats)
         return {"stripe_checkout_url": checkout_url}
-    except OnyxError:
-        raise
     except Exception:
         logger.exception("Failed to create checkout session")
-        raise OnyxError(
-            OnyxErrorCode.INTERNAL_ERROR,
-            "Failed to create checkout session",
-        )
+        # Keep the response generic: the full exception is logged above and its
+        # text may contain internal details that must not reach the client.
+        raise HTTPException(
+            status_code=500,
+            detail="Failed to create checkout session",
+        )


@router.post("/create-subscription-session")
@@ -158,20 +147,22 @@ async def create_subscription_session(
     try:
         tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
         if not tenant_id:
-            raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, "Tenant ID not found")
+            raise HTTPException(status_code=400, detail="Tenant ID not found")

         billing_period = request.billing_period if request else "monthly"
         session_id = fetch_stripe_checkout_session(tenant_id, billing_period)
         return SubscriptionSessionResponse(sessionId=session_id)

-    except OnyxError:
-        raise
+    except HTTPException:
+        # Let intentional HTTP errors (e.g. the 400 above) pass through unchanged
+        raise
     except Exception:
         logger.exception("Failed to create subscription session")
-        raise OnyxError(
-            OnyxErrorCode.INTERNAL_ERROR,
-            "Failed to create subscription session",
-        )
+        # Keep the response generic: the full exception is logged above.
+        raise HTTPException(
+            status_code=500,
+            detail="Failed to create subscription session",
+        )


@router.get("/stripe-publishable-key")
@@ -202,18 +186,18 @@ async def get_stripe_publishable_key() -> StripePublishableKeyResponse:
        if STRIPE_PUBLISHABLE_KEY_OVERRIDE:
            key = STRIPE_PUBLISHABLE_KEY_OVERRIDE.strip()
            if not key.startswith("pk_"):
-                raise OnyxError(
-                    OnyxErrorCode.INTERNAL_ERROR,
-                    "Invalid Stripe publishable key format",
+                raise HTTPException(
+                    status_code=500,
+                    detail="Invalid Stripe publishable key format",
                )
            _stripe_publishable_key_cache = key
            return StripePublishableKeyResponse(publishable_key=key)

        # Fall back to S3 bucket
        if not STRIPE_PUBLISHABLE_KEY_URL:
-            raise OnyxError(
-                OnyxErrorCode.INTERNAL_ERROR,
-                "Stripe publishable key is not configured",
+            raise HTTPException(
+                status_code=500,
+                detail="Stripe publishable key is not configured",
            )

        try:
@@ -224,15 +208,15 @@ async def get_stripe_publishable_key() -> StripePublishableKeyResponse:

                # Validate key format
                if not key.startswith("pk_"):
-                    raise OnyxError(
-                        OnyxErrorCode.INTERNAL_ERROR,
-                        "Invalid Stripe publishable key format",
+                    raise HTTPException(
+                        status_code=500,
+                        detail="Invalid Stripe publishable key format",
                    )

                _stripe_publishable_key_cache = key
                return StripePublishableKeyResponse(publishable_key=key)
        except httpx.HTTPError:
-            raise OnyxError(
-                OnyxErrorCode.INTERNAL_ERROR,
-                "Failed to fetch Stripe publishable key",
+            raise HTTPException(
+                status_code=500,
+                detail="Failed to fetch Stripe publishable key",
            )
--- a/backend/ee/onyx/server/user_group/api.py
+++ b/backend/ee/onyx/server/user_group/api.py
@@ -5,8 +5,6 @@ from sqlalchemy.exc import IntegrityError
 from sqlalchemy.orm import Session

 from ee.onyx.db.user_group import add_users_to_user_group
-from ee.onyx.db.user_group import delete_user_group as db_delete_user_group
-from ee.onyx.db.user_group import fetch_user_group
 from ee.onyx.db.user_group import fetch_user_groups
 from ee.onyx.db.user_group import fetch_user_groups_for_user
 from ee.onyx.db.user_group import insert_user_group
@@ -22,7 +20,6 @@ from ee.onyx.server.user_group.models import UserGroupUpdate
 from onyx.auth.users import current_admin_user
 from onyx.auth.users import current_curator_or_admin_user
 from onyx.auth.users import current_user
-from onyx.configs.app_configs import DISABLE_VECTOR_DB
 from onyx.configs.constants import PUBLIC_API_TAGS
 from onyx.db.engine.sql_engine import get_session
 from onyx.db.models import User
@@ -156,8 +153,3 @@ def delete_user_group(
        prepare_user_group_for_deletion(db_session, user_group_id)
    except ValueError as e:
        raise HTTPException(status_code=404, detail=str(e))
-
-    if DISABLE_VECTOR_DB:
-        user_group = fetch_user_group(db_session, user_group_id)
-        if user_group:
-            db_delete_user_group(db_session, user_group)
--- a/backend/onyx/auth/users.py
+++ b/backend/onyx/auth/users.py
@@ -120,6 +120,7 @@ from onyx.db.models import User
 from onyx.db.pat import fetch_user_for_pat
 from onyx.db.users import get_user_by_email
 from onyx.redis.redis_pool import get_async_redis_connection
+from onyx.redis.redis_pool import get_redis_client
 from onyx.server.settings.store import load_settings
 from onyx.server.utils import BasicAuthenticationError
 from onyx.utils.logger import setup_logger
@@ -200,14 +201,13 @@ def user_needs_to_be_verified() -> bool:


 def anonymous_user_enabled(*, tenant_id: str | None = None) -> bool:
-    from onyx.cache.factory import get_cache_backend
-
-    cache = get_cache_backend(tenant_id=tenant_id)
-    value = cache.get(OnyxRedisLocks.ANONYMOUS_USER_ENABLED)
+    redis_client = get_redis_client(tenant_id=tenant_id)
+    value = redis_client.get(OnyxRedisLocks.ANONYMOUS_USER_ENABLED)

    if value is None:
        return False

+    assert isinstance(value, bytes)
    return int(value.decode("utf-8")) == 1


--- a/backend/onyx/background/celery/apps/background.py
+++ b/backend/onyx/background/celery/apps/background.py
@@ -0,0 +1,158 @@
+"""Celery application and signal wiring for the consolidated background worker."""
+
+from typing import Any
+from typing import cast
+
+from celery import Celery
+from celery import signals
+from celery import Task
+from celery.apps.worker import Worker
+from celery.signals import celeryd_init
+from celery.signals import worker_init
+from celery.signals import worker_process_init
+from celery.signals import worker_ready
+from celery.signals import worker_shutdown
+
+import onyx.background.celery.apps.app_base as app_base
+from onyx.background.celery.celery_utils import httpx_init_vespa_pool
+from onyx.configs.app_configs import MANAGED_VESPA
+from onyx.configs.app_configs import VESPA_CLOUD_CERT_PATH
+from onyx.configs.app_configs import VESPA_CLOUD_KEY_PATH
+from onyx.configs.constants import POSTGRES_CELERY_WORKER_BACKGROUND_APP_NAME
+from onyx.db.engine.sql_engine import SqlEngine
+from onyx.utils.logger import setup_logger
+from shared_configs.configs import MULTI_TENANT
+
+
+logger = setup_logger()
+
+celery_app = Celery(__name__)
+celery_app.config_from_object("onyx.background.celery.configs.background")
+celery_app.Task = app_base.TenantAwareTask  # type: ignore [misc]
+
+
+@signals.task_prerun.connect
+def on_task_prerun(
+    sender: Any | None = None,
+    task_id: str | None = None,
+    task: Task | None = None,
+    args: tuple | None = None,
+    kwargs: dict | None = None,
+    **kwds: Any,
+) -> None:
+    """Forward the task_prerun signal to the shared app_base handler."""
+    app_base.on_task_prerun(sender, task_id, task, args, kwargs, **kwds)
+
+
+@signals.task_postrun.connect
+def on_task_postrun(
+    sender: Any | None = None,
+    task_id: str | None = None,
+    task: Task | None = None,
+    args: tuple | None = None,
+    kwargs: dict | None = None,
+    retval: Any | None = None,
+    state: str | None = None,
+    **kwds: Any,
+) -> None:
+    """Forward the task_postrun signal to the shared app_base handler."""
+    app_base.on_task_postrun(sender, task_id, task, args, kwargs, retval, state, **kwds)
+
+
+@celeryd_init.connect
+def on_celeryd_init(sender: str, conf: Any = None, **kwargs: Any) -> None:
+    """Forward the celeryd_init signal to the shared app_base handler."""
+    app_base.on_celeryd_init(sender, conf, **kwargs)
+
+
+@worker_init.connect
+def on_worker_init(sender: Worker, **kwargs: Any) -> None:
+    """Prepare shared resources when Celery emits worker_init.
+
+    Names and initializes the SQL engine with a pool sized to the worker's
+    concurrency, initializes the Vespa httpx pool, then calls the app_base wait
+    helpers for Redis, the database and Vespa. Outside multi-tenant mode it
+    also runs app_base.on_secondary_worker_init.
+    """
+    EXTRA_CONCURRENCY = 8  # small extra fudge factor for connection limits
+
+    logger.info("worker_init signal received for consolidated background worker.")
+
+    SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_BACKGROUND_APP_NAME)
+    pool_size = cast(int, sender.concurrency)  # type: ignore
+    SqlEngine.init_engine(pool_size=pool_size, max_overflow=EXTRA_CONCURRENCY)
+
+    # Initialize Vespa httpx pool (needed for light worker tasks)
+    if MANAGED_VESPA:
+        httpx_init_vespa_pool(
+            sender.concurrency + EXTRA_CONCURRENCY,  # type: ignore
+            ssl_cert=VESPA_CLOUD_CERT_PATH,
+            ssl_key=VESPA_CLOUD_KEY_PATH,
+        )
+    else:
+        httpx_init_vespa_pool(sender.concurrency + EXTRA_CONCURRENCY)  # type: ignore
+
+    app_base.wait_for_redis(sender, **kwargs)
+    app_base.wait_for_db(sender, **kwargs)
+    app_base.wait_for_vespa_or_shutdown(sender, **kwargs)
+
+    # Fewer startup checks in the multi-tenant case
+    if MULTI_TENANT:
+        return
+
+    app_base.on_secondary_worker_init(sender, **kwargs)
+
+
+@worker_ready.connect
+def on_worker_ready(sender: Any, **kwargs: Any) -> None:
+    """Forward the worker_ready signal to the shared app_base handler."""
+    app_base.on_worker_ready(sender, **kwargs)
+
+
+@worker_shutdown.connect
+def on_worker_shutdown(sender: Any, **kwargs: Any) -> None:
+    """Forward the worker_shutdown signal to the shared app_base handler."""
+    app_base.on_worker_shutdown(sender, **kwargs)
+
+
+@worker_process_init.connect
+def init_worker(**kwargs: Any) -> None:  # noqa: ARG001
+    """Call SqlEngine.reset_engine() when Celery emits worker_process_init."""
+    SqlEngine.reset_engine()
+
+
+@signals.setup_logging.connect
+def on_setup_logging(
+    loglevel: Any, logfile: Any, format: Any, colorize: Any, **kwargs: Any
+) -> None:
+    """Forward the setup_logging signal to the shared app_base handler."""
+    app_base.on_setup_logging(loglevel, logfile, format, colorize, **kwargs)
+
+
+base_bootsteps = app_base.get_bootsteps()
+for bootstep in base_bootsteps:
+    celery_app.steps["worker"].add(bootstep)
+
+celery_app.autodiscover_tasks(
+    app_base.filter_task_modules(
+        [
+            # Original background worker tasks
+            "onyx.background.celery.tasks.pruning",
+            "onyx.background.celery.tasks.monitoring",
+            "onyx.background.celery.tasks.user_file_processing",
+            "onyx.background.celery.tasks.llm_model_update",
+            # Light worker tasks
+            "onyx.background.celery.tasks.shared",
+            "onyx.background.celery.tasks.vespa",
+            "onyx.background.celery.tasks.connector_deletion",
+            "onyx.background.celery.tasks.doc_permission_syncing",
+            "onyx.background.celery.tasks.opensearch_migration",
+            # Docprocessing worker tasks
+            "onyx.background.celery.tasks.docprocessing",
+            # Docfetching worker tasks
+            "onyx.background.celery.tasks.docfetching",
+            # Sandbox cleanup tasks (isolated in build feature)
+            "onyx.server.features.build.sandbox.tasks",
+        ]
+    )
+)
--- a/backend/onyx/background/celery/celery_utils.py
+++ b/backend/onyx/background/celery/celery_utils.py
@@ -39,13 +39,9 @@ CT = TypeVar("CT", bound=ConnectorCheckpoint)


 class SlimConnectorExtractionResult(BaseModel):
-    """Result of extracting document IDs and hierarchy nodes from a connector.
+    """Result of extracting document IDs and hierarchy nodes from a connector."""

-    raw_id_to_parent maps document ID → parent_hierarchy_raw_node_id (or None).
-    Use raw_id_to_parent.keys() wherever the old set of IDs was needed.
-    """
-
-    raw_id_to_parent: dict[str, str | None]
+    doc_ids: set[str]
    hierarchy_nodes: list[HierarchyNode]


@@ -97,37 +93,30 @@ def _get_failure_id(failure: ConnectorFailure) -> str | None:
    return None


-class BatchResult(BaseModel):
-    raw_id_to_parent: dict[str, str | None]
-    hierarchy_nodes: list[HierarchyNode]
-
-
 def _extract_from_batch(
    doc_list: Sequence[Document | SlimDocument | HierarchyNode | ConnectorFailure],
-) -> BatchResult:
-    """Separate a batch into document IDs (with parent mapping) and hierarchy nodes.
+) -> tuple[set[str], list[HierarchyNode]]:
+    """Separate a batch into document IDs and hierarchy nodes.

    ConnectorFailure items have their failed document/entity IDs added to the
-    ID dict so that failed-to-retrieve documents are not accidentally pruned.
+    ID set so that failed-to-retrieve documents are not accidentally pruned.
    """
-    ids: dict[str, str | None] = {}
+    ids: set[str] = set()
    hierarchy_nodes: list[HierarchyNode] = []
    for item in doc_list:
        if isinstance(item, HierarchyNode):
            hierarchy_nodes.append(item)
-            if item.raw_node_id not in ids:
-                ids[item.raw_node_id] = None
+            ids.add(item.raw_node_id)
        elif isinstance(item, ConnectorFailure):
            failed_id = _get_failure_id(item)
            if failed_id:
-                ids[failed_id] = None
+                ids.add(failed_id)
            logger.warning(
                f"Failed to retrieve document {failed_id}: " f"{item.failure_message}"
            )
        else:
-            parent_raw = getattr(item, "parent_hierarchy_raw_node_id", None)
-            ids[item.id] = parent_raw
-    return BatchResult(raw_id_to_parent=ids, hierarchy_nodes=hierarchy_nodes)
+            ids.add(item.id)
+    return ids, hierarchy_nodes


 def extract_ids_from_runnable_connector(
@@ -143,7 +132,7 @@ def extract_ids_from_runnable_connector(

    Optionally, a callback can be passed to handle the length of each document batch.
    """
-    all_raw_id_to_parent: dict[str, str | None] = {}
+    all_connector_doc_ids: set[str] = set()
    all_hierarchy_nodes: list[HierarchyNode] = []

    # Sequence (covariant) lets all the specific list[...] iterator types unify here
@@ -188,20 +177,15 @@ def extract_ids_from_runnable_connector(
                "extract_ids_from_runnable_connector: Stop signal detected"
            )

-        batch_result = _extract_from_batch(doc_list)
-        batch_ids = batch_result.raw_id_to_parent
-        batch_nodes = batch_result.hierarchy_nodes
-        doc_batch_processing_func(batch_ids)
-        for k, v in batch_ids.items():
-            if v is not None or k not in all_raw_id_to_parent:
-                all_raw_id_to_parent[k] = v
+        batch_ids, batch_nodes = _extract_from_batch(doc_list)
+        all_connector_doc_ids.update(doc_batch_processing_func(batch_ids))
        all_hierarchy_nodes.extend(batch_nodes)

        if callback:
            callback.progress("extract_ids_from_runnable_connector", len(batch_ids))

    return SlimConnectorExtractionResult(
-        raw_id_to_parent=all_raw_id_to_parent,
+        doc_ids=all_connector_doc_ids,
        hierarchy_nodes=all_hierarchy_nodes,
    )

--- a/backend/onyx/background/celery/configs/background.py
+++ b/backend/onyx/background/celery/configs/background.py
@@ -0,0 +1,25 @@
+"""Celery config for the background worker, built on the shared base config."""
+
+import onyx.background.celery.configs.base as shared_config
+from onyx.configs.app_configs import CELERY_WORKER_BACKGROUND_CONCURRENCY
+
+broker_url = shared_config.broker_url
+broker_connection_retry_on_startup = shared_config.broker_connection_retry_on_startup
+broker_pool_limit = shared_config.broker_pool_limit
+broker_transport_options = shared_config.broker_transport_options
+
+redis_socket_keepalive = shared_config.redis_socket_keepalive
+redis_retry_on_timeout = shared_config.redis_retry_on_timeout
+redis_backend_health_check_interval = shared_config.redis_backend_health_check_interval
+
+result_backend = shared_config.result_backend
+result_expires = shared_config.result_expires  # 86400 seconds is the default
+
+task_default_priority = shared_config.task_default_priority
+task_acks_late = shared_config.task_acks_late
+
+worker_concurrency = CELERY_WORKER_BACKGROUND_CONCURRENCY
+worker_pool = "threads"
+# Increased from 1 to 4 to handle fast light worker tasks more efficiently
+# This allows the worker to prefetch multiple tasks per thread
+worker_prefetch_multiplier = 4
--- a/backend/onyx/background/celery/tasks/opensearch_migration/tasks.py
+++ b/backend/onyx/background/celery/tasks/opensearch_migration/tasks.py
@@ -30,7 +30,6 @@ from onyx.background.celery.tasks.opensearch_migration.transformer import (
    transform_vespa_chunks_to_opensearch_chunks,
 )
 from onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
-from onyx.configs.app_configs import VESPA_MIGRATION_REQUEST_TIMEOUT_S
 from onyx.configs.constants import OnyxCeleryTask
 from onyx.configs.constants import OnyxRedisLocks
 from onyx.db.engine.sql_engine import get_session_with_current_tenant
@@ -48,7 +47,6 @@ from onyx.document_index.interfaces_new import TenantState
 from onyx.document_index.opensearch.opensearch_document_index import (
    OpenSearchDocumentIndex,
 )
-from onyx.document_index.vespa.shared_utils.utils import get_vespa_http_client
 from onyx.document_index.vespa.vespa_document_index import VespaDocumentIndex
 from onyx.indexing.models import IndexingSetting
 from onyx.redis.redis_pool import get_redis_client
@@ -148,12 +146,7 @@ def migrate_chunks_from_vespa_to_opensearch_task(
            task_logger.error(err_str)
            return False

-        with (
-            get_session_with_current_tenant() as db_session,
-            get_vespa_http_client(
-                timeout=VESPA_MIGRATION_REQUEST_TIMEOUT_S
-            ) as vespa_client,
-        ):
+        with get_session_with_current_tenant() as db_session:
            try_insert_opensearch_tenant_migration_record_with_commit(db_session)
            search_settings = get_current_search_settings(db_session)
            tenant_state = TenantState(tenant_id=tenant_id, multitenant=MULTI_TENANT)
@@ -168,7 +161,6 @@ def migrate_chunks_from_vespa_to_opensearch_task(
                index_name=search_settings.index_name,
                tenant_state=tenant_state,
                large_chunks_enabled=False,
-                httpx_client=vespa_client,
            )

            sanitized_doc_start_time = time.monotonic()
--- a/backend/onyx/background/celery/tasks/pruning/tasks.py
+++ b/backend/onyx/background/celery/tasks/pruning/tasks.py
@@ -29,7 +29,6 @@ from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
 from onyx.configs.constants import CELERY_PRUNING_LOCK_TIMEOUT
 from onyx.configs.constants import CELERY_TASK_WAIT_FOR_FENCE_TIMEOUT
 from onyx.configs.constants import DANSWER_REDIS_FUNCTION_LOCK_PREFIX
-from onyx.configs.constants import DocumentSource
 from onyx.configs.constants import OnyxCeleryPriority
 from onyx.configs.constants import OnyxCeleryQueues
 from onyx.configs.constants import OnyxCeleryTask
@@ -48,8 +47,6 @@ from onyx.db.enums import AccessType
 from onyx.db.enums import ConnectorCredentialPairStatus
 from onyx.db.enums import SyncStatus
 from onyx.db.enums import SyncType
-from onyx.db.hierarchy import link_hierarchy_nodes_to_documents
-from onyx.db.hierarchy import update_document_parent_hierarchy_nodes
 from onyx.db.hierarchy import upsert_hierarchy_nodes_batch
 from onyx.db.models import ConnectorCredentialPair
 from onyx.db.sync_record import insert_sync_record
@@ -60,8 +57,6 @@ from onyx.redis.redis_connector_prune import RedisConnectorPrune
 from onyx.redis.redis_connector_prune import RedisConnectorPrunePayload
 from onyx.redis.redis_hierarchy import cache_hierarchy_nodes_batch
 from onyx.redis.redis_hierarchy import ensure_source_node_exists
-from onyx.redis.redis_hierarchy import get_node_id_from_raw_id
-from onyx.redis.redis_hierarchy import get_source_node_id_from_cache
 from onyx.redis.redis_hierarchy import HierarchyNodeCacheEntry
 from onyx.redis.redis_pool import get_redis_client
 from onyx.redis.redis_pool import get_redis_replica_client
@@ -118,38 +113,6 @@ class PruneCallback(IndexingCallbackBase):
        super().progress(tag, amount)


-def _resolve_and_update_document_parents(
-    db_session: Session,
-    redis_client: Redis,
-    source: DocumentSource,
-    raw_id_to_parent: dict[str, str | None],
-) -> None:
-    """Resolve parent_hierarchy_raw_node_id → parent_hierarchy_node_id for
-    each document and bulk-update the DB. Mirrors the resolution logic in
-    run_docfetching.py."""
-    source_node_id = get_source_node_id_from_cache(redis_client, db_session, source)
-
-    resolved: dict[str, int | None] = {}
-    for doc_id, raw_parent_id in raw_id_to_parent.items():
-        if raw_parent_id is None:
-            continue
-        node_id, found = get_node_id_from_raw_id(redis_client, source, raw_parent_id)
-        resolved[doc_id] = node_id if found else source_node_id
-
-    if not resolved:
-        return
-
-    update_document_parent_hierarchy_nodes(
-        db_session=db_session,
-        doc_parent_map=resolved,
-        commit=True,
-    )
-    task_logger.info(
-        f"Pruning: resolved and updated parent hierarchy for "
-        f"{len(resolved)} documents (source={source.value})"
-    )
-
-
 """Jobs / utils for kicking off pruning tasks."""


@@ -572,22 +535,22 @@ def connector_pruning_generator_task(
            extraction_result = extract_ids_from_runnable_connector(
                runnable_connector, callback
            )
-            all_connector_doc_ids = extraction_result.raw_id_to_parent
+            all_connector_doc_ids = extraction_result.doc_ids

            # Process hierarchy nodes (same as docfetching):
            # upsert to Postgres and cache in Redis
-            source = cc_pair.connector.source
-            redis_client = get_redis_client(tenant_id=tenant_id)
-
            if extraction_result.hierarchy_nodes:
                is_connector_public = cc_pair.access_type == AccessType.PUBLIC

-                ensure_source_node_exists(redis_client, db_session, source)
+                redis_client = get_redis_client(tenant_id=tenant_id)
+                ensure_source_node_exists(
+                    redis_client, db_session, cc_pair.connector.source
+                )

                upserted_nodes = upsert_hierarchy_nodes_batch(
                    db_session=db_session,
                    nodes=extraction_result.hierarchy_nodes,
-                    source=source,
+                    source=cc_pair.connector.source,
                    commit=True,
                    is_connector_public=is_connector_public,
                )
@@ -598,7 +561,7 @@ def connector_pruning_generator_task(
                ]
                cache_hierarchy_nodes_batch(
                    redis_client=redis_client,
-                    source=source,
+                    source=cc_pair.connector.source,
                    entries=cache_entries,
                )

@@ -607,26 +570,6 @@ def connector_pruning_generator_task(
                    f"hierarchy nodes for cc_pair={cc_pair_id}"
                )

-            ensure_source_node_exists(redis_client, db_session, source)
-            # Resolve parent_hierarchy_raw_node_id → parent_hierarchy_node_id
-            # and bulk-update documents, mirroring the docfetching resolution
-            _resolve_and_update_document_parents(
-                db_session=db_session,
-                redis_client=redis_client,
-                source=source,
-                raw_id_to_parent=all_connector_doc_ids,
-            )
-
-            # Link hierarchy nodes to documents for sources where pages can be
-            # both hierarchy nodes AND documents (e.g. Notion, Confluence)
-            all_doc_id_list = list(all_connector_doc_ids.keys())
-            link_hierarchy_nodes_to_documents(
-                db_session=db_session,
-                document_ids=all_doc_id_list,
-                source=source,
-                commit=True,
-            )
-
            # a list of docs in our local index
            all_indexed_document_ids = {
                doc.id
@@ -638,9 +581,7 @@ def connector_pruning_generator_task(
            }

            # generate list of docs to remove (no longer in the source)
-            doc_ids_to_remove = list(
-                all_indexed_document_ids - all_connector_doc_ids.keys()
-            )
+            doc_ids_to_remove = list(all_indexed_document_ids - all_connector_doc_ids)

            task_logger.info(
                "Pruning set collected: "
--- a/backend/onyx/background/celery/versioned_apps/background.py
+++ b/backend/onyx/background/celery/versioned_apps/background.py
@@ -0,0 +1,10 @@
+from celery import Celery
+
+from onyx.utils.variable_functionality import fetch_versioned_implementation
+from onyx.utils.variable_functionality import set_is_ee_based_on_env_variable
+
+set_is_ee_based_on_env_variable()
+app: Celery = fetch_versioned_implementation(
+    "onyx.background.celery.apps.background",
+    "celery_app",
+)
--- a/backend/onyx/cache/interface.py
+++ b/backend/onyx/cache/interface.py
@@ -1,20 +1,9 @@
 import abc
 from enum import Enum

-from redis.exceptions import RedisError
-from sqlalchemy.exc import SQLAlchemyError
-
 TTL_KEY_NOT_FOUND = -2
 TTL_NO_EXPIRY = -1

-CACHE_TRANSIENT_ERRORS: tuple[type[Exception], ...] = (RedisError, SQLAlchemyError)
-"""Exception types that represent transient cache connectivity / operational
-failures.  Callers that want to fail-open (or fail-closed) on cache errors
-should catch this tuple instead of bare ``Exception``.
-
-When adding a new ``CacheBackend`` implementation, add its transient error
-base class(es) here so all call-sites pick it up automatically."""
-

 class CacheBackendType(str, Enum):
    REDIS = "redis"
--- a/backend/onyx/chat/llm_loop.py
+++ b/backend/onyx/chat/llm_loop.py
@@ -52,7 +52,6 @@ from onyx.tools.built_in_tools import STOPPING_TOOLS_NAMES
 from onyx.tools.interface import Tool
 from onyx.tools.models import ChatFile
 from onyx.tools.models import MemoryToolResponseSnapshot
-from onyx.tools.models import PythonToolRichResponse
 from onyx.tools.models import ToolCallInfo
 from onyx.tools.models import ToolCallKickoff
 from onyx.tools.models import ToolResponse
@@ -967,13 +966,6 @@ def run_llm_loop(
                ):
                    generated_images = tool_response.rich_response.generated_images

-                # Extract generated_files if this is a code interpreter response
-                generated_files = None
-                if isinstance(tool_response.rich_response, PythonToolRichResponse):
-                    generated_files = (
-                        tool_response.rich_response.generated_files or None
-                    )
-
                # Persist memory if this is a memory tool response
                memory_snapshot: MemoryToolResponseSnapshot | None = None
                if isinstance(tool_response.rich_response, MemoryToolResponse):
@@ -1025,7 +1017,6 @@ def run_llm_loop(
                    tool_call_response=saved_response,
                    search_docs=displayed_docs or search_docs,
                    generated_images=generated_images,
-                    generated_files=generated_files,
                )
                # Add to state container for partial save support
                state_container.add_tool_call(tool_call_info)
--- a/backend/onyx/chat/save_chat.py
+++ b/backend/onyx/chat/save_chat.py
@@ -1,5 +1,4 @@
 import json
-import mimetypes

 from sqlalchemy.orm import Session

@@ -13,41 +12,14 @@ from onyx.db.chat import create_db_search_doc
 from onyx.db.models import ChatMessage
 from onyx.db.models import ToolCall
 from onyx.db.tools import create_tool_call_no_commit
-from onyx.file_store.models import FileDescriptor
 from onyx.natural_language_processing.utils import BaseTokenizer
 from onyx.natural_language_processing.utils import get_tokenizer
-from onyx.server.query_and_chat.chat_utils import mime_type_to_chat_file_type
 from onyx.tools.models import ToolCallInfo
 from onyx.utils.logger import setup_logger

 logger = setup_logger()


-def _extract_referenced_file_descriptors(
-    tool_calls: list[ToolCallInfo],
-    message_text: str,
-) -> list[FileDescriptor]:
-    """Extract FileDescriptors for code interpreter files referenced in the message text."""
-    descriptors: list[FileDescriptor] = []
-    for tool_call_info in tool_calls:
-        if not tool_call_info.generated_files:
-            continue
-        for gen_file in tool_call_info.generated_files:
-            file_id = (
-                gen_file.file_link.rsplit("/", 1)[-1] if gen_file.file_link else ""
-            )
-            if file_id and file_id in message_text:
-                mime_type, _ = mimetypes.guess_type(gen_file.filename)
-                descriptors.append(
-                    FileDescriptor(
-                        id=file_id,
-                        type=mime_type_to_chat_file_type(mime_type),
-                        name=gen_file.filename,
-                    )
-                )
-    return descriptors
-
-
 def _create_and_link_tool_calls(
    tool_calls: list[ToolCallInfo],
    assistant_message: ChatMessage,
@@ -325,14 +297,5 @@ def save_chat_turn(
        citation_number_to_search_doc_id if citation_number_to_search_doc_id else None
    )

-    # 8. Attach code interpreter generated files that the assistant actually
-    # referenced in its response, so they are available via load_all_chat_files
-    # on subsequent turns. Files not mentioned are intermediate artifacts.
-    if message_text:
-        referenced = _extract_referenced_file_descriptors(tool_calls, message_text)
-        if referenced:
-            existing_files = assistant_message.files or []
-            assistant_message.files = existing_files + referenced
-
    # Finally save the messages, tool calls, and docs
    db_session.commit()
--- a/backend/onyx/configs/app_configs.py
+++ b/backend/onyx/configs/app_configs.py
@@ -495,7 +495,14 @@ CELERY_WORKER_PRIMARY_POOL_OVERFLOW = int(
    os.environ.get("CELERY_WORKER_PRIMARY_POOL_OVERFLOW") or 4
 )

-# Individual worker concurrency settings
+# Consolidated background worker (light, docprocessing, docfetching, heavy, monitoring, user_file_processing)
+# separate workers' defaults: light=24, docprocessing=6, docfetching=1, heavy=4, kg=2, monitoring=1, user_file=2
+# Total would be 40, but we use a more conservative default of 20 for the consolidated worker
+CELERY_WORKER_BACKGROUND_CONCURRENCY = int(
+    os.environ.get("CELERY_WORKER_BACKGROUND_CONCURRENCY") or 20
+)
+
+# Individual worker concurrency settings (used when USE_LIGHTWEIGHT_BACKGROUND_WORKER is False or on Kubernetes deployments)
 CELERY_WORKER_HEAVY_CONCURRENCY = int(
    os.environ.get("CELERY_WORKER_HEAVY_CONCURRENCY") or 4
 )
@@ -812,9 +819,7 @@ RERANK_COUNT = int(os.environ.get("RERANK_COUNT") or 1000)
 # Tool Configs
 #####
 # Code Interpreter Service Configuration
-CODE_INTERPRETER_BASE_URL = os.environ.get(
-    "CODE_INTERPRETER_BASE_URL", "http://localhost:8000"
-)
+CODE_INTERPRETER_BASE_URL = os.environ.get("CODE_INTERPRETER_BASE_URL")

 CODE_INTERPRETER_DEFAULT_TIMEOUT_MS = int(
    os.environ.get("CODE_INTERPRETER_DEFAULT_TIMEOUT_MS") or 60_000
@@ -895,9 +900,6 @@ CUSTOM_ANSWER_VALIDITY_CONDITIONS = json.loads(
 )

 VESPA_REQUEST_TIMEOUT = int(os.environ.get("VESPA_REQUEST_TIMEOUT") or "15")
-VESPA_MIGRATION_REQUEST_TIMEOUT_S = int(
-    os.environ.get("VESPA_MIGRATION_REQUEST_TIMEOUT_S") or "120"
-)

 SYSTEM_RECURSION_LIMIT = int(os.environ.get("SYSTEM_RECURSION_LIMIT") or "1000")

--- a/backend/onyx/configs/constants.py
+++ b/backend/onyx/configs/constants.py
@@ -84,6 +84,7 @@ POSTGRES_CELERY_WORKER_LIGHT_APP_NAME = "celery_worker_light"
 POSTGRES_CELERY_WORKER_DOCPROCESSING_APP_NAME = "celery_worker_docprocessing"
 POSTGRES_CELERY_WORKER_DOCFETCHING_APP_NAME = "celery_worker_docfetching"
 POSTGRES_CELERY_WORKER_INDEXING_CHILD_APP_NAME = "celery_worker_indexing_child"
+POSTGRES_CELERY_WORKER_BACKGROUND_APP_NAME = "celery_worker_background"
 POSTGRES_CELERY_WORKER_HEAVY_APP_NAME = "celery_worker_heavy"
 POSTGRES_CELERY_WORKER_MONITORING_APP_NAME = "celery_worker_monitoring"
 POSTGRES_CELERY_WORKER_USER_FILE_PROCESSING_APP_NAME = (
--- a/backend/onyx/connectors/confluence/connector.py
+++ b/backend/onyx/connectors/confluence/connector.py
@@ -943,9 +943,6 @@ class ConfluenceConnector(
                        if include_permissions
                        else None
                    ),
-                    parent_hierarchy_raw_node_id=self._get_parent_hierarchy_raw_id(
-                        page
-                    ),
                )
            )

@@ -995,7 +992,6 @@ class ConfluenceConnector(
                            if include_permissions
                            else None
                        ),
-                        parent_hierarchy_raw_node_id=page_id,
                    )
                )

--- a/backend/onyx/connectors/google_drive/doc_conversion.py
+++ b/backend/onyx/connectors/google_drive/doc_conversion.py
@@ -781,5 +781,4 @@ def build_slim_document(
    return SlimDocument(
        id=onyx_document_id_from_drive_file(file),
        external_access=external_access,
-        parent_hierarchy_raw_node_id=(file.get("parents") or [None])[0],
    )
--- a/backend/onyx/connectors/jira/connector.py
+++ b/backend/onyx/connectors/jira/connector.py
@@ -902,11 +902,6 @@ class JiraConnector(
                        external_access=self._get_project_permissions(
                            project_key, add_prefix=False
                        ),
-                        parent_hierarchy_raw_node_id=(
-                            self._get_parent_hierarchy_raw_node_id(issue, project_key)
-                            if project_key
-                            else None
-                        ),
                    )
                )
                current_offset += 1
--- a/backend/onyx/connectors/models.py
+++ b/backend/onyx/connectors/models.py
@@ -385,7 +385,6 @@ class IndexingDocument(Document):
 class SlimDocument(BaseModel):
    id: str
    external_access: ExternalAccess | None = None
-    parent_hierarchy_raw_node_id: str | None = None


 class HierarchyNode(BaseModel):
--- a/backend/onyx/connectors/sharepoint/connector.py
+++ b/backend/onyx/connectors/sharepoint/connector.py
@@ -772,7 +772,6 @@ def _convert_driveitem_to_slim_document(
    drive_name: str,
    ctx: ClientContext,
    graph_client: GraphClient,
-    parent_hierarchy_raw_node_id: str | None = None,
 ) -> SlimDocument:
    if driveitem.id is None:
        raise ValueError("DriveItem ID is required")
@@ -788,15 +787,11 @@ def _convert_driveitem_to_slim_document(
    return SlimDocument(
        id=driveitem.id,
        external_access=external_access,
-        parent_hierarchy_raw_node_id=parent_hierarchy_raw_node_id,
    )


 def _convert_sitepage_to_slim_document(
-    site_page: dict[str, Any],
-    ctx: ClientContext | None,
-    graph_client: GraphClient,
-    parent_hierarchy_raw_node_id: str | None = None,
+    site_page: dict[str, Any], ctx: ClientContext | None, graph_client: GraphClient
 ) -> SlimDocument:
    """Convert a SharePoint site page to a SlimDocument object."""
    if site_page.get("id") is None:
@@ -813,7 +808,6 @@ def _convert_sitepage_to_slim_document(
    return SlimDocument(
        id=id,
        external_access=external_access,
-        parent_hierarchy_raw_node_id=parent_hierarchy_raw_node_id,
    )


@@ -1600,22 +1594,12 @@ class SharepointConnector(
                            )
                        )

-                    parent_hierarchy_url: str | None = None
-                    if drive_web_url:
-                        parent_hierarchy_url = self._get_parent_hierarchy_url(
-                            site_url, drive_web_url, drive_name, driveitem
-                        )
-
                    try:
                        logger.debug(f"Processing: {driveitem.web_url}")
                        ctx = self._create_rest_client_context(site_descriptor.url)
                        doc_batch.append(
                            _convert_driveitem_to_slim_document(
-                                driveitem,
-                                drive_name,
-                                ctx,
-                                self.graph_client,
-                                parent_hierarchy_raw_node_id=parent_hierarchy_url,
+                                driveitem, drive_name, ctx, self.graph_client
                            )
                        )
                    except Exception as e:
@@ -1635,10 +1619,7 @@ class SharepointConnector(
                    ctx = self._create_rest_client_context(site_descriptor.url)
                    doc_batch.append(
                        _convert_sitepage_to_slim_document(
-                            site_page,
-                            ctx,
-                            self.graph_client,
-                            parent_hierarchy_raw_node_id=site_descriptor.url,
+                            site_page, ctx, self.graph_client
                        )
                    )
                    if len(doc_batch) >= SLIM_BATCH_SIZE:
--- a/backend/onyx/connectors/slack/connector.py
+++ b/backend/onyx/connectors/slack/connector.py
@@ -565,7 +565,6 @@ def _get_all_doc_ids(
                            channel_id=channel_id, thread_ts=message["ts"]
                        ),
                        external_access=external_access,
-                        parent_hierarchy_raw_node_id=channel_id,
                    )
                )

--- a/backend/onyx/db/document_set.py
+++ b/backend/onyx/db/document_set.py
@@ -13,7 +13,6 @@ from sqlalchemy.orm import aliased
 from sqlalchemy.orm import selectinload
 from sqlalchemy.orm import Session

-from onyx.configs.app_configs import DISABLE_VECTOR_DB
 from onyx.db.connector_credential_pair import get_cc_pair_groups_for_ids
 from onyx.db.connector_credential_pair import get_connector_credential_pairs
 from onyx.db.enums import AccessType
@@ -247,7 +246,6 @@ def insert_document_set(
            description=document_set_creation_request.description,
            user_id=user_id,
            is_public=document_set_creation_request.is_public,
-            is_up_to_date=DISABLE_VECTOR_DB,
            time_last_modified_by_user=func.now(),
        )
        db_session.add(new_document_set_row)
@@ -338,8 +336,7 @@ def update_document_set(
            )

        document_set_row.description = document_set_update_request.description
-        if not DISABLE_VECTOR_DB:
-            document_set_row.is_up_to_date = False
+        document_set_row.is_up_to_date = False
        document_set_row.is_public = document_set_update_request.is_public
        document_set_row.time_last_modified_by_user = func.now()
        versioned_private_doc_set_fn = fetch_versioned_implementation(
--- a/backend/onyx/db/hierarchy.py
+++ b/backend/onyx/db/hierarchy.py
@@ -1,7 +1,5 @@
 """CRUD operations for HierarchyNode."""

-from collections import defaultdict
-
 from sqlalchemy import select
 from sqlalchemy.orm import Session

@@ -527,53 +525,6 @@ def get_document_parent_hierarchy_node_ids(
    return {doc_id: parent_id for doc_id, parent_id in results}


-def update_document_parent_hierarchy_nodes(
-    db_session: Session,
-    doc_parent_map: dict[str, int | None],
-    commit: bool = True,
-) -> int:
-    """Bulk-update Document.parent_hierarchy_node_id for multiple documents.
-
-    Only updates rows whose current value differs from the desired value to
-    avoid unnecessary writes.
-
-    Args:
-        db_session: SQLAlchemy session
-        doc_parent_map: Mapping of document_id → desired parent_hierarchy_node_id
-        commit: Whether to commit the transaction
-
-    Returns:
-        Number of documents actually updated
-    """
-    if not doc_parent_map:
-        return 0
-
-    doc_ids = list(doc_parent_map.keys())
-    existing = get_document_parent_hierarchy_node_ids(db_session, doc_ids)
-
-    by_parent: dict[int | None, list[str]] = defaultdict(list)
-    for doc_id, desired_parent_id in doc_parent_map.items():
-        current = existing.get(doc_id)
-        if current == desired_parent_id or doc_id not in existing:
-            continue
-        by_parent[desired_parent_id].append(doc_id)
-
-    updated = 0
-    for desired_parent_id, ids in by_parent.items():
-        db_session.query(Document).filter(Document.id.in_(ids)).update(
-            {Document.parent_hierarchy_node_id: desired_parent_id},
-            synchronize_session=False,
-        )
-        updated += len(ids)
-
-    if commit:
-        db_session.commit()
-    elif updated:
-        db_session.flush()
-
-    return updated
-
-
 def update_hierarchy_node_permissions(
    db_session: Session,
    raw_node_id: str,
--- a/backend/onyx/db/llm.py
+++ b/backend/onyx/db/llm.py
@@ -532,7 +532,6 @@ def fetch_default_model(
 ) -> ModelConfiguration | None:
    model_config = db_session.scalar(
        select(ModelConfiguration)
-        .options(selectinload(ModelConfiguration.llm_provider))
        .join(LLMModelFlow)
        .where(
            ModelConfiguration.is_visible == True,  # noqa: E712
--- a/backend/onyx/db/projects.py
+++ b/backend/onyx/db/projects.py
@@ -52,7 +52,7 @@ def create_user_files(
 ) -> CategorizedFilesResult:

    # Categorize the files
-    categorized_files = categorize_uploaded_files(files, db_session)
+    categorized_files = categorize_uploaded_files(files)
    # NOTE: At the moment, zip metadata is not used for user files.
    # Should revisit to decide whether this should be a feature.
    upload_response = upload_files(categorized_files.acceptable, FileOrigin.USER_FILE)
--- a/backend/onyx/db/search_settings.py
+++ b/backend/onyx/db/search_settings.py
@@ -129,7 +129,7 @@ def get_current_search_settings(db_session: Session) -> SearchSettings:
    latest_settings = result.scalars().first()

    if not latest_settings:
-        raise RuntimeError("No search settings specified; DB is not in a valid state.")
+        raise RuntimeError("No search settings specified, DB is not in a valid state")
    return latest_settings


--- a/backend/onyx/document_index/document_index_utils.py
+++ b/backend/onyx/document_index/document_index_utils.py
@@ -32,6 +32,9 @@ def get_multipass_config(search_settings: SearchSettings) -> MultipassConfig:
    Determines whether to enable multipass and large chunks by examining
    the current search settings and the embedder configuration.
    """
+    if not search_settings:
+        return MultipassConfig(multipass_indexing=False, enable_large_chunks=False)
+
    multipass = should_use_multipass(search_settings)
    enable_large_chunks = SearchSettings.can_use_large_chunks(
        multipass, search_settings.model_name, search_settings.provider_type
--- a/backend/onyx/document_index/factory.py
+++ b/backend/onyx/document_index/factory.py
@@ -26,10 +26,11 @@ def get_default_document_index(
    To be used for retrieval only. Indexing should be done through both indices
    until Vespa is deprecated.

+    Pre-existing docstring for this function, although secondary indices are not
+    currently supported:
    Primary index is the index that is used for querying/updating etc. Secondary
    index is for when both the currently used index and the upcoming index both
-    need to be updated. Updates are applied to both indices.
-    WARNING: In that case, get_all_document_indices should be used.
+    need to be updated; updates are applied to both indices.
    """
    if DISABLE_VECTOR_DB:
        return DisabledDocumentIndex(
@@ -50,26 +51,11 @@ def get_default_document_index(
    opensearch_retrieval_enabled = get_opensearch_retrieval_state(db_session)
    if opensearch_retrieval_enabled:
        indexing_setting = IndexingSetting.from_db_model(search_settings)
-        secondary_indexing_setting = (
-            IndexingSetting.from_db_model(secondary_search_settings)
-            if secondary_search_settings
-            else None
-        )
        return OpenSearchOldDocumentIndex(
            index_name=search_settings.index_name,
            embedding_dim=indexing_setting.final_embedding_dim,
            embedding_precision=indexing_setting.embedding_precision,
            secondary_index_name=secondary_index_name,
-            secondary_embedding_dim=(
-                secondary_indexing_setting.final_embedding_dim
-                if secondary_indexing_setting
-                else None
-            ),
-            secondary_embedding_precision=(
-                secondary_indexing_setting.embedding_precision
-                if secondary_indexing_setting
-                else None
-            ),
            large_chunks_enabled=search_settings.large_chunks_enabled,
            secondary_large_chunks_enabled=secondary_large_chunks_enabled,
            multitenant=MULTI_TENANT,
@@ -100,7 +86,8 @@ def get_all_document_indices(
    Used for indexing only. Until Vespa is deprecated we will index into both
    document indices. Retrieval is done through only one index however.

-    Large chunks are not currently supported so we hardcode appropriate values.
+    Large chunks and secondary indices are not currently supported so we
+    hardcode appropriate values.

    NOTE: Make sure the Vespa index object is returned first. In the rare event
    that there is some conflict between indexing and the migration task, it is
@@ -136,36 +123,13 @@ def get_all_document_indices(
    opensearch_document_index: OpenSearchOldDocumentIndex | None = None
    if ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
        indexing_setting = IndexingSetting.from_db_model(search_settings)
-        secondary_indexing_setting = (
-            IndexingSetting.from_db_model(secondary_search_settings)
-            if secondary_search_settings
-            else None
-        )
        opensearch_document_index = OpenSearchOldDocumentIndex(
            index_name=search_settings.index_name,
            embedding_dim=indexing_setting.final_embedding_dim,
            embedding_precision=indexing_setting.embedding_precision,
-            secondary_index_name=(
-                secondary_search_settings.index_name
-                if secondary_search_settings
-                else None
-            ),
-            secondary_embedding_dim=(
-                secondary_indexing_setting.final_embedding_dim
-                if secondary_indexing_setting
-                else None
-            ),
-            secondary_embedding_precision=(
-                secondary_indexing_setting.embedding_precision
-                if secondary_indexing_setting
-                else None
-            ),
-            large_chunks_enabled=search_settings.large_chunks_enabled,
-            secondary_large_chunks_enabled=(
-                secondary_search_settings.large_chunks_enabled
-                if secondary_search_settings
-                else None
-            ),
+            secondary_index_name=None,
+            large_chunks_enabled=False,
+            secondary_large_chunks_enabled=None,
            multitenant=MULTI_TENANT,
            httpx_client=httpx_client,
        )
--- a/backend/onyx/document_index/opensearch/opensearch_document_index.py
+++ b/backend/onyx/document_index/opensearch/opensearch_document_index.py
@@ -271,9 +271,6 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
        embedding_dim: int,
        embedding_precision: EmbeddingPrecision,
        secondary_index_name: str | None,
-        secondary_embedding_dim: int | None,
-        secondary_embedding_precision: EmbeddingPrecision | None,
-        # NOTE: We do not support large chunks right now.
        large_chunks_enabled: bool,  # noqa: ARG002
        secondary_large_chunks_enabled: bool | None,  # noqa: ARG002
        multitenant: bool = False,
@@ -289,25 +286,12 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
                f"Expected {MULTI_TENANT}, got {multitenant}."
            )
        tenant_id = get_current_tenant_id()
-        tenant_state = TenantState(tenant_id=tenant_id, multitenant=multitenant)
        self._real_index = OpenSearchDocumentIndex(
-            tenant_state=tenant_state,
+            tenant_state=TenantState(tenant_id=tenant_id, multitenant=multitenant),
            index_name=index_name,
            embedding_dim=embedding_dim,
            embedding_precision=embedding_precision,
        )
-        self._secondary_real_index: OpenSearchDocumentIndex | None = None
-        if self.secondary_index_name:
-            if secondary_embedding_dim is None or secondary_embedding_precision is None:
-                raise ValueError(
-                    "Bug: Secondary index embedding dimension and precision are not set."
-                )
-            self._secondary_real_index = OpenSearchDocumentIndex(
-                tenant_state=tenant_state,
-                index_name=self.secondary_index_name,
-                embedding_dim=secondary_embedding_dim,
-                embedding_precision=secondary_embedding_precision,
-            )

    @staticmethod
    def register_multitenant_indices(
@@ -323,38 +307,19 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
        self,
        primary_embedding_dim: int,
        primary_embedding_precision: EmbeddingPrecision,
-        secondary_index_embedding_dim: int | None,
-        secondary_index_embedding_precision: EmbeddingPrecision | None,
+        secondary_index_embedding_dim: int | None,  # noqa: ARG002
+        secondary_index_embedding_precision: EmbeddingPrecision | None,  # noqa: ARG002
    ) -> None:
-        self._real_index.verify_and_create_index_if_necessary(
+        # Only handle primary index for now, ignore secondary.
+        return self._real_index.verify_and_create_index_if_necessary(
            primary_embedding_dim, primary_embedding_precision
        )
-        if self.secondary_index_name:
-            if (
-                secondary_index_embedding_dim is None
-                or secondary_index_embedding_precision is None
-            ):
-                raise ValueError(
-                    "Bug: Secondary index embedding dimension and precision are not set."
-                )
-            assert (
-                self._secondary_real_index is not None
-            ), "Bug: Secondary index is not initialized."
-            self._secondary_real_index.verify_and_create_index_if_necessary(
-                secondary_index_embedding_dim, secondary_index_embedding_precision
-            )

    def index(
        self,
        chunks: list[DocMetadataAwareIndexChunk],
        index_batch_params: IndexBatchParams,
    ) -> set[OldDocumentInsertionRecord]:
-        """
-        NOTE: Do NOT consider the secondary index here. A separate indexing
-        pipeline will be responsible for indexing to the secondary index. This
-        design is not ideal and we should reconsider this when revamping index
-        swapping.
-        """
        # Convert IndexBatchParams to IndexingMetadata.
        chunk_counts: dict[str, IndexingMetadata.ChunkCounts] = {}
        for doc_id in index_batch_params.doc_id_to_new_chunk_cnt:
@@ -386,20 +351,7 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
        tenant_id: str,  # noqa: ARG002
        chunk_count: int | None,
    ) -> int:
-        """
-        NOTE: Remember to handle the secondary index here. There is no separate
-        pipeline for deleting chunks in the secondary index. This design is not
-        ideal and we should reconsider this when revamping index swapping.
-        """
-        total_chunks_deleted = self._real_index.delete(doc_id, chunk_count)
-        if self.secondary_index_name:
-            assert (
-                self._secondary_real_index is not None
-            ), "Bug: Secondary index is not initialized."
-            total_chunks_deleted += self._secondary_real_index.delete(
-                doc_id, chunk_count
-            )
-        return total_chunks_deleted
+        return self._real_index.delete(doc_id, chunk_count)

    def update_single(
        self,
@@ -410,11 +362,6 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):
        fields: VespaDocumentFields | None,
        user_fields: VespaDocumentUserFields | None,
    ) -> None:
-        """
-        NOTE: Remember to handle the secondary index here. There is no separate
-        pipeline for updating chunks in the secondary index. This design is not
-        ideal and we should reconsider this when revamping index swapping.
-        """
        if fields is None and user_fields is None:
            logger.warning(
                f"Tried to update document {doc_id} with no updated fields or user fields."
@@ -445,11 +392,6 @@ class OpenSearchOldDocumentIndex(OldDocumentIndex):

        try:
            self._real_index.update([update_request])
-            if self.secondary_index_name:
-                assert (
-                    self._secondary_real_index is not None
-                ), "Bug: Secondary index is not initialized."
-                self._secondary_real_index.update([update_request])
        except NotFoundError:
            logger.exception(
                f"Tried to update document {doc_id} but at least one of its chunks was not found in OpenSearch. "
--- a/backend/onyx/document_index/vespa/chunk_retrieval.py
+++ b/backend/onyx/document_index/vespa/chunk_retrieval.py
@@ -1,6 +1,5 @@
 import json
 import string
-import time
 from collections.abc import Callable
 from collections.abc import Mapping
 from datetime import datetime
@@ -19,7 +18,6 @@ from onyx.background.celery.tasks.opensearch_migration.transformer import (
 )
 from onyx.configs.app_configs import LOG_VESPA_TIMING_INFORMATION
 from onyx.configs.app_configs import VESPA_LANGUAGE_OVERRIDE
-from onyx.configs.app_configs import VESPA_MIGRATION_REQUEST_TIMEOUT_S
 from onyx.context.search.models import IndexFilters
 from onyx.context.search.models import InferenceChunkUncleaned
 from onyx.document_index.interfaces import VespaChunkRequest
@@ -340,18 +338,12 @@ def get_all_chunks_paginated(
            params["continuation"] = continuation_token

        response: httpx.Response | None = None
-        start_time = time.monotonic()
        try:
-            with get_vespa_http_client(
-                timeout=VESPA_MIGRATION_REQUEST_TIMEOUT_S
-            ) as http_client:
+            with get_vespa_http_client() as http_client:
                response = http_client.get(url, params=params)
                response.raise_for_status()
        except httpx.HTTPError as e:
-            error_base = (
-                f"Failed to get chunks from Vespa slice {slice_id} with continuation token "
-                f"{continuation_token} in {time.monotonic() - start_time:.3f} seconds."
-            )
+            error_base = f"Failed to get chunks from Vespa slice {slice_id} with continuation token {continuation_token}."
            logger.exception(
                f"Request URL: {e.request.url}\n"
                f"Request Headers: {e.request.headers}\n"
--- a/backend/onyx/document_index/vespa/index.py
+++ b/backend/onyx/document_index/vespa/index.py
@@ -465,12 +465,6 @@ class VespaIndex(DocumentIndex):
        chunks: list[DocMetadataAwareIndexChunk],
        index_batch_params: IndexBatchParams,
    ) -> set[OldDocumentInsertionRecord]:
-        """
-        NOTE: Do NOT consider the secondary index here. A separate indexing
-        pipeline will be responsible for indexing to the secondary index. This
-        design is not ideal and we should reconsider this when revamping index
-        swapping.
-        """
        if len(index_batch_params.doc_id_to_previous_chunk_cnt) != len(
            index_batch_params.doc_id_to_new_chunk_cnt
        ):
@@ -665,10 +659,6 @@ class VespaIndex(DocumentIndex):
        """Note: if the document id does not exist, the update will be a no-op and the
        function will complete with no errors or exceptions.
        Handle other exceptions if you wish to implement retry behavior
-
-        NOTE: Remember to handle the secondary index here. There is no separate
-        pipeline for updating chunks in the secondary index. This design is not
-        ideal and we should reconsider this when revamping index swapping.
        """
        if fields is None and user_fields is None:
            logger.warning(
@@ -689,6 +679,13 @@ class VespaIndex(DocumentIndex):
                f"Bug: Tenant ID mismatch. Expected {tenant_state.tenant_id}, got {tenant_id}."
            )

+        vespa_document_index = VespaDocumentIndex(
+            index_name=self.index_name,
+            tenant_state=tenant_state,
+            large_chunks_enabled=self.large_chunks_enabled,
+            httpx_client=self.httpx_client,
+        )
+
        project_ids: set[int] | None = None
        if user_fields is not None and user_fields.user_projects is not None:
            project_ids = set(user_fields.user_projects)
@@ -708,20 +705,7 @@ class VespaIndex(DocumentIndex):
            persona_ids=persona_ids,
        )

-        indices = [self.index_name]
-        if self.secondary_index_name:
-            indices.append(self.secondary_index_name)
-
-        for index_name in indices:
-            vespa_document_index = VespaDocumentIndex(
-                index_name=index_name,
-                tenant_state=tenant_state,
-                large_chunks_enabled=self.index_to_large_chunks_enabled.get(
-                    index_name, False
-                ),
-                httpx_client=self.httpx_client,
-            )
-            vespa_document_index.update([update_request])
+        vespa_document_index.update([update_request])

    def delete_single(
        self,
@@ -730,11 +714,6 @@ class VespaIndex(DocumentIndex):
        tenant_id: str,
        chunk_count: int | None,
    ) -> int:
-        """
-        NOTE: Remember to handle the secondary index here. There is no separate
-        pipeline for deleting chunks in the secondary index. This design is not
-        ideal and we should reconsider this when revamping index swapping.
-        """
        tenant_state = TenantState(
            tenant_id=get_current_tenant_id(),
            multitenant=MULTI_TENANT,
@@ -747,25 +726,13 @@ class VespaIndex(DocumentIndex):
            raise ValueError(
                f"Bug: Tenant ID mismatch. Expected {tenant_state.tenant_id}, got {tenant_id}."
            )
-        indices = [self.index_name]
-        if self.secondary_index_name:
-            indices.append(self.secondary_index_name)
-
-        total_chunks_deleted = 0
-        for index_name in indices:
-            vespa_document_index = VespaDocumentIndex(
-                index_name=index_name,
-                tenant_state=tenant_state,
-                large_chunks_enabled=self.index_to_large_chunks_enabled.get(
-                    index_name, False
-                ),
-                httpx_client=self.httpx_client,
-            )
-            total_chunks_deleted += vespa_document_index.delete(
-                document_id=doc_id, chunk_count=chunk_count
-            )
-
-        return total_chunks_deleted
+        vespa_document_index = VespaDocumentIndex(
+            index_name=self.index_name,
+            tenant_state=tenant_state,
+            large_chunks_enabled=self.large_chunks_enabled,
+            httpx_client=self.httpx_client,
+        )
+        return vespa_document_index.delete(document_id=doc_id, chunk_count=chunk_count)

    def id_based_retrieval(
        self,
--- a/backend/onyx/document_index/vespa/shared_utils/utils.py
+++ b/backend/onyx/document_index/vespa/shared_utils/utils.py
@@ -52,9 +52,7 @@ def replace_invalid_doc_id_characters(text: str) -> str:
    return text.replace("'", "_")


-def get_vespa_http_client(
-    no_timeout: bool = False, http2: bool = True, timeout: int | None = None
-) -> httpx.Client:
+def get_vespa_http_client(no_timeout: bool = False, http2: bool = True) -> httpx.Client:
    """
    Configures and returns an HTTP client for communicating with Vespa,
    including authentication if needed.
@@ -66,7 +64,7 @@ def get_vespa_http_client(
            else None
        ),
        verify=False if not MANAGED_VESPA else True,
-        timeout=None if no_timeout else (timeout or VESPA_REQUEST_TIMEOUT),
+        timeout=None if no_timeout else VESPA_REQUEST_TIMEOUT,
        http2=http2,
    )

--- a/backend/onyx/error_handling/init.py
+++ b/backend/onyx/error_handling/init.py
--- a/backend/onyx/error_handling/error_codes.py
+++ b/backend/onyx/error_handling/error_codes.py
@@ -1,101 +0,0 @@
-"""
-Standardized error codes for the Onyx backend.
-
-Usage:
-    from onyx.error_handling.error_codes import OnyxErrorCode
-    from onyx.error_handling.exceptions import OnyxError
-
-    raise OnyxError(OnyxErrorCode.UNAUTHENTICATED, "Token expired")
-"""
-
-from enum import Enum
-
-
-class OnyxErrorCode(Enum):
-    """
-    Each member is a tuple of (error_code_string, http_status_code).
-
-    The error_code_string is a stable, machine-readable identifier that
-    API consumers can match on. The http_status_code is the default HTTP
-    status to return.
-    """
-
-    # ------------------------------------------------------------------
-    # Authentication (401)
-    # ------------------------------------------------------------------
-    UNAUTHENTICATED = ("UNAUTHENTICATED", 401)
-    INVALID_TOKEN = ("INVALID_TOKEN", 401)
-    TOKEN_EXPIRED = ("TOKEN_EXPIRED", 401)
-    CSRF_FAILURE = ("CSRF_FAILURE", 403)
-
-    # ------------------------------------------------------------------
-    # Authorization (403)
-    # ------------------------------------------------------------------
-    UNAUTHORIZED = ("UNAUTHORIZED", 403)
-    INSUFFICIENT_PERMISSIONS = ("INSUFFICIENT_PERMISSIONS", 403)
-    ADMIN_ONLY = ("ADMIN_ONLY", 403)
-    EE_REQUIRED = ("EE_REQUIRED", 403)
-
-    # ------------------------------------------------------------------
-    # Validation / Bad Request (400)
-    # ------------------------------------------------------------------
-    VALIDATION_ERROR = ("VALIDATION_ERROR", 400)
-    INVALID_INPUT = ("INVALID_INPUT", 400)
-    MISSING_REQUIRED_FIELD = ("MISSING_REQUIRED_FIELD", 400)
-
-    # ------------------------------------------------------------------
-    # Not Found (404)
-    # ------------------------------------------------------------------
-    NOT_FOUND = ("NOT_FOUND", 404)
-    CONNECTOR_NOT_FOUND = ("CONNECTOR_NOT_FOUND", 404)
-    CREDENTIAL_NOT_FOUND = ("CREDENTIAL_NOT_FOUND", 404)
-    PERSONA_NOT_FOUND = ("PERSONA_NOT_FOUND", 404)
-    DOCUMENT_NOT_FOUND = ("DOCUMENT_NOT_FOUND", 404)
-    SESSION_NOT_FOUND = ("SESSION_NOT_FOUND", 404)
-    USER_NOT_FOUND = ("USER_NOT_FOUND", 404)
-
-    # ------------------------------------------------------------------
-    # Conflict (409)
-    # ------------------------------------------------------------------
-    CONFLICT = ("CONFLICT", 409)
-    DUPLICATE_RESOURCE = ("DUPLICATE_RESOURCE", 409)
-
-    # ------------------------------------------------------------------
-    # Rate Limiting / Quotas (429 / 402)
-    # ------------------------------------------------------------------
-    RATE_LIMITED = ("RATE_LIMITED", 429)
-    SEAT_LIMIT_EXCEEDED = ("SEAT_LIMIT_EXCEEDED", 402)
-
-    # ------------------------------------------------------------------
-    # Connector / Credential Errors (400-range)
-    # ------------------------------------------------------------------
-    CONNECTOR_VALIDATION_FAILED = ("CONNECTOR_VALIDATION_FAILED", 400)
-    CREDENTIAL_INVALID = ("CREDENTIAL_INVALID", 400)
-    CREDENTIAL_EXPIRED = ("CREDENTIAL_EXPIRED", 401)
-
-    # ------------------------------------------------------------------
-    # Server Errors (5xx)
-    # ------------------------------------------------------------------
-    INTERNAL_ERROR = ("INTERNAL_ERROR", 500)
-    NOT_IMPLEMENTED = ("NOT_IMPLEMENTED", 501)
-    SERVICE_UNAVAILABLE = ("SERVICE_UNAVAILABLE", 503)
-    BAD_GATEWAY = ("BAD_GATEWAY", 502)
-    LLM_PROVIDER_ERROR = ("LLM_PROVIDER_ERROR", 502)
-    GATEWAY_TIMEOUT = ("GATEWAY_TIMEOUT", 504)
-
-    def __init__(self, code: str, status_code: int) -> None:
-        self.code = code
-        self.status_code = status_code
-
-    def detail(self, message: str | None = None) -> dict[str, str]:
-        """Build a structured error detail dict.
-
-        Returns a dict like:
-            {"error_code": "UNAUTHENTICATED", "message": "Token expired"}
-
-        If no message is supplied, the error code itself is used as the message.
-        """
-        return {
-            "error_code": self.code,
-            "message": message or self.code,
-        }
--- a/backend/onyx/error_handling/exceptions.py
+++ b/backend/onyx/error_handling/exceptions.py
@@ -1,82 +0,0 @@
-"""OnyxError — the single exception type for all Onyx business errors.
-
-Raise ``OnyxError`` instead of ``HTTPException`` in business code.  A global
-FastAPI exception handler (registered via ``register_onyx_exception_handlers``)
-converts it into a JSON response with the standard
-``{"error_code": "...", "message": "..."}`` shape.
-
-Usage::
-
-    from onyx.error_handling.error_codes import OnyxErrorCode
-    from onyx.error_handling.exceptions import OnyxError
-
-    raise OnyxError(OnyxErrorCode.NOT_FOUND, "Session not found")
-
-For upstream errors with a dynamic HTTP status (e.g. billing service),
-use ``status_code_override``::
-
-    raise OnyxError(
-        OnyxErrorCode.BAD_GATEWAY,
-        detail,
-        status_code_override=upstream_status,
-    )
-"""
-
-from fastapi import FastAPI
-from fastapi import Request
-from fastapi.responses import JSONResponse
-
-from onyx.error_handling.error_codes import OnyxErrorCode
-from onyx.utils.logger import setup_logger
-
-logger = setup_logger()
-
-
-class OnyxError(Exception):
-    """Structured error that maps to a specific ``OnyxErrorCode``.
-
-    Attributes:
-        error_code: The ``OnyxErrorCode`` enum member.
-        message: Human-readable message (defaults to the error code string).
-        status_code: HTTP status — either overridden or from the error code.
-    """
-
-    def __init__(
-        self,
-        error_code: OnyxErrorCode,
-        message: str | None = None,
-        *,
-        status_code_override: int | None = None,
-    ) -> None:
-        self.error_code = error_code
-        self.message = message or error_code.code
-        self._status_code_override = status_code_override
-        super().__init__(self.message)
-
-    @property
-    def status_code(self) -> int:
-        return self._status_code_override or self.error_code.status_code
-
-
-def register_onyx_exception_handlers(app: FastAPI) -> None:
-    """Register a global handler that converts ``OnyxError`` to JSON responses.
-
-    Must be called *after* the app is created but *before* it starts serving.
-    The handler logs at WARNING for 4xx and ERROR for 5xx.
-    """
-
-    @app.exception_handler(OnyxError)
-    async def _handle_onyx_error(
-        request: Request,  # noqa: ARG001
-        exc: OnyxError,
-    ) -> JSONResponse:
-        status_code = exc.status_code
-        if status_code >= 500:
-            logger.error(f"OnyxError {exc.error_code.code}: {exc.message}")
-        elif status_code >= 400:
-            logger.warning(f"OnyxError {exc.error_code.code}: {exc.message}")
-
-        return JSONResponse(
-            status_code=status_code,
-            content=exc.error_code.detail(exc.message),
-        )
--- a/backend/onyx/main.py
+++ b/backend/onyx/main.py
@@ -59,7 +59,6 @@ from onyx.db.engine.async_sql_engine import get_sqlalchemy_async_engine
 from onyx.db.engine.connection_warmup import warm_up_connections
 from onyx.db.engine.sql_engine import get_session_with_current_tenant
 from onyx.db.engine.sql_engine import SqlEngine
-from onyx.error_handling.exceptions import register_onyx_exception_handlers
 from onyx.file_store.file_store import get_default_file_store
 from onyx.server.api_key.api import router as api_key_router
 from onyx.server.auth_check import check_router_auth
@@ -445,8 +444,6 @@ def get_application(lifespan_override: Lifespan | None = None) -> FastAPI:
        status.HTTP_500_INTERNAL_SERVER_ERROR, log_http_error
    )

-    register_onyx_exception_handlers(application)
-
    include_router_with_global_prefix_prepended(application, password_router)
    include_router_with_global_prefix_prepended(application, chat_router)
    include_router_with_global_prefix_prepended(application, query_router)
--- a/backend/onyx/onyxbot/slack/formatting.py
+++ b/backend/onyx/onyxbot/slack/formatting.py
@@ -130,7 +130,7 @@ def format_slack_message(message: str | None) -> str:
    message = _transform_outside_code_blocks(message, _sanitize_html)
    message = _convert_slack_links_to_markdown(message)
    normalized_message = _normalize_link_destinations(message)
-    md = create_markdown(renderer=SlackRenderer(), plugins=["strikethrough", "table"])
+    md = create_markdown(renderer=SlackRenderer(), plugins=["strikethrough"])
    result = md(normalized_message)
    # With HTMLRenderer, result is always str (not AST list)
    assert isinstance(result, str)
@@ -146,11 +146,6 @@ class SlackRenderer(HTMLRenderer):

    SPECIALS: dict[str, str] = {"&": "&amp;", "<": "&lt;", ">": "&gt;"}

-    def __init__(self) -> None:
-        super().__init__()
-        self._table_headers: list[str] = []
-        self._current_row_cells: list[str] = []
-
    def escape_special(self, text: str) -> str:
        for special, replacement in self.SPECIALS.items():
            text = text.replace(special, replacement)
@@ -223,48 +218,5 @@ class SlackRenderer(HTMLRenderer):
        # as literal &quot; text since Slack doesn't recognize that entity.
        return self.escape_special(text)

-    # -- Table rendering (converts markdown tables to vertical cards) --
-
-    def table_cell(
-        self, text: str, align: str | None = None, head: bool = False  # noqa: ARG002
-    ) -> str:
-        if head:
-            self._table_headers.append(text.strip())
-        else:
-            self._current_row_cells.append(text.strip())
-        return ""
-
-    def table_head(self, text: str) -> str:  # noqa: ARG002
-        self._current_row_cells = []
-        return ""
-
-    def table_row(self, text: str) -> str:  # noqa: ARG002
-        cells = self._current_row_cells
-        self._current_row_cells = []
-        # First column becomes the bold title, remaining columns are bulleted fields
-        lines: list[str] = []
-        if cells:
-            title = cells[0]
-            if title:
-                # Avoid double-wrapping if cell already contains bold markup
-                if title.startswith("*") and title.endswith("*") and len(title) > 1:
-                    lines.append(title)
-                else:
-                    lines.append(f"*{title}*")
-            for i, cell in enumerate(cells[1:], start=1):
-                if i < len(self._table_headers):
-                    lines.append(f"  • {self._table_headers[i]}: {cell}")
-                else:
-                    lines.append(f"  • {cell}")
-        return "\n".join(lines) + "\n\n"
-
-    def table_body(self, text: str) -> str:
-        return text
-
-    def table(self, text: str) -> str:
-        self._table_headers = []
-        self._current_row_cells = []
-        return text + "\n"
-
    def paragraph(self, text: str) -> str:
        return f"{text}\n\n"
--- a/backend/onyx/server/documents/connector.py
+++ b/backend/onyx/server/documents/connector.py
@@ -92,7 +92,6 @@ from onyx.db.connector_credential_pair import get_connector_credential_pairs_for
 from onyx.db.connector_credential_pair import (
    get_connector_credential_pairs_for_user_parallel,
 )
-from onyx.db.connector_credential_pair import verify_user_has_access_to_cc_pair
 from onyx.db.credentials import cleanup_gmail_credentials
 from onyx.db.credentials import cleanup_google_drive_credentials
 from onyx.db.credentials import create_credential
@@ -573,43 +572,6 @@ def _normalize_file_names_for_backwards_compatibility(
    return file_names + file_locations[len(file_names) :]


-def _fetch_and_check_file_connector_cc_pair_permissions(
-    connector_id: int,
-    user: User,
-    db_session: Session,
-    require_editable: bool,
-) -> ConnectorCredentialPair:
-    cc_pair = fetch_connector_credential_pair_for_connector(db_session, connector_id)
-    if cc_pair is None:
-        raise HTTPException(
-            status_code=404,
-            detail="No Connector-Credential Pair found for this connector",
-        )
-
-    has_requested_access = verify_user_has_access_to_cc_pair(
-        cc_pair_id=cc_pair.id,
-        db_session=db_session,
-        user=user,
-        get_editable=require_editable,
-    )
-    if has_requested_access:
-        return cc_pair
-
-    # Special case: global curators should be able to manage files
-    # for public file connectors even when they are not the creator.
-    if (
-        require_editable
-        and user.role == UserRole.GLOBAL_CURATOR
-        and cc_pair.access_type == AccessType.PUBLIC
-    ):
-        return cc_pair
-
-    raise HTTPException(
-        status_code=403,
-        detail="Access denied. User cannot manage files for this connector.",
-    )
-
-
@router.post("/admin/connector/file/upload", tags=PUBLIC_API_TAGS)
 def upload_files_api(
    files: list[UploadFile],
@@ -621,7 +583,7 @@ def upload_files_api(
@router.get("/admin/connector/{connector_id}/files", tags=PUBLIC_API_TAGS)
 def list_connector_files(
    connector_id: int,
-    user: User = Depends(current_curator_or_admin_user),
+    user: User = Depends(current_curator_or_admin_user),  # noqa: ARG001
    db_session: Session = Depends(get_session),
 ) -> ConnectorFilesResponse:
    """List all files in a file connector."""
@@ -634,13 +596,6 @@ def list_connector_files(
            status_code=400, detail="This endpoint only works with file connectors"
        )

-    _ = _fetch_and_check_file_connector_cc_pair_permissions(
-        connector_id=connector_id,
-        user=user,
-        db_session=db_session,
-        require_editable=False,
-    )
-
    file_locations = connector.connector_specific_config.get("file_locations", [])
    file_names = connector.connector_specific_config.get("file_names", [])

@@ -690,7 +645,7 @@ def update_connector_files(
    connector_id: int,
    files: list[UploadFile] | None = File(None),
    file_ids_to_remove: str = Form("[]"),
-    user: User = Depends(current_curator_or_admin_user),
+    user: User = Depends(current_curator_or_admin_user),  # noqa: ARG001
    db_session: Session = Depends(get_session),
 ) -> FileUploadResponse:
    """
@@ -708,13 +663,12 @@ def update_connector_files(
        )

    # Get the connector-credential pair for indexing/pruning triggers
-    # and validate user permissions for file management.
-    cc_pair = _fetch_and_check_file_connector_cc_pair_permissions(
-        connector_id=connector_id,
-        user=user,
-        db_session=db_session,
-        require_editable=True,
-    )
+    cc_pair = fetch_connector_credential_pair_for_connector(db_session, connector_id)
+    if cc_pair is None:
+        raise HTTPException(
+            status_code=404,
+            detail="No Connector-Credential Pair found for this connector",
+        )

    # Parse file IDs to remove
    try:
--- a/backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/package-lock.json
+++ b/backend/onyx/server/features/build/sandbox/kubernetes/docker/templates/outputs/web/package-lock.json
@@ -7424,9 +7424,9 @@
      }
    },
    "node_modules/hono": {
-      "version": "4.12.5",
-      "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.5.tgz",
-      "integrity": "sha512-3qq+FUBtlTHhtYxbxheZgY8NIFnkkC/MR8u5TTsr7YZ3wixryQ3cCwn3iZbg8p8B88iDBBAYSfZDS75t8MN7Vg==",
+      "version": "4.11.7",
+      "resolved": "https://registry.npmjs.org/hono/-/hono-4.11.7.tgz",
+      "integrity": "sha512-l7qMiNee7t82bH3SeyUCt9UF15EVmaBvsppY2zQtrbIhl/yzBTny+YUxsVjSjQ6gaqaeVtZmGocom8TzBlA4Yw==",
      "license": "MIT",
      "engines": {
        "node": ">=16.9.0"
--- a/backend/onyx/server/features/document_set/api.py
+++ b/backend/onyx/server/features/document_set/api.py
@@ -11,7 +11,6 @@ from onyx.configs.app_configs import DISABLE_VECTOR_DB
 from onyx.configs.constants import OnyxCeleryPriority
 from onyx.configs.constants import OnyxCeleryTask
 from onyx.db.document_set import check_document_sets_are_public
-from onyx.db.document_set import delete_document_set as db_delete_document_set
 from onyx.db.document_set import fetch_all_document_sets_for_user
 from onyx.db.document_set import get_document_set_by_id
 from onyx.db.document_set import insert_document_set
@@ -143,10 +142,7 @@ def delete_document_set(
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))

-    if DISABLE_VECTOR_DB:
-        db_session.refresh(document_set)
-        db_delete_document_set(document_set, db_session)
-    else:
+    if not DISABLE_VECTOR_DB:
        client_app.send_task(
            OnyxCeleryTask.CHECK_FOR_VESPA_SYNC_TASK,
            kwargs={"tenant_id": tenant_id},
--- a/backend/onyx/server/features/projects/projects_file_utils.py
+++ b/backend/onyx/server/features/projects/projects_file_utils.py
@@ -7,14 +7,13 @@ from PIL import UnidentifiedImageError
 from pydantic import BaseModel
 from pydantic import ConfigDict
 from pydantic import Field
-from sqlalchemy.orm import Session

 from onyx.configs.app_configs import FILE_TOKEN_COUNT_THRESHOLD
-from onyx.db.llm import fetch_default_llm_model
 from onyx.file_processing.extract_file_text import extract_file_text
 from onyx.file_processing.extract_file_text import get_file_ext
 from onyx.file_processing.file_types import OnyxFileExtensions
 from onyx.file_processing.password_validation import is_file_password_protected
+from onyx.llm.factory import get_default_llm
 from onyx.natural_language_processing.utils import get_tokenizer
 from onyx.utils.logger import setup_logger
 from shared_configs.configs import MULTI_TENANT
@@ -117,9 +116,7 @@ def estimate_image_tokens_for_upload(
            pass


-def categorize_uploaded_files(
-    files: list[UploadFile], db_session: Session
-) -> CategorizedFiles:
+def categorize_uploaded_files(files: list[UploadFile]) -> CategorizedFiles:
    """
    Categorize uploaded files based on text extractability and tokenized length.

@@ -131,11 +128,11 @@ def categorize_uploaded_files(
    """

    results = CategorizedFiles()
-    default_model = fetch_default_llm_model(db_session)
+    llm = get_default_llm()

-    model_name = default_model.name if default_model else None
-    provider_type = default_model.llm_provider.provider if default_model else None
-    tokenizer = get_tokenizer(model_name=model_name, provider_type=provider_type)
+    tokenizer = get_tokenizer(
+        model_name=llm.config.model_name, provider_type=llm.config.model_provider
+    )

    # Check if threshold checks should be skipped
    skip_threshold = False
--- a/backend/onyx/server/manage/embedding/api.py
+++ b/backend/onyx/server/manage/embedding/api.py
@@ -1,5 +1,6 @@
 from fastapi import APIRouter
 from fastapi import Depends
+from fastapi import HTTPException
 from sqlalchemy.orm import Session

 from onyx.auth.users import current_admin_user
@@ -10,8 +11,6 @@ from onyx.db.llm import upsert_cloud_embedding_provider
 from onyx.db.models import User
 from onyx.db.search_settings import get_all_search_settings
 from onyx.db.search_settings import get_current_db_embedding_provider
-from onyx.error_handling.error_codes import OnyxErrorCode
-from onyx.error_handling.exceptions import OnyxError
 from onyx.indexing.models import EmbeddingModelDetail
 from onyx.natural_language_processing.search_nlp_models import EmbeddingModel
 from onyx.server.manage.embedding.models import CloudEmbeddingProvider
@@ -60,7 +59,7 @@ def test_embedding_configuration(
    except Exception as e:
        error_msg = "An error occurred while testing your embedding model. Please check your configuration."
        logger.error(f"{error_msg} Error message: {e}", exc_info=True)
-        raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, error_msg)
+        raise HTTPException(status_code=400, detail=error_msg)


@admin_router.get("", response_model=list[EmbeddingModelDetail])
@@ -94,9 +93,8 @@ def delete_embedding_provider(
        embedding_provider is not None
        and provider_type == embedding_provider.provider_type
    ):
-        raise OnyxError(
-            OnyxErrorCode.VALIDATION_ERROR,
-            "You can't delete a currently active model",
+        raise HTTPException(
+            status_code=400, detail="You can't delete a currently active model"
        )

    remove_embedding_provider(db_session, provider_type=provider_type)
--- a/backend/onyx/server/manage/llm/api.py
+++ b/backend/onyx/server/manage/llm/api.py
@@ -11,6 +11,7 @@ from botocore.exceptions import ClientError
 from botocore.exceptions import NoCredentialsError
 from fastapi import APIRouter
 from fastapi import Depends
+from fastapi import HTTPException
 from fastapi import Query
 from pydantic import ValidationError
 from sqlalchemy.orm import Session
@@ -37,8 +38,6 @@ from onyx.db.llm import upsert_llm_provider
 from onyx.db.llm import validate_persona_ids_exist
 from onyx.db.models import User
 from onyx.db.persona import user_can_access_persona
-from onyx.error_handling.error_codes import OnyxErrorCode
-from onyx.error_handling.exceptions import OnyxError
 from onyx.llm.factory import get_default_llm
 from onyx.llm.factory import get_llm
 from onyx.llm.factory import get_max_input_tokens_from_llm_provider
@@ -187,7 +186,7 @@ def _validate_llm_provider_change(
    Only enforced in MULTI_TENANT mode.

    Raises:
-        OnyxError: If api_base or custom_config changed without changing API key
+        HTTPException: If api_base or custom_config changed without changing API key
    """
    if not MULTI_TENANT or api_key_changed:
        return
@@ -201,9 +200,9 @@ def _validate_llm_provider_change(
    )

    if api_base_changed or custom_config_changed:
-        raise OnyxError(
-            OnyxErrorCode.VALIDATION_ERROR,
-            "API base and/or custom config cannot be changed without changing the API key",
+        raise HTTPException(
+            status_code=400,
+            detail="API base and/or custom config cannot be changed without changing the API key",
        )


@@ -223,7 +222,7 @@ def fetch_llm_provider_options(
    for well_known_llm in well_known_llms:
        if well_known_llm.name == provider_name:
            return well_known_llm
-    raise OnyxError(OnyxErrorCode.NOT_FOUND, f"Provider {provider_name} not found")
+    raise HTTPException(status_code=404, detail=f"Provider {provider_name} not found")


@admin_router.post("/test")
@@ -282,7 +281,7 @@ def test_llm_configuration(
    error_msg = test_llm(llm)

    if error_msg:
-        raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, error_msg)
+        raise HTTPException(status_code=400, detail=error_msg)


@admin_router.post("/test/default")
@@ -293,11 +292,11 @@ def test_default_provider(
        llm = get_default_llm()
    except ValueError:
        logger.exception("Failed to fetch default LLM Provider")
-        raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, "No LLM Provider setup")
+        raise HTTPException(status_code=400, detail="No LLM Provider setup")

    error = test_llm(llm)
    if error:
-        raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, str(error))
+        raise HTTPException(status_code=400, detail=str(error))


@admin_router.get("/provider")
@@ -363,31 +362,35 @@ def put_llm_provider(
    # Check name constraints
    # TODO: Once port from name to id is complete, unique name will no longer be required
    if existing_provider and llm_provider_upsert_request.name != existing_provider.name:
-        raise OnyxError(
-            OnyxErrorCode.VALIDATION_ERROR,
-            "Renaming providers is not currently supported",
+        raise HTTPException(
+            status_code=400,
+            detail="Renaming providers is not currently supported",
        )

    found_provider = fetch_existing_llm_provider(
        name=llm_provider_upsert_request.name, db_session=db_session
    )
    if found_provider is not None and found_provider is not existing_provider:
-        raise OnyxError(
-            OnyxErrorCode.DUPLICATE_RESOURCE,
-            f"Provider with name={llm_provider_upsert_request.name} already exists",
+        raise HTTPException(
+            status_code=400,
+            detail=f"Provider with name={llm_provider_upsert_request.name} already exists",
        )

    if existing_provider and is_creation:
-        raise OnyxError(
-            OnyxErrorCode.DUPLICATE_RESOURCE,
-            f"LLM Provider with name {llm_provider_upsert_request.name} and "
-            f"id={llm_provider_upsert_request.id} already exists",
+        raise HTTPException(
+            status_code=400,
+            detail=(
+                f"LLM Provider with name {llm_provider_upsert_request.name} and "
+                f"id={llm_provider_upsert_request.id} already exists"
+            ),
        )
    elif not existing_provider and not is_creation:
-        raise OnyxError(
-            OnyxErrorCode.NOT_FOUND,
-            f"LLM Provider with name {llm_provider_upsert_request.name} and "
-            f"id={llm_provider_upsert_request.id} does not exist",
+        raise HTTPException(
+            status_code=400,
+            detail=(
+                f"LLM Provider with name {llm_provider_upsert_request.name} and "
+                f"id={llm_provider_upsert_request.id} does not exist"
+            ),
        )

    # SSRF Protection: Validate api_base and custom_config match stored values
@@ -412,9 +415,9 @@ def put_llm_provider(
            db_session, persona_ids
        )
        if missing_personas:
-            raise OnyxError(
-                OnyxErrorCode.VALIDATION_ERROR,
-                f"Invalid persona IDs: {', '.join(map(str, missing_personas))}",
+            raise HTTPException(
+                status_code=400,
+                detail=f"Invalid persona IDs: {', '.join(map(str, missing_personas))}",
            )
        # Remove duplicates while preserving order
        seen: set[int] = set()
@@ -470,7 +473,7 @@ def put_llm_provider(
        return result
    except ValueError as e:
        logger.exception("Failed to upsert LLM Provider")
-        raise OnyxError(OnyxErrorCode.VALIDATION_ERROR, str(e))
+        raise HTTPException(status_code=400, detail=str(e))


@admin_router.delete("/provider/{provider_id}")
@@ -480,19 +483,19 @@ def delete_llm_provider(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
 ) -> None:
-    if not force:
-        model = fetch_default_llm_model(db_session)
-
-        if model and model.llm_provider_id == provider_id:
-            raise OnyxError(
-                OnyxErrorCode.VALIDATION_ERROR,
-                "Cannot delete the default LLM provider",
-            )
-
    try:
+        if not force:
+            model = fetch_default_llm_model(db_session)
+
+            if model and model.llm_provider_id == provider_id:
+                raise HTTPException(
+                    status_code=400,
+                    detail="Cannot delete the default LLM provider",
+                )
+
        remove_llm_provider(db_session, provider_id)
    except ValueError as e:
-        raise OnyxError(OnyxErrorCode.NOT_FOUND, str(e))
+        raise HTTPException(status_code=404, detail=str(e))


@admin_router.post("/default")
@@ -532,9 +535,9 @@ def get_auto_config(
    """
    config = fetch_llm_recommendations_from_github()
    if not config:
-        raise OnyxError(
-            OnyxErrorCode.BAD_GATEWAY,
-            "Failed to fetch configuration from GitHub",
+        raise HTTPException(
+            status_code=502,
+            detail="Failed to fetch configuration from GitHub",
        )
    return config.model_dump()

@@ -691,13 +694,13 @@ def list_llm_providers_for_persona(

    persona = fetch_persona_with_groups(db_session, persona_id)
    if not persona:
-        raise OnyxError(OnyxErrorCode.PERSONA_NOT_FOUND, "Persona not found")
+        raise HTTPException(status_code=404, detail="Persona not found")

    # Verify user has access to this persona
    if not user_can_access_persona(db_session, persona_id, user, get_editable=False):
-        raise OnyxError(
-            OnyxErrorCode.INSUFFICIENT_PERMISSIONS,
-            "You don't have access to this assistant",
+        raise HTTPException(
+            status_code=403,
+            detail="You don't have access to this assistant",
        )

    is_admin = user.role == UserRole.ADMIN
@@ -851,9 +854,9 @@ def get_bedrock_available_models(
        try:
            bedrock = session.client("bedrock")
        except Exception as e:
-            raise OnyxError(
-                OnyxErrorCode.CREDENTIAL_INVALID,
-                f"Failed to create Bedrock client: {e}. Check AWS credentials and region.",
+            raise HTTPException(
+                status_code=400,
+                detail=f"Failed to create Bedrock client: {e}. Check AWS credentials and region.",
            )

        # Build model info dict from foundation models (modelId -> metadata)
@@ -972,14 +975,14 @@ def get_bedrock_available_models(
        return results

    except (ClientError, NoCredentialsError, BotoCoreError) as e:
-        raise OnyxError(
-            OnyxErrorCode.CREDENTIAL_INVALID,
-            f"Failed to connect to AWS Bedrock: {e}",
+        raise HTTPException(
+            status_code=400,
+            detail=f"Failed to connect to AWS Bedrock: {e}",
        )
    except Exception as e:
-        raise OnyxError(
-            OnyxErrorCode.INTERNAL_ERROR,
-            f"Unexpected error fetching Bedrock models: {e}",
+        raise HTTPException(
+            status_code=500,
+            detail=f"Unexpected error fetching Bedrock models: {e}",
        )


@@ -991,9 +994,9 @@ def _get_ollama_available_model_names(api_base: str) -> set[str]:
        response.raise_for_status()
        response_json = response.json()
    except Exception as e:
-        raise OnyxError(
-            OnyxErrorCode.BAD_GATEWAY,
-            f"Failed to fetch Ollama models: {e}",
+        raise HTTPException(
+            status_code=400,
+            detail=f"Failed to fetch Ollama models: {e}",
        )

    models = response_json.get("models", [])
@@ -1010,9 +1013,9 @@ def get_ollama_available_models(

    cleaned_api_base = request.api_base.strip().rstrip("/")
    if not cleaned_api_base:
-        raise OnyxError(
-            OnyxErrorCode.VALIDATION_ERROR,
-            "API base URL is required to fetch Ollama models.",
+        raise HTTPException(
+            status_code=400,
+            detail="API base URL is required to fetch Ollama models.",
        )

    # NOTE: most people run Ollama locally, so we don't disallow internal URLs
@@ -1021,9 +1024,9 @@ def get_ollama_available_models(
    # with the same response format
    model_names = _get_ollama_available_model_names(cleaned_api_base)
    if not model_names:
-        raise OnyxError(
-            OnyxErrorCode.VALIDATION_ERROR,
-            "No models found from your Ollama server",
+        raise HTTPException(
+            status_code=400,
+            detail="No models found from your Ollama server",
        )

    all_models_with_context_size_and_vision: list[OllamaFinalModelResponse] = []
@@ -1125,9 +1128,9 @@ def _get_openrouter_models_response(api_base: str, api_key: str) -> dict:
        response.raise_for_status()
        return response.json()
    except Exception as e:
-        raise OnyxError(
-            OnyxErrorCode.BAD_GATEWAY,
-            f"Failed to fetch OpenRouter models: {e}",
+        raise HTTPException(
+            status_code=400,
+            detail=f"Failed to fetch OpenRouter models: {e}",
        )


@@ -1148,9 +1151,9 @@ def get_openrouter_available_models(

    data = response_json.get("data", [])
    if not isinstance(data, list) or len(data) == 0:
-        raise OnyxError(
-            OnyxErrorCode.VALIDATION_ERROR,
-            "No models found from your OpenRouter endpoint",
+        raise HTTPException(
+            status_code=400,
+            detail="No models found from your OpenRouter endpoint",
        )

    results: list[OpenRouterFinalModelResponse] = []
@@ -1185,9 +1188,8 @@ def get_openrouter_available_models(
            )

    if not results:
-        raise OnyxError(
-            OnyxErrorCode.VALIDATION_ERROR,
-            "No compatible models found from OpenRouter",
+        raise HTTPException(
+            status_code=400, detail="No compatible models found from OpenRouter"
        )

    sorted_results = sorted(results, key=lambda m: m.name.lower())
--- a/backend/onyx/server/manage/search_settings.py
+++ b/backend/onyx/server/manage/search_settings.py
@@ -6,11 +6,8 @@ from sqlalchemy.orm import Session

 from onyx.auth.users import current_admin_user
 from onyx.auth.users import current_user
-from onyx.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP
 from onyx.context.search.models import SavedSearchSettings
 from onyx.context.search.models import SearchSettingsCreationRequest
-from onyx.db.connector_credential_pair import get_connector_credential_pairs
-from onyx.db.connector_credential_pair import resync_cc_pair
 from onyx.db.engine.sql_engine import get_session
 from onyx.db.index_attempt import expire_index_attempts
 from onyx.db.llm import fetch_existing_llm_provider
@@ -18,25 +15,20 @@ from onyx.db.llm import update_default_contextual_model
 from onyx.db.llm import update_no_default_contextual_rag_provider
 from onyx.db.models import IndexModelStatus
 from onyx.db.models import User
-from onyx.db.search_settings import create_search_settings
 from onyx.db.search_settings import delete_search_settings
 from onyx.db.search_settings import get_current_search_settings
-from onyx.db.search_settings import get_embedding_provider_from_provider_type
 from onyx.db.search_settings import get_secondary_search_settings
 from onyx.db.search_settings import update_current_search_settings
 from onyx.db.search_settings import update_search_settings_status
-from onyx.document_index.factory import get_all_document_indices
 from onyx.document_index.factory import get_default_document_index
 from onyx.file_processing.unstructured import delete_unstructured_api_key
 from onyx.file_processing.unstructured import get_unstructured_api_key
 from onyx.file_processing.unstructured import update_unstructured_api_key
-from onyx.natural_language_processing.search_nlp_models import clean_model_name
 from onyx.server.manage.embedding.models import SearchSettingsDeleteRequest
 from onyx.server.manage.models import FullModelVersionResponse
 from onyx.server.models import IdReturn
 from onyx.server.utils_vector_db import require_vector_db
 from onyx.utils.logger import setup_logger
-from shared_configs.configs import ALT_INDEX_SUFFIX
 from shared_configs.configs import MULTI_TENANT

 router = APIRouter(prefix="/search-settings")
@@ -49,99 +41,110 @@ def set_new_search_settings(
    _: User = Depends(current_admin_user),
    db_session: Session = Depends(get_session),  # noqa: ARG001
 ) -> IdReturn:
+    """Creates a new EmbeddingModel row and cancels the previous secondary indexing if any
+    Gives an error if the same model name is used as the current or secondary index
    """
-    Creates a new SearchSettings row and cancels the previous secondary indexing
-    if any exists.
-    """
-    if search_settings_new.index_name:
-        logger.warning("Index name was specified by request, this is not suggested")
-
-    # Disallow contextual RAG for cloud deployments.
-    if MULTI_TENANT and search_settings_new.enable_contextual_rag:
-        raise HTTPException(
-            status_code=status.HTTP_400_BAD_REQUEST,
-            detail="Contextual RAG disabled in Onyx Cloud",
-        )
-
-    # Validate cloud provider exists or create new LiteLLM provider.
-    if search_settings_new.provider_type is not None:
-        cloud_provider = get_embedding_provider_from_provider_type(
-            db_session, provider_type=search_settings_new.provider_type
-        )
-
-        if cloud_provider is None:
-            raise HTTPException(
-                status_code=status.HTTP_400_BAD_REQUEST,
-                detail=f"No embedding provider exists for cloud embedding type {search_settings_new.provider_type}",
-            )
-
-    validate_contextual_rag_model(
-        provider_name=search_settings_new.contextual_rag_llm_provider,
-        model_name=search_settings_new.contextual_rag_llm_name,
-        db_session=db_session,
+    # TODO(andrei): Re-enable.
+    # NOTE: Enable the integration external dependency tests in test_search_settings.py
+    # when this is re-enabled. They are currently skipped.
+    logger.error("Setting new search settings is temporarily disabled.")
+    raise HTTPException(
+        status_code=status.HTTP_501_NOT_IMPLEMENTED,
+        detail="Setting new search settings is temporarily disabled.",
    )
+    # if search_settings_new.index_name:
+    #     logger.warning("Index name was specified by request, this is not suggested")

-    search_settings = get_current_search_settings(db_session)
+    # # Disallow contextual RAG for cloud deployments
+    # if MULTI_TENANT and search_settings_new.enable_contextual_rag:
+    #     raise HTTPException(
+    #         status_code=status.HTTP_400_BAD_REQUEST,
+    #         detail="Contextual RAG disabled in Onyx Cloud",
+    #     )

-    if search_settings_new.index_name is None:
-        # We define index name here.
-        index_name = f"danswer_chunk_{clean_model_name(search_settings_new.model_name)}"
-        if (
-            search_settings_new.model_name == search_settings.model_name
-            and not search_settings.index_name.endswith(ALT_INDEX_SUFFIX)
-        ):
-            index_name += ALT_INDEX_SUFFIX
-        search_values = search_settings_new.model_dump()
-        search_values["index_name"] = index_name
-        new_search_settings_request = SavedSearchSettings(**search_values)
-    else:
-        new_search_settings_request = SavedSearchSettings(
-            **search_settings_new.model_dump()
-        )
+    # # Validate cloud provider exists or create new LiteLLM provider
+    # if search_settings_new.provider_type is not None:
+    #     cloud_provider = get_embedding_provider_from_provider_type(
+    #         db_session, provider_type=search_settings_new.provider_type
+    #     )

-    secondary_search_settings = get_secondary_search_settings(db_session)
+    #     if cloud_provider is None:
+    #         raise HTTPException(
+    #             status_code=status.HTTP_400_BAD_REQUEST,
+    #             detail=f"No embedding provider exists for cloud embedding type {search_settings_new.provider_type}",
+    #         )

-    if secondary_search_settings:
-        # Cancel any background indexing jobs.
-        expire_index_attempts(
-            search_settings_id=secondary_search_settings.id, db_session=db_session
-        )
+    # validate_contextual_rag_model(
+    #     provider_name=search_settings_new.contextual_rag_llm_provider,
+    #     model_name=search_settings_new.contextual_rag_llm_name,
+    #     db_session=db_session,
+    # )

-        # Mark previous model as a past model directly.
-        update_search_settings_status(
-            search_settings=secondary_search_settings,
-            new_status=IndexModelStatus.PAST,
-            db_session=db_session,
-        )
+    # search_settings = get_current_search_settings(db_session)

-    new_search_settings = create_search_settings(
-        search_settings=new_search_settings_request, db_session=db_session
-    )
+    # if search_settings_new.index_name is None:
+    #     # We define index name here
+    #     index_name = f"danswer_chunk_{clean_model_name(search_settings_new.model_name)}"
+    #     if (
+    #         search_settings_new.model_name == search_settings.model_name
+    #         and not search_settings.index_name.endswith(ALT_INDEX_SUFFIX)
+    #     ):
+    #         index_name += ALT_INDEX_SUFFIX
+    #     search_values = search_settings_new.model_dump()
+    #     search_values["index_name"] = index_name
+    #     new_search_settings_request = SavedSearchSettings(**search_values)
+    # else:
+    #     new_search_settings_request = SavedSearchSettings(
+    #         **search_settings_new.model_dump()
+    #     )

-    # Ensure the document indices have the new index immediately.
-    document_indices = get_all_document_indices(search_settings, new_search_settings)
-    for document_index in document_indices:
-        document_index.ensure_indices_exist(
-            primary_embedding_dim=search_settings.final_embedding_dim,
-            primary_embedding_precision=search_settings.embedding_precision,
-            secondary_index_embedding_dim=new_search_settings.final_embedding_dim,
-            secondary_index_embedding_precision=new_search_settings.embedding_precision,
-        )
+    # secondary_search_settings = get_secondary_search_settings(db_session)

-    # Pause index attempts for the currently in-use index to preserve resources.
-    if DISABLE_INDEX_UPDATE_ON_SWAP:
-        expire_index_attempts(
-            search_settings_id=search_settings.id, db_session=db_session
-        )
-        for cc_pair in get_connector_credential_pairs(db_session):
-            resync_cc_pair(
-                cc_pair=cc_pair,
-                search_settings_id=new_search_settings.id,
-                db_session=db_session,
-            )
+    # if secondary_search_settings:
+    #     # Cancel any background indexing jobs
+    #     expire_index_attempts(
+    #         search_settings_id=secondary_search_settings.id, db_session=db_session
+    #     )

-    db_session.commit()
-    return IdReturn(id=new_search_settings.id)
+    #     # Mark previous model as a past model directly
+    #     update_search_settings_status(
+    #         search_settings=secondary_search_settings,
+    #         new_status=IndexModelStatus.PAST,
+    #         db_session=db_session,
+    #     )
+
+    # new_search_settings = create_search_settings(
+    #     search_settings=new_search_settings_request, db_session=db_session
+    # )
+
+    # # Ensure Vespa has the new index immediately
+    # get_multipass_config(search_settings)
+    # get_multipass_config(new_search_settings)
+    # document_index = get_default_document_index(
+    #     search_settings, new_search_settings, db_session
+    # )
+
+    # document_index.ensure_indices_exist(
+    #     primary_embedding_dim=search_settings.final_embedding_dim,
+    #     primary_embedding_precision=search_settings.embedding_precision,
+    #     secondary_index_embedding_dim=new_search_settings.final_embedding_dim,
+    #     secondary_index_embedding_precision=new_search_settings.embedding_precision,
+    # )
+
+    # # Pause index attempts for the currently in use index to preserve resources
+    # if DISABLE_INDEX_UPDATE_ON_SWAP:
+    #     expire_index_attempts(
+    #         search_settings_id=search_settings.id, db_session=db_session
+    #     )
+    #     for cc_pair in get_connector_credential_pairs(db_session):
+    #         resync_cc_pair(
+    #             cc_pair=cc_pair,
+    #             search_settings_id=new_search_settings.id,
+    #             db_session=db_session,
+    #         )
+
+    # db_session.commit()
+    # return IdReturn(id=new_search_settings.id)


@router.post("/cancel-new-embedding", dependencies=[Depends(require_vector_db)])
--- a/backend/onyx/server/query_and_chat/chat_backend.py
+++ b/backend/onyx/server/query_and_chat/chat_backend.py
@@ -1,5 +1,6 @@
 import datetime
 import json
+import os
 from collections.abc import Generator
 from datetime import timedelta
 from uuid import UUID
@@ -60,6 +61,7 @@ from onyx.db.persona import get_persona_by_id
 from onyx.db.usage import increment_usage
 from onyx.db.usage import UsageType
 from onyx.db.user_file import get_file_id_by_user_file_id
+from onyx.file_processing.extract_file_text import docx_to_txt_filename
 from onyx.file_store.file_store import get_default_file_store
 from onyx.llm.constants import LlmProviderNames
 from onyx.llm.factory import get_default_llm
@@ -810,6 +812,18 @@ def fetch_chat_file(
    if not file_record:
        raise HTTPException(status_code=404, detail="File not found")

+    original_file_name = file_record.display_name
+    if file_record.file_type.startswith(
+        "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
+    ):
+        # Check if a converted text file exists for .docx files
+        txt_file_name = docx_to_txt_filename(original_file_name)
+        txt_file_id = os.path.join(os.path.dirname(file_id), txt_file_name)
+        txt_file_record = file_store.read_file_record(txt_file_id)
+        if txt_file_record:
+            file_record = txt_file_record
+            file_id = txt_file_id
+
    media_type = file_record.file_type
    file_io = file_store.read_file(file_id, mode="b")

--- a/backend/onyx/server/settings/models.py
+++ b/backend/onyx/server/settings/models.py
@@ -60,11 +60,9 @@ class Settings(BaseModel):
    deep_research_enabled: bool | None = None
    search_ui_enabled: bool | None = None

-    # Whether EE features are unlocked for use.
-    # Depends on license status: True when the user has a valid license
-    # (ACTIVE, GRACE_PERIOD, PAYMENT_REMINDER), False when there's no license
-    # or the license is expired (GATED_ACCESS).
-    # This controls UI visibility of EE features (user groups, analytics, RBAC, etc.).
+    # Enterprise features flag, set by license enforcement at runtime.
+    # When LICENSE_ENFORCEMENT_ENABLED=true, this reflects the license status.
+    # When LICENSE_ENFORCEMENT_ENABLED=false, it defaults to False.
    ee_features_enabled: bool = False

    temperature_override_enabled: bool | None = False
--- a/backend/onyx/tools/models.py
+++ b/backend/onyx/tools/models.py
@@ -93,8 +93,6 @@ class ToolResponse(BaseModel):
        # | WebContentResponse
        # This comes from custom tools, tool result needs to be saved
        | CustomToolCallSummary
-        # This comes from code interpreter, carries generated files
-        | PythonToolRichResponse
        # If the rich response is a string, this is what's saved to the tool call in the DB
        | str
        | None  # If nothing needs to be persisted outside of the string value passed to the LLM
@@ -195,12 +193,6 @@ class ChatFile(BaseModel):
    model_config = ConfigDict(arbitrary_types_allowed=True)


-class PythonToolRichResponse(BaseModel):
-    """Rich response from the Python tool carrying generated files."""
-
-    generated_files: list[PythonExecutionFile] = []
-
-
 class PythonToolOverrideKwargs(BaseModel):
    """Override kwargs for the Python/Code Interpreter tool."""

@@ -253,7 +245,6 @@ class ToolCallInfo(BaseModel):
    tool_call_response: str
    search_docs: list[SearchDoc] | None = None
    generated_images: list[GeneratedImage] | None = None
-    generated_files: list[PythonExecutionFile] | None = None


 CHAT_SESSION_ID_PLACEHOLDER = "CHAT_SESSION_ID"
--- a/backend/onyx/tools/tool_implementations/python/code_interpreter_client.py
+++ b/backend/onyx/tools/tool_implementations/python/code_interpreter_client.py
@@ -1,7 +1,4 @@
-from __future__ import annotations
-
 import json
-import time
 from collections.abc import Generator
 from typing import Literal
 from typing import TypedDict
@@ -15,9 +12,6 @@ from onyx.utils.logger import setup_logger

 logger = setup_logger()

-_HEALTH_CACHE_TTL_SECONDS = 30
-_health_cache: dict[str, tuple[float, bool]] = {}
-

 class FileInput(TypedDict):
    """Input file to be staged in execution workspace"""
@@ -86,19 +80,6 @@ class CodeInterpreterClient:
            raise ValueError("CODE_INTERPRETER_BASE_URL not configured")
        self.base_url = base_url.rstrip("/")
        self.session = requests.Session()
-        self._closed = False
-
-    def __enter__(self) -> CodeInterpreterClient:
-        return self
-
-    def __exit__(self, *args: object) -> None:
-        self.close()
-
-    def close(self) -> None:
-        if self._closed:
-            return
-        self.session.close()
-        self._closed = True

    def _build_payload(
        self,
@@ -117,32 +98,16 @@ class CodeInterpreterClient:
            payload["files"] = files
        return payload

-    def health(self, use_cache: bool = False) -> bool:
-        """Check if the Code Interpreter service is healthy
-
-        Args:
-            use_cache: When True, return a cached result if available and
-                       within the TTL window. The cache is always populated
-                       after a live request regardless of this flag.
-        """
-        if use_cache:
-            cached = _health_cache.get(self.base_url)
-            if cached is not None:
-                cached_at, cached_result = cached
-                if time.monotonic() - cached_at < _HEALTH_CACHE_TTL_SECONDS:
-                    return cached_result
-
+    def health(self) -> bool:
+        """Check if the Code Interpreter service is healthy"""
        url = f"{self.base_url}/health"
        try:
            response = self.session.get(url, timeout=5)
            response.raise_for_status()
-            result = response.json().get("status") == "ok"
+            return response.json().get("status") == "ok"
        except Exception as e:
            logger.warning(f"Exception caught when checking health, e={e}")
-            result = False
-
-        _health_cache[self.base_url] = (time.monotonic(), result)
-        return result
+            return False

    def execute(
        self,
@@ -192,11 +157,8 @@ class CodeInterpreterClient:
            yield from self._batch_as_stream(code, stdin, timeout_ms, files)
            return

-        try:
-            response.raise_for_status()
-            yield from self._parse_sse(response)
-        finally:
-            response.close()
+        response.raise_for_status()
+        yield from self._parse_sse(response)

    def _parse_sse(
        self, response: requests.Response
--- a/backend/onyx/tools/tool_implementations/python/python_tool.py
+++ b/backend/onyx/tools/tool_implementations/python/python_tool.py
@@ -23,7 +23,6 @@ from onyx.tools.interface import Tool
 from onyx.tools.models import LlmPythonExecutionResult
 from onyx.tools.models import PythonExecutionFile
 from onyx.tools.models import PythonToolOverrideKwargs
-from onyx.tools.models import PythonToolRichResponse
 from onyx.tools.models import ToolCallException
 from onyx.tools.models import ToolResponse
 from onyx.tools.tool_implementations.python.code_interpreter_client import (
@@ -108,11 +107,7 @@ class PythonTool(Tool[PythonToolOverrideKwargs]):
        if not CODE_INTERPRETER_BASE_URL:
            return False
        server = fetch_code_interpreter_server(db_session)
-        if not server.server_enabled:
-            return False
-
-        with CodeInterpreterClient() as client:
-            return client.health(use_cache=True)
+        return server.server_enabled

    def tool_definition(self) -> dict:
        return {
@@ -176,203 +171,194 @@ class PythonTool(Tool[PythonToolOverrideKwargs]):
            )
        )

-        # Create Code Interpreter client — context manager ensures
-        # session.close() is called on every exit path.
-        with CodeInterpreterClient() as client:
-            # Stage chat files for execution
-            files_to_stage: list[FileInput] = []
-            for ind, chat_file in enumerate(chat_files):
-                file_name = chat_file.filename or f"file_{ind}"
-                try:
-                    # Upload to Code Interpreter
-                    ci_file_id = client.upload_file(chat_file.content, file_name)
-
-                    # Stage for execution
-                    files_to_stage.append({"path": file_name, "file_id": ci_file_id})
-
-                    logger.info(f"Staged file for Python execution: {file_name}")
-
-                except Exception as e:
-                    logger.warning(f"Failed to stage file {file_name}: {e}")
+        # Create Code Interpreter client
+        client = CodeInterpreterClient()

+        # Stage chat files for execution
+        files_to_stage: list[FileInput] = []
+        for ind, chat_file in enumerate(chat_files):
+            file_name = chat_file.filename or f"file_{ind}"
            try:
-                logger.debug(f"Executing code: {code}")
+                # Upload to Code Interpreter
+                ci_file_id = client.upload_file(chat_file.content, file_name)

-                # Execute code with streaming (falls back to batch if unavailable)
-                stdout_parts: list[str] = []
-                stderr_parts: list[str] = []
-                result_event: StreamResultEvent | None = None
+                # Stage for execution
+                files_to_stage.append({"path": file_name, "file_id": ci_file_id})

-                for event in client.execute_streaming(
-                    code=code,
-                    timeout_ms=CODE_INTERPRETER_DEFAULT_TIMEOUT_MS,
-                    files=files_to_stage or None,
-                ):
-                    if isinstance(event, StreamOutputEvent):
-                        if event.stream == "stdout":
-                            stdout_parts.append(event.data)
-                        else:
-                            stderr_parts.append(event.data)
-                        # Emit incremental delta to frontend
-                        self.emitter.emit(
-                            Packet(
-                                placement=placement,
-                                obj=PythonToolDelta(
-                                    stdout=(
-                                        event.data if event.stream == "stdout" else ""
-                                    ),
-                                    stderr=(
-                                        event.data if event.stream == "stderr" else ""
-                                    ),
-                                ),
-                            )
-                        )
-                    elif isinstance(event, StreamResultEvent):
-                        result_event = event
-                    elif isinstance(event, StreamErrorEvent):
-                        raise RuntimeError(f"Code interpreter error: {event.message}")
+                logger.info(f"Staged file for Python execution: {file_name}")

-                if result_event is None:
-                    raise RuntimeError(
-                        "Code interpreter stream ended without a result event"
-                    )
+            except Exception as e:
+                logger.warning(f"Failed to stage file {file_name}: {e}")

-                full_stdout = "".join(stdout_parts)
-                full_stderr = "".join(stderr_parts)
+        try:
+            logger.debug(f"Executing code: {code}")

-                # Truncate output for LLM consumption
-                truncated_stdout = _truncate_output(
-                    full_stdout, CODE_INTERPRETER_MAX_OUTPUT_LENGTH, "stdout"
-                )
-                truncated_stderr = _truncate_output(
-                    full_stderr, CODE_INTERPRETER_MAX_OUTPUT_LENGTH, "stderr"
-                )
+            # Execute code with streaming (falls back to batch if unavailable)
+            stdout_parts: list[str] = []
+            stderr_parts: list[str] = []
+            result_event: StreamResultEvent | None = None

-                # Handle generated files
-                generated_files: list[PythonExecutionFile] = []
-                generated_file_ids: list[str] = []
-                file_ids_to_cleanup: list[str] = []
-                file_store = get_default_file_store()
-
-                for workspace_file in result_event.files:
-                    if workspace_file.kind != "file" or not workspace_file.file_id:
-                        continue
-
-                    try:
-                        # Download file from Code Interpreter
-                        file_content = client.download_file(workspace_file.file_id)
-
-                        # Determine MIME type from file extension
-                        filename = workspace_file.path.split("/")[-1]
-                        mime_type, _ = mimetypes.guess_type(filename)
-                        # Default to binary if we can't determine the type
-                        mime_type = mime_type or "application/octet-stream"
-
-                        # Save to Onyx file store
-                        onyx_file_id = file_store.save_file(
-                            content=BytesIO(file_content),
-                            display_name=filename,
-                            file_origin=FileOrigin.CHAT_UPLOAD,
-                            file_type=mime_type,
-                        )
-
-                        generated_files.append(
-                            PythonExecutionFile(
-                                filename=filename,
-                                file_link=build_full_frontend_file_url(onyx_file_id),
-                            )
-                        )
-                        generated_file_ids.append(onyx_file_id)
-
-                        # Mark for cleanup
-                        file_ids_to_cleanup.append(workspace_file.file_id)
-
-                    except Exception as e:
-                        logger.error(
-                            f"Failed to handle generated file "
-                            f"{workspace_file.path}: {e}"
-                        )
-
-                # Cleanup Code Interpreter files (generated files)
-                for ci_file_id in file_ids_to_cleanup:
-                    try:
-                        client.delete_file(ci_file_id)
-                    except Exception as e:
-                        logger.error(
-                            f"Failed to delete Code Interpreter generated "
-                            f"file {ci_file_id}: {e}"
-                        )
-
-                # Cleanup staged input files
-                for file_mapping in files_to_stage:
-                    try:
-                        client.delete_file(file_mapping["file_id"])
-                    except Exception as e:
-                        logger.error(
-                            f"Failed to delete Code Interpreter staged "
-                            f"file {file_mapping['file_id']}: {e}"
-                        )
-
-                # Emit file_ids once files are processed
-                if generated_file_ids:
+            for event in client.execute_streaming(
+                code=code,
+                timeout_ms=CODE_INTERPRETER_DEFAULT_TIMEOUT_MS,
+                files=files_to_stage or None,
+            ):
+                if isinstance(event, StreamOutputEvent):
+                    if event.stream == "stdout":
+                        stdout_parts.append(event.data)
+                    else:
+                        stderr_parts.append(event.data)
+                    # Emit incremental delta to frontend
                    self.emitter.emit(
                        Packet(
                            placement=placement,
-                            obj=PythonToolDelta(file_ids=generated_file_ids),
+                            obj=PythonToolDelta(
+                                stdout=event.data if event.stream == "stdout" else "",
+                                stderr=event.data if event.stream == "stderr" else "",
+                            ),
                        )
                    )
+                elif isinstance(event, StreamResultEvent):
+                    result_event = event
+                elif isinstance(event, StreamErrorEvent):
+                    raise RuntimeError(f"Code interpreter error: {event.message}")

-                # Build result
-                result = LlmPythonExecutionResult(
-                    stdout=truncated_stdout,
-                    stderr=truncated_stderr,
-                    exit_code=result_event.exit_code,
-                    timed_out=result_event.timed_out,
-                    generated_files=generated_files,
-                    error=(None if result_event.exit_code == 0 else truncated_stderr),
+            if result_event is None:
+                raise RuntimeError(
+                    "Code interpreter stream ended without a result event"
                )

-                # Serialize result for LLM
-                adapter = TypeAdapter(LlmPythonExecutionResult)
-                llm_response = adapter.dump_json(result).decode()
+            full_stdout = "".join(stdout_parts)
+            full_stderr = "".join(stderr_parts)

-                return ToolResponse(
-                    rich_response=PythonToolRichResponse(
-                        generated_files=generated_files,
-                    ),
-                    llm_facing_response=llm_response,
-                )
+            # Truncate output for LLM consumption
+            truncated_stdout = _truncate_output(
+                full_stdout, CODE_INTERPRETER_MAX_OUTPUT_LENGTH, "stdout"
+            )
+            truncated_stderr = _truncate_output(
+                full_stderr, CODE_INTERPRETER_MAX_OUTPUT_LENGTH, "stderr"
+            )

-            except Exception as e:
-                logger.error(f"Python execution failed: {e}")
-                error_msg = str(e)
+            # Handle generated files
+            generated_files: list[PythonExecutionFile] = []
+            generated_file_ids: list[str] = []
+            file_ids_to_cleanup: list[str] = []
+            file_store = get_default_file_store()

-                # Emit error delta
+            for workspace_file in result_event.files:
+                if workspace_file.kind != "file" or not workspace_file.file_id:
+                    continue
+
+                try:
+                    # Download file from Code Interpreter
+                    file_content = client.download_file(workspace_file.file_id)
+
+                    # Determine MIME type from file extension
+                    filename = workspace_file.path.split("/")[-1]
+                    mime_type, _ = mimetypes.guess_type(filename)
+                    # Default to binary if we can't determine the type
+                    mime_type = mime_type or "application/octet-stream"
+
+                    # Save to Onyx file store
+                    onyx_file_id = file_store.save_file(
+                        content=BytesIO(file_content),
+                        display_name=filename,
+                        file_origin=FileOrigin.CHAT_UPLOAD,
+                        file_type=mime_type,
+                    )
+
+                    generated_files.append(
+                        PythonExecutionFile(
+                            filename=filename,
+                            file_link=build_full_frontend_file_url(onyx_file_id),
+                        )
+                    )
+                    generated_file_ids.append(onyx_file_id)
+
+                    # Mark for cleanup
+                    file_ids_to_cleanup.append(workspace_file.file_id)
+
+                except Exception as e:
+                    logger.error(
+                        f"Failed to handle generated file {workspace_file.path}: {e}"
+                    )
+
+            # Cleanup Code Interpreter files (generated files)
+            for ci_file_id in file_ids_to_cleanup:
+                try:
+                    client.delete_file(ci_file_id)
+                except Exception as e:
+                    logger.error(
+                        f"Failed to delete Code Interpreter generated file {ci_file_id}: {e}"
+                    )
+
+            # Cleanup staged input files
+            for file_mapping in files_to_stage:
+                try:
+                    client.delete_file(file_mapping["file_id"])
+                except Exception as e:
+                    logger.error(
+                        f"Failed to delete Code Interpreter staged file {file_mapping['file_id']}: {e}"
+                    )
+
+            # Emit file_ids once files are processed
+            if generated_file_ids:
                self.emitter.emit(
                    Packet(
                        placement=placement,
-                        obj=PythonToolDelta(
-                            stdout="",
-                            stderr=error_msg,
-                            file_ids=[],
-                        ),
+                        obj=PythonToolDelta(file_ids=generated_file_ids),
                    )
                )

-                # Return error result
-                result = LlmPythonExecutionResult(
-                    stdout="",
-                    stderr=error_msg,
-                    exit_code=-1,
-                    timed_out=False,
-                    generated_files=[],
-                    error=error_msg,
-                )
+            # Build result
+            result = LlmPythonExecutionResult(
+                stdout=truncated_stdout,
+                stderr=truncated_stderr,
+                exit_code=result_event.exit_code,
+                timed_out=result_event.timed_out,
+                generated_files=generated_files,
+                error=None if result_event.exit_code == 0 else truncated_stderr,
+            )

-                adapter = TypeAdapter(LlmPythonExecutionResult)
-                llm_response = adapter.dump_json(result).decode()
+            # Serialize result for LLM
+            adapter = TypeAdapter(LlmPythonExecutionResult)
+            llm_response = adapter.dump_json(result).decode()

-                return ToolResponse(
-                    rich_response=None,
-                    llm_facing_response=llm_response,
+            return ToolResponse(
+                rich_response=None,  # No rich response needed for Python tool
+                llm_facing_response=llm_response,
+            )
+
+        except Exception as e:
+            logger.error(f"Python execution failed: {e}")
+            error_msg = str(e)
+
+            # Emit error delta
+            self.emitter.emit(
+                Packet(
+                    placement=placement,
+                    obj=PythonToolDelta(
+                        stdout="",
+                        stderr=error_msg,
+                        file_ids=[],
+                    ),
                )
+            )
+
+            # Return error result
+            result = LlmPythonExecutionResult(
+                stdout="",
+                stderr=error_msg,
+                exit_code=-1,
+                timed_out=False,
+                generated_files=[],
+                error=error_msg,
+            )
+
+            adapter = TypeAdapter(LlmPythonExecutionResult)
+            llm_response = adapter.dump_json(result).decode()
+
+            return ToolResponse(
+                rich_response=None,
+                llm_facing_response=llm_response,
+            )
--- a/backend/onyx/tools/tool_implementations/web_search/web_search_tool.py
+++ b/backend/onyx/tools/tool_implementations/web_search/web_search_tool.py
@@ -1,5 +1,6 @@
 import json
 from typing import Any
+from typing import cast

 from sqlalchemy.orm import Session
 from typing_extensions import override
@@ -56,30 +57,6 @@ def _sanitize_query(query: str) -> str:
    return " ".join(sanitized.split())


-def _normalize_queries_input(raw: Any) -> list[str]:
-    """Coerce LLM output to a list of sanitized query strings.
-
-    Accepts a bare string or a list (possibly with non-string elements).
-    Sanitizes each query (strip control chars, normalize whitespace) and
-    drops empty or whitespace-only entries.
-    """
-    if isinstance(raw, str):
-        raw = raw.strip()
-        if not raw:
-            return []
-        raw = [raw]
-    elif not isinstance(raw, list):
-        return []
-    result: list[str] = []
-    for q in raw:
-        if q is None:
-            continue
-        sanitized = _sanitize_query(str(q))
-        if sanitized:
-            result.append(sanitized)
-    return result
-
-
 class WebSearchTool(Tool[WebSearchToolOverrideKwargs]):
    NAME = "web_search"
    DESCRIPTION = "Search the web for information."
@@ -212,7 +189,13 @@ class WebSearchTool(Tool[WebSearchToolOverrideKwargs]):
                    f'like: {{"queries": ["your search query here"]}}'
                ),
            )
-        queries = _normalize_queries_input(llm_kwargs[QUERIES_FIELD])
+        raw_queries = cast(list[str], llm_kwargs[QUERIES_FIELD])
+
+        # Normalize queries:
+        # - remove control characters (null bytes, etc.) that LLMs sometimes produce
+        # - collapse whitespace and strip
+        # - drop empty/whitespace-only queries
+        queries = [sanitized for q in raw_queries if (sanitized := _sanitize_query(q))]
        if not queries:
            raise ToolCallException(
                message=(
--- a/backend/requirements/default.txt
+++ b/backend/requirements/default.txt
@@ -596,7 +596,7 @@ mypy-extensions==1.0.0
    #   typing-inspect
 nest-asyncio==1.6.0
    # via onyx
-nltk==3.9.3
+nltk==3.9.1
    # via unstructured
 numpy==2.4.1
    # via
--- a/backend/scripts/dev_run_background_jobs.py
+++ b/backend/scripts/dev_run_background_jobs.py
@@ -16,6 +16,10 @@ def monitor_process(process_name: str, process: subprocess.Popen) -> None:


 def run_jobs() -> None:
+    # Lightweight mode is hard-coded on by default; set this to False to run separate background workers instead
+    use_lightweight = True
+
+    # command setup
    cmd_worker_primary = [
        "celery",
        "-A",
@@ -70,48 +74,6 @@ def run_jobs() -> None:
        "--queues=connector_doc_fetching",
    ]

-    cmd_worker_heavy = [
-        "celery",
-        "-A",
-        "onyx.background.celery.versioned_apps.heavy",
-        "worker",
-        "--pool=threads",
-        "--concurrency=4",
-        "--prefetch-multiplier=1",
-        "--loglevel=INFO",
-        "--hostname=heavy@%n",
-        "-Q",
-        "connector_pruning,connector_doc_permissions_sync,connector_external_group_sync,csv_generation,sandbox",
-    ]
-
-    cmd_worker_monitoring = [
-        "celery",
-        "-A",
-        "onyx.background.celery.versioned_apps.monitoring",
-        "worker",
-        "--pool=threads",
-        "--concurrency=1",
-        "--prefetch-multiplier=1",
-        "--loglevel=INFO",
-        "--hostname=monitoring@%n",
-        "-Q",
-        "monitoring",
-    ]
-
-    cmd_worker_user_file_processing = [
-        "celery",
-        "-A",
-        "onyx.background.celery.versioned_apps.user_file_processing",
-        "worker",
-        "--pool=threads",
-        "--concurrency=2",
-        "--prefetch-multiplier=1",
-        "--loglevel=INFO",
-        "--hostname=user_file_processing@%n",
-        "-Q",
-        "user_file_processing,user_file_project_sync,user_file_delete",
-    ]
-
    cmd_beat = [
        "celery",
        "-A",
@@ -120,31 +82,144 @@ def run_jobs() -> None:
        "--loglevel=INFO",
    ]

-    all_workers = [
-        ("PRIMARY", cmd_worker_primary),
-        ("LIGHT", cmd_worker_light),
-        ("DOCPROCESSING", cmd_worker_docprocessing),
-        ("DOCFETCHING", cmd_worker_docfetching),
-        ("HEAVY", cmd_worker_heavy),
-        ("MONITORING", cmd_worker_monitoring),
-        ("USER_FILE_PROCESSING", cmd_worker_user_file_processing),
-        ("BEAT", cmd_beat),
-    ]
+    # Prepare background worker commands based on mode
+    if use_lightweight:
+        print("Starting workers in LIGHTWEIGHT mode (single background worker)")
+        cmd_worker_background = [
+            "celery",
+            "-A",
+            "onyx.background.celery.versioned_apps.background",
+            "worker",
+            "--pool=threads",
+            "--concurrency=6",
+            "--prefetch-multiplier=1",
+            "--loglevel=INFO",
+            "--hostname=background@%n",
+            "-Q",
+            "connector_pruning,connector_doc_permissions_sync,connector_external_group_sync,csv_generation,monitoring,user_file_processing,user_file_project_sync,user_file_delete,opensearch_migration",
+        ]
+        background_workers = [("BACKGROUND", cmd_worker_background)]
+    else:
+        print("Starting workers in STANDARD mode (separate background workers)")
+        cmd_worker_heavy = [
+            "celery",
+            "-A",
+            "onyx.background.celery.versioned_apps.heavy",
+            "worker",
+            "--pool=threads",
+            "--concurrency=4",
+            "--prefetch-multiplier=1",
+            "--loglevel=INFO",
+            "--hostname=heavy@%n",
+            "-Q",
+            "connector_pruning,sandbox",
+        ]
+        cmd_worker_monitoring = [
+            "celery",
+            "-A",
+            "onyx.background.celery.versioned_apps.monitoring",
+            "worker",
+            "--pool=threads",
+            "--concurrency=1",
+            "--prefetch-multiplier=1",
+            "--loglevel=INFO",
+            "--hostname=monitoring@%n",
+            "-Q",
+            "monitoring",
+        ]
+        cmd_worker_user_file_processing = [
+            "celery",
+            "-A",
+            "onyx.background.celery.versioned_apps.user_file_processing",
+            "worker",
+            "--pool=threads",
+            "--concurrency=2",
+            "--prefetch-multiplier=1",
+            "--loglevel=INFO",
+            "--hostname=user_file_processing@%n",
+            "-Q",
+            "user_file_processing,user_file_project_sync,connector_doc_permissions_sync,connector_external_group_sync,csv_generation,user_file_delete",
+        ]
+        background_workers = [
+            ("HEAVY", cmd_worker_heavy),
+            ("MONITORING", cmd_worker_monitoring),
+            ("USER_FILE_PROCESSING", cmd_worker_user_file_processing),
+        ]

-    processes = []
-    for name, cmd in all_workers:
+    # spawn processes
+    worker_primary_process = subprocess.Popen(
+        cmd_worker_primary, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
+    )
+
+    worker_light_process = subprocess.Popen(
+        cmd_worker_light, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
+    )
+
+    worker_docprocessing_process = subprocess.Popen(
+        cmd_worker_docprocessing,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        text=True,
+    )
+
+    worker_docfetching_process = subprocess.Popen(
+        cmd_worker_docfetching,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        text=True,
+    )
+
+    beat_process = subprocess.Popen(
+        cmd_beat, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
+    )
+
+    # Spawn background worker processes based on mode
+    background_processes = []
+    for name, cmd in background_workers:
        process = subprocess.Popen(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
        )
-        processes.append((name, process))
+        background_processes.append((name, process))

-    threads = []
-    for name, process in processes:
+    # monitor threads
+    worker_primary_thread = threading.Thread(
+        target=monitor_process, args=("PRIMARY", worker_primary_process)
+    )
+    worker_light_thread = threading.Thread(
+        target=monitor_process, args=("LIGHT", worker_light_process)
+    )
+    worker_docprocessing_thread = threading.Thread(
+        target=monitor_process, args=("DOCPROCESSING", worker_docprocessing_process)
+    )
+    worker_docfetching_thread = threading.Thread(
+        target=monitor_process, args=("DOCFETCHING", worker_docfetching_process)
+    )
+    beat_thread = threading.Thread(target=monitor_process, args=("BEAT", beat_process))
+
+    # Create monitor threads for background workers
+    background_threads = []
+    for name, process in background_processes:
        thread = threading.Thread(target=monitor_process, args=(name, process))
-        threads.append(thread)
+        background_threads.append(thread)
+
+    # Start all threads
+    worker_primary_thread.start()
+    worker_light_thread.start()
+    worker_docprocessing_thread.start()
+    worker_docfetching_thread.start()
+    beat_thread.start()
+
+    for thread in background_threads:
        thread.start()

-    for thread in threads:
+    # Wait for all threads
+    worker_primary_thread.join()
+    worker_light_thread.join()
+    worker_docprocessing_thread.join()
+    worker_docfetching_thread.join()
+    beat_thread.join()
+
+    for thread in background_threads:
        thread.join()


--- a/backend/scripts/restart_containers.sh
+++ b/backend/scripts/restart_containers.sh
@@ -1,20 +1,10 @@
 #!/bin/bash
 set -e

-SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
-COMPOSE_FILE="$SCRIPT_DIR/../../deployment/docker_compose/docker-compose.yml"
-COMPOSE_DEV_FILE="$SCRIPT_DIR/../../deployment/docker_compose/docker-compose.dev.yml"
-
-stop_and_remove_containers() {
-  docker stop onyx_postgres onyx_vespa onyx_redis onyx_minio onyx_code_interpreter 2>/dev/null || true
-  docker rm onyx_postgres onyx_vespa onyx_redis onyx_minio onyx_code_interpreter 2>/dev/null || true
-  docker compose -f "$COMPOSE_FILE" -f "$COMPOSE_DEV_FILE" --profile opensearch-enabled stop opensearch 2>/dev/null || true
-  docker compose -f "$COMPOSE_FILE" -f "$COMPOSE_DEV_FILE" --profile opensearch-enabled rm -f opensearch 2>/dev/null || true
-}
-
 cleanup() {
  echo "Error occurred. Cleaning up..."
-  stop_and_remove_containers
+  docker stop onyx_postgres onyx_vespa onyx_redis onyx_minio onyx_code_interpreter 2>/dev/null || true
+  docker rm onyx_postgres onyx_vespa onyx_redis onyx_minio onyx_code_interpreter 2>/dev/null || true
 }

 # Trap errors and output a message, then cleanup
@@ -22,26 +12,16 @@ trap 'echo "Error occurred on line $LINENO. Exiting script." >&2; cleanup' ERR

 # Usage of the script with optional volume arguments
 # ./restart_containers.sh [vespa_volume] [postgres_volume] [redis_volume]
-# [minio_volume] [--keep-opensearch-data]

-KEEP_OPENSEARCH_DATA=false
-POSITIONAL_ARGS=()
-for arg in "$@"; do
-    if [[ "$arg" == "--keep-opensearch-data" ]]; then
-        KEEP_OPENSEARCH_DATA=true
-    else
-        POSITIONAL_ARGS+=("$arg")
-    fi
-done
-
-VESPA_VOLUME=${POSITIONAL_ARGS[0]:-""}
-POSTGRES_VOLUME=${POSITIONAL_ARGS[1]:-""}
-REDIS_VOLUME=${POSITIONAL_ARGS[2]:-""}
-MINIO_VOLUME=${POSITIONAL_ARGS[3]:-""}
+VESPA_VOLUME=${1:-""}  # Default is empty if not provided
+POSTGRES_VOLUME=${2:-""}  # Default is empty if not provided
+REDIS_VOLUME=${3:-""}  # Default is empty if not provided
+MINIO_VOLUME=${4:-""}  # Default is empty if not provided

 # Stop and remove the existing containers
 echo "Stopping and removing existing containers..."
-stop_and_remove_containers
+docker stop onyx_postgres onyx_vespa onyx_redis onyx_minio onyx_code_interpreter 2>/dev/null || true
+docker rm onyx_postgres onyx_vespa onyx_redis onyx_minio onyx_code_interpreter 2>/dev/null || true

 # Start the PostgreSQL container with optional volume
 echo "Starting PostgreSQL container..."
@@ -59,29 +39,6 @@ else
    docker run --detach --name onyx_vespa --hostname vespa-container --publish 8081:8081 --publish 19071:19071 vespaengine/vespa:8
 fi

-# If OPENSEARCH_ADMIN_PASSWORD is not already set, try loading it from
-# .vscode/.env so existing dev setups that stored it there aren't silently
-# broken.
-VSCODE_ENV="$SCRIPT_DIR/../../.vscode/.env"
-if [[ -z "${OPENSEARCH_ADMIN_PASSWORD:-}" && -f "$VSCODE_ENV" ]]; then
-    set -a
-    # shellcheck source=/dev/null
-    source "$VSCODE_ENV"
-    set +a
-fi
-
-# Start the OpenSearch container using the same service from docker-compose that
-# our users use, setting OPENSEARCH_INITIAL_ADMIN_PASSWORD from the env's
-# OPENSEARCH_ADMIN_PASSWORD if it exists, else defaulting to StrongPassword123!.
-# Pass --keep-opensearch-data to preserve the opensearch-data volume across
-# restarts, else the volume is deleted so the container starts fresh.
-if [[ "$KEEP_OPENSEARCH_DATA" == "false" ]]; then
-    echo "Deleting opensearch-data volume..."
-    docker volume rm onyx_opensearch-data 2>/dev/null || true
-fi
-echo "Starting OpenSearch container..."
-docker compose -f "$COMPOSE_FILE" -f "$COMPOSE_DEV_FILE" --profile opensearch-enabled up --force-recreate -d opensearch
-
 # Start the Redis container with optional volume
 echo "Starting Redis container..."
 if [[ -n "$REDIS_VOLUME" ]]; then
@@ -103,6 +60,7 @@ echo "Starting Code Interpreter container..."
 docker run --detach --name onyx_code_interpreter --publish 8000:8000 --user root -v /var/run/docker.sock:/var/run/docker.sock onyxdotapp/code-interpreter:latest bash ./entrypoint.sh code-interpreter-api

 # Ensure alembic runs in the correct directory (backend/)
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
 PARENT_DIR="$(dirname "$SCRIPT_DIR")"
 cd "$PARENT_DIR"

--- a/backend/scripts/restart_opensearch_container.sh
+++ b/backend/scripts/restart_opensearch_container.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+# We get OPENSEARCH_ADMIN_PASSWORD from the repo's .vscode/.env file.
+source "$(dirname "$0")/../../.vscode/.env"
+
+cd "$(dirname "$0")/../../deployment/docker_compose"
+
+# Start OpenSearch.
+echo "Forcefully starting fresh OpenSearch container..."
+docker compose -f docker-compose.opensearch.yml up --force-recreate -d opensearch
--- a/backend/scripts/supervisord_entrypoint.sh
+++ b/backend/scripts/supervisord_entrypoint.sh
@@ -1,5 +1,23 @@
 #!/bin/sh
-# Entrypoint script for supervisord
+# Entrypoint script for supervisord that sets environment variables
+# for controlling which celery workers to start
+
+# Default to lightweight mode if not set
+if [ -z "$USE_LIGHTWEIGHT_BACKGROUND_WORKER" ]; then
+    export USE_LIGHTWEIGHT_BACKGROUND_WORKER="true"
+fi
+
+# Set the complementary variable for supervisord
+# because it doesn't support %(not ENV_USE_LIGHTWEIGHT_BACKGROUND_WORKER) syntax
+if [ "$USE_LIGHTWEIGHT_BACKGROUND_WORKER" = "true" ]; then
+    export USE_SEPARATE_BACKGROUND_WORKERS="false"
+else
+    export USE_SEPARATE_BACKGROUND_WORKERS="true"
+fi
+
+echo "Worker mode configuration:"
+echo "  USE_LIGHTWEIGHT_BACKGROUND_WORKER=$USE_LIGHTWEIGHT_BACKGROUND_WORKER"
+echo "  USE_SEPARATE_BACKGROUND_WORKERS=$USE_SEPARATE_BACKGROUND_WORKERS"

 # Launch supervisord with environment variables available
 exec /usr/bin/supervisord -c /etc/supervisor/conf.d/supervisord.conf
--- a/backend/supervisord.conf
+++ b/backend/supervisord.conf
@@ -39,6 +39,7 @@ autorestart=true
 startsecs=10
 stopasgroup=true

+# Standard mode: Light worker for fast operations
 # NOTE: only allowing configuration here and not in the other celery workers,
 # since this is often the bottleneck for "sync" jobs (e.g. document set syncing,
 # user group syncing, deletion, etc.)
@@ -53,7 +54,26 @@ redirect_stderr=true
 autorestart=true
 startsecs=10
 stopasgroup=true
+autostart=%(ENV_USE_SEPARATE_BACKGROUND_WORKERS)s

+# Lightweight mode: single consolidated background worker
+# Used when USE_LIGHTWEIGHT_BACKGROUND_WORKER=true (default)
+# Consolidates: light, docprocessing, docfetching, heavy, monitoring, user_file_processing
+[program:celery_worker_background]
+command=celery -A onyx.background.celery.versioned_apps.background worker
+    --loglevel=INFO
+    --hostname=background@%%n
+    -Q vespa_metadata_sync,connector_deletion,doc_permissions_upsert,checkpoint_cleanup,index_attempt_cleanup,sandbox,docprocessing,connector_doc_fetching,connector_pruning,connector_doc_permissions_sync,connector_external_group_sync,csv_generation,monitoring,user_file_processing,user_file_project_sync,opensearch_migration
+stdout_logfile=/var/log/celery_worker_background.log
+stdout_logfile_maxbytes=16MB
+redirect_stderr=true
+autorestart=true
+startsecs=10
+stopasgroup=true
+autostart=%(ENV_USE_LIGHTWEIGHT_BACKGROUND_WORKER)s
+
+# Standard mode: separate workers for different background tasks
+# Used when USE_LIGHTWEIGHT_BACKGROUND_WORKER=false
 [program:celery_worker_heavy]
 command=celery -A onyx.background.celery.versioned_apps.heavy worker
    --loglevel=INFO
@@ -65,7 +85,9 @@ redirect_stderr=true
 autorestart=true
 startsecs=10
 stopasgroup=true
+autostart=%(ENV_USE_SEPARATE_BACKGROUND_WORKERS)s

+# Standard mode: Document processing worker
 [program:celery_worker_docprocessing]
 command=celery -A onyx.background.celery.versioned_apps.docprocessing worker
    --loglevel=INFO
@@ -77,6 +99,7 @@ redirect_stderr=true
 autorestart=true
 startsecs=10
 stopasgroup=true
+autostart=%(ENV_USE_SEPARATE_BACKGROUND_WORKERS)s

 [program:celery_worker_user_file_processing]
 command=celery -A onyx.background.celery.versioned_apps.user_file_processing worker
@@ -89,7 +112,9 @@ redirect_stderr=true
 autorestart=true
 startsecs=10
 stopasgroup=true
+autostart=%(ENV_USE_SEPARATE_BACKGROUND_WORKERS)s

+# Standard mode: Document fetching worker
 [program:celery_worker_docfetching]
 command=celery -A onyx.background.celery.versioned_apps.docfetching worker
    --loglevel=INFO
@@ -101,6 +126,7 @@ redirect_stderr=true
 autorestart=true
 startsecs=10
 stopasgroup=true
+autostart=%(ENV_USE_SEPARATE_BACKGROUND_WORKERS)s

 [program:celery_worker_monitoring]
 command=celery -A onyx.background.celery.versioned_apps.monitoring worker
@@ -113,6 +139,7 @@ redirect_stderr=true
 autorestart=true
 startsecs=10
 stopasgroup=true
+autostart=%(ENV_USE_SEPARATE_BACKGROUND_WORKERS)s


 # Job scheduler for periodic tasks
@@ -170,6 +197,7 @@ command=tail -qF
    /var/log/celery_beat.log
    /var/log/celery_worker_primary.log
    /var/log/celery_worker_light.log
+    /var/log/celery_worker_background.log
    /var/log/celery_worker_heavy.log
    /var/log/celery_worker_docprocessing.log
    /var/log/celery_worker_monitoring.log
--- a/backend/tests/external_dependency_unit/celery/test_pruning_hierarchy_nodes.py
+++ b/backend/tests/external_dependency_unit/celery/test_pruning_hierarchy_nodes.py
@@ -5,8 +5,6 @@ Verifies that:
 1. extract_ids_from_runnable_connector correctly separates hierarchy nodes from doc IDs
 2. Extracted hierarchy nodes are correctly upserted to Postgres via upsert_hierarchy_nodes_batch
 3. Upserting is idempotent (running twice doesn't duplicate nodes)
-4. Document-to-hierarchy-node linkage is updated during pruning
-5. link_hierarchy_nodes_to_documents links nodes that are also documents

 Uses a mock SlimConnectorWithPermSync that yields known hierarchy nodes and slim documents,
 combined with a real PostgreSQL database for verifying persistence.
@@ -29,13 +27,9 @@ from onyx.db.enums import HierarchyNodeType
 from onyx.db.hierarchy import ensure_source_node_exists
 from onyx.db.hierarchy import get_all_hierarchy_nodes_for_source
 from onyx.db.hierarchy import get_hierarchy_node_by_raw_id
-from onyx.db.hierarchy import link_hierarchy_nodes_to_documents
-from onyx.db.hierarchy import update_document_parent_hierarchy_nodes
 from onyx.db.hierarchy import upsert_hierarchy_nodes_batch
-from onyx.db.models import Document as DbDocument
 from onyx.db.models import HierarchyNode as DBHierarchyNode
 from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
-from onyx.kg.models import KGStage

 # ---------------------------------------------------------------------------
 # Constants
@@ -95,18 +89,8 @@ def _make_hierarchy_nodes() -> list[PydanticHierarchyNode]:
    ]


-DOC_PARENT_MAP = {
-    "msg-001": CHANNEL_A_ID,
-    "msg-002": CHANNEL_A_ID,
-    "msg-003": CHANNEL_B_ID,
-}
-
-
 def _make_slim_docs() -> list[SlimDocument | PydanticHierarchyNode]:
-    return [
-        SlimDocument(id=doc_id, parent_hierarchy_raw_node_id=DOC_PARENT_MAP.get(doc_id))
-        for doc_id in SLIM_DOC_IDS
-    ]
+    return [SlimDocument(id=doc_id) for doc_id in SLIM_DOC_IDS]


 class MockSlimConnectorWithPermSync(SlimConnectorWithPermSync):
@@ -142,31 +126,14 @@ class MockSlimConnectorWithPermSync(SlimConnectorWithPermSync):
 # ---------------------------------------------------------------------------


-def _cleanup_test_data(db_session: Session) -> None:
-    """Remove all test hierarchy nodes and documents to isolate tests."""
-    for doc_id in SLIM_DOC_IDS:
-        db_session.query(DbDocument).filter(DbDocument.id == doc_id).delete()
+def _cleanup_test_hierarchy_nodes(db_session: Session) -> None:
+    """Remove all hierarchy nodes for TEST_SOURCE to isolate tests."""
    db_session.query(DBHierarchyNode).filter(
        DBHierarchyNode.source == TEST_SOURCE
    ).delete()
    db_session.commit()


-def _create_test_documents(db_session: Session) -> list[DbDocument]:
-    """Insert minimal Document rows for our test doc IDs."""
-    docs = []
-    for doc_id in SLIM_DOC_IDS:
-        doc = DbDocument(
-            id=doc_id,
-            semantic_id=doc_id,
-            kg_stage=KGStage.NOT_STARTED,
-        )
-        db_session.add(doc)
-        docs.append(doc)
-    db_session.commit()
-    return docs
-
-
 # ---------------------------------------------------------------------------
 # Tests
 # ---------------------------------------------------------------------------
@@ -180,14 +147,14 @@ def test_pruning_extracts_hierarchy_nodes(db_session: Session) -> None:  # noqa:
    result = extract_ids_from_runnable_connector(connector, callback=None)

    # Doc IDs should include both slim doc IDs and hierarchy node raw_node_ids
-    # (hierarchy node IDs are added to raw_id_to_parent so they aren't pruned)
+    # (hierarchy node IDs are added to doc_ids so they aren't pruned)
    expected_ids = {
        CHANNEL_A_ID,
        CHANNEL_B_ID,
        CHANNEL_C_ID,
        *SLIM_DOC_IDS,
    }
-    assert result.raw_id_to_parent.keys() == expected_ids
+    assert result.doc_ids == expected_ids

    # Hierarchy nodes should be the 3 channels
    assert len(result.hierarchy_nodes) == 3
@@ -198,7 +165,7 @@ def test_pruning_extracts_hierarchy_nodes(db_session: Session) -> None:  # noqa:
 def test_pruning_upserts_hierarchy_nodes_to_db(db_session: Session) -> None:
    """Full flow: extract hierarchy nodes from mock connector, upsert to Postgres,
    then verify the DB state (node count, parent relationships, permissions)."""
-    _cleanup_test_data(db_session)
+    _cleanup_test_hierarchy_nodes(db_session)

    # Step 1: ensure the SOURCE node exists (mirrors what the pruning task does)
    source_node = ensure_source_node_exists(db_session, TEST_SOURCE, commit=True)
@@ -263,7 +230,7 @@ def test_pruning_upserts_hierarchy_nodes_public_connector(
 ) -> None:
    """When the connector's access type is PUBLIC, all hierarchy nodes must be
    marked is_public=True regardless of their external_access settings."""
-    _cleanup_test_data(db_session)
+    _cleanup_test_hierarchy_nodes(db_session)

    ensure_source_node_exists(db_session, TEST_SOURCE, commit=True)

@@ -290,7 +257,7 @@ def test_pruning_upserts_hierarchy_nodes_public_connector(
 def test_pruning_hierarchy_node_upsert_idempotency(db_session: Session) -> None:
    """Upserting the same hierarchy nodes twice must not create duplicates.
    The second call should update existing rows in place."""
-    _cleanup_test_data(db_session)
+    _cleanup_test_hierarchy_nodes(db_session)

    ensure_source_node_exists(db_session, TEST_SOURCE, commit=True)

@@ -328,7 +295,7 @@ def test_pruning_hierarchy_node_upsert_idempotency(db_session: Session) -> None:

 def test_pruning_hierarchy_node_upsert_updates_fields(db_session: Session) -> None:
    """Upserting a hierarchy node with changed fields should update the existing row."""
-    _cleanup_test_data(db_session)
+    _cleanup_test_hierarchy_nodes(db_session)

    ensure_source_node_exists(db_session, TEST_SOURCE, commit=True)

@@ -375,193 +342,3 @@ def test_pruning_hierarchy_node_upsert_updates_fields(db_session: Session) -> No
    assert db_node.is_public is True
    assert db_node.external_user_emails is not None
    assert set(db_node.external_user_emails) == {"new_user@example.com"}
-
-
-# ---------------------------------------------------------------------------
-# Document-to-hierarchy-node linkage tests
-# ---------------------------------------------------------------------------
-
-
-def test_extraction_preserves_parent_hierarchy_raw_node_id(
-    db_session: Session,  # noqa: ARG001
-) -> None:
-    """extract_ids_from_runnable_connector should carry the
-    parent_hierarchy_raw_node_id from SlimDocument into the raw_id_to_parent dict."""
-    connector = MockSlimConnectorWithPermSync()
-    result = extract_ids_from_runnable_connector(connector, callback=None)
-
-    for doc_id, expected_parent in DOC_PARENT_MAP.items():
-        assert (
-            result.raw_id_to_parent[doc_id] == expected_parent
-        ), f"raw_id_to_parent[{doc_id}] should be {expected_parent}"
-
-    # Hierarchy node entries have None parent (they aren't documents)
-    for channel_id in [CHANNEL_A_ID, CHANNEL_B_ID, CHANNEL_C_ID]:
-        assert result.raw_id_to_parent[channel_id] is None
-
-
-def test_update_document_parent_hierarchy_nodes(db_session: Session) -> None:
-    """update_document_parent_hierarchy_nodes should set
-    Document.parent_hierarchy_node_id for each document in the mapping."""
-    _cleanup_test_data(db_session)
-
-    source_node = ensure_source_node_exists(db_session, TEST_SOURCE, commit=True)
-    upserted = upsert_hierarchy_nodes_batch(
-        db_session=db_session,
-        nodes=_make_hierarchy_nodes(),
-        source=TEST_SOURCE,
-        commit=True,
-        is_connector_public=False,
-    )
-    node_id_by_raw = {n.raw_node_id: n.id for n in upserted}
-
-    # Create documents with no parent set
-    docs = _create_test_documents(db_session)
-    for doc in docs:
-        assert doc.parent_hierarchy_node_id is None
-
-    # Build resolved map (same logic as _resolve_and_update_document_parents)
-    resolved: dict[str, int | None] = {}
-    for doc_id, raw_parent in DOC_PARENT_MAP.items():
-        resolved[doc_id] = node_id_by_raw.get(raw_parent, source_node.id)
-
-    updated = update_document_parent_hierarchy_nodes(
-        db_session=db_session,
-        doc_parent_map=resolved,
-        commit=True,
-    )
-    assert updated == len(SLIM_DOC_IDS)
-
-    # Verify each document now points to the correct hierarchy node
-    db_session.expire_all()
-    for doc_id, raw_parent in DOC_PARENT_MAP.items():
-        tmp_doc = db_session.get(DbDocument, doc_id)
-        assert tmp_doc is not None
-        doc = tmp_doc
-        expected_node_id = node_id_by_raw[raw_parent]
-        assert (
-            doc.parent_hierarchy_node_id == expected_node_id
-        ), f"Document {doc_id} should point to node for {raw_parent}"
-
-
-def test_update_document_parent_is_idempotent(db_session: Session) -> None:
-    """Running update_document_parent_hierarchy_nodes a second time with the
-    same mapping should update zero rows."""
-    _cleanup_test_data(db_session)
-
-    ensure_source_node_exists(db_session, TEST_SOURCE, commit=True)
-    upserted = upsert_hierarchy_nodes_batch(
-        db_session=db_session,
-        nodes=_make_hierarchy_nodes(),
-        source=TEST_SOURCE,
-        commit=True,
-        is_connector_public=False,
-    )
-    node_id_by_raw = {n.raw_node_id: n.id for n in upserted}
-    _create_test_documents(db_session)
-
-    resolved: dict[str, int | None] = {
-        doc_id: node_id_by_raw[raw_parent]
-        for doc_id, raw_parent in DOC_PARENT_MAP.items()
-    }
-
-    first_updated = update_document_parent_hierarchy_nodes(
-        db_session=db_session,
-        doc_parent_map=resolved,
-        commit=True,
-    )
-    assert first_updated == len(SLIM_DOC_IDS)
-
-    second_updated = update_document_parent_hierarchy_nodes(
-        db_session=db_session,
-        doc_parent_map=resolved,
-        commit=True,
-    )
-    assert second_updated == 0
-
-
-def test_link_hierarchy_nodes_to_documents_for_confluence(
-    db_session: Session,
-) -> None:
-    """For sources in SOURCES_WITH_HIERARCHY_NODE_DOCUMENTS (e.g. Confluence),
-    link_hierarchy_nodes_to_documents should set HierarchyNode.document_id
-    when a hierarchy node's raw_node_id matches a document ID."""
-    _cleanup_test_data(db_session)
-    confluence_source = DocumentSource.CONFLUENCE
-
-    # Clean up any existing Confluence hierarchy nodes
-    db_session.query(DBHierarchyNode).filter(
-        DBHierarchyNode.source == confluence_source
-    ).delete()
-    db_session.commit()
-
-    ensure_source_node_exists(db_session, confluence_source, commit=True)
-
-    # Create a hierarchy node whose raw_node_id matches a document ID
-    page_node_id = "confluence-page-123"
-    nodes = [
-        PydanticHierarchyNode(
-            raw_node_id=page_node_id,
-            raw_parent_id=None,
-            display_name="Test Page",
-            link="https://wiki.example.com/page/123",
-            node_type=HierarchyNodeType.PAGE,
-        ),
-    ]
-    upsert_hierarchy_nodes_batch(
-        db_session=db_session,
-        nodes=nodes,
-        source=confluence_source,
-        commit=True,
-        is_connector_public=False,
-    )
-
-    # Verify the node exists but has no document_id yet
-    db_node = get_hierarchy_node_by_raw_id(db_session, page_node_id, confluence_source)
-    assert db_node is not None
-    assert db_node.document_id is None
-
-    # Create a document with the same ID as the hierarchy node
-    doc = DbDocument(
-        id=page_node_id,
-        semantic_id="Test Page",
-        kg_stage=KGStage.NOT_STARTED,
-    )
-    db_session.add(doc)
-    db_session.commit()
-
-    # Link nodes to documents
-    linked = link_hierarchy_nodes_to_documents(
-        db_session=db_session,
-        document_ids=[page_node_id],
-        source=confluence_source,
-        commit=True,
-    )
-    assert linked == 1
-
-    # Verify the hierarchy node now has document_id set
-    db_session.expire_all()
-    db_node = get_hierarchy_node_by_raw_id(db_session, page_node_id, confluence_source)
-    assert db_node is not None
-    assert db_node.document_id == page_node_id
-
-    # Cleanup
-    db_session.query(DbDocument).filter(DbDocument.id == page_node_id).delete()
-    db_session.query(DBHierarchyNode).filter(
-        DBHierarchyNode.source == confluence_source
-    ).delete()
-    db_session.commit()
-
-
-def test_link_hierarchy_nodes_skips_non_hierarchy_sources(
-    db_session: Session,
-) -> None:
-    """link_hierarchy_nodes_to_documents should return 0 for sources that
-    don't support hierarchy-node-as-document (e.g. Slack, Google Drive)."""
-    linked = link_hierarchy_nodes_to_documents(
-        db_session=db_session,
-        document_ids=SLIM_DOC_IDS,
-        source=TEST_SOURCE,  # Slack — not in SOURCES_WITH_HIERARCHY_NODE_DOCUMENTS
-        commit=False,
-    )
-    assert linked == 0
--- a/backend/tests/external_dependency_unit/llm/test_llm_provider.py
+++ b/backend/tests/external_dependency_unit/llm/test_llm_provider.py
@@ -11,6 +11,7 @@ from unittest.mock import patch
 from uuid import uuid4

 import pytest
+from fastapi import HTTPException
 from sqlalchemy.orm import Session

 from onyx.db.enums import LLMModelFlowType
@@ -19,8 +20,6 @@ from onyx.db.llm import remove_llm_provider
 from onyx.db.llm import update_default_provider
 from onyx.db.llm import upsert_llm_provider
 from onyx.db.models import UserRole
-from onyx.error_handling.error_codes import OnyxErrorCode
-from onyx.error_handling.exceptions import OnyxError
 from onyx.llm.constants import LlmProviderNames
 from onyx.llm.interfaces import LLM
 from onyx.server.manage.llm.api import (
@@ -123,16 +122,16 @@ class TestLLMConfigurationEndpoint:
        finally:
            db_session.rollback()

-    def test_failed_llm_test_raises_onyx_error(
+    def test_failed_llm_test_raises_http_exception(
        self,
        db_session: Session,
        provider_name: str,  # noqa: ARG002
    ) -> None:
        """
-        Test that a failed LLM test raises an OnyxError with VALIDATION_ERROR.
+        Test that a failed LLM test raises an HTTPException with status 400.

        When test_llm returns an error message, the endpoint should raise
-        an OnyxError with the error details.
+        an HTTPException with the error details.
        """
        error_message = "Invalid API key: Authentication failed"

@@ -144,7 +143,7 @@ class TestLLMConfigurationEndpoint:
            with patch(
                "onyx.server.manage.llm.api.test_llm", side_effect=mock_test_llm_failure
            ):
-                with pytest.raises(OnyxError) as exc_info:
+                with pytest.raises(HTTPException) as exc_info:
                    run_test_llm_configuration(
                        test_llm_request=LLMTestRequest(
                            provider=LlmProviderNames.OPENAI,
@@ -157,8 +156,9 @@ class TestLLMConfigurationEndpoint:
                        db_session=db_session,
                    )

-                assert exc_info.value.error_code == OnyxErrorCode.VALIDATION_ERROR
-                assert exc_info.value.message == error_message
+                # Verify the exception details
+                assert exc_info.value.status_code == 400
+                assert exc_info.value.detail == error_message

        finally:
            db_session.rollback()
@@ -536,11 +536,11 @@ class TestDefaultProviderEndpoint:
                remove_llm_provider(db_session, provider.id)

            # Now run_test_default_provider should fail
-            with pytest.raises(OnyxError) as exc_info:
+            with pytest.raises(HTTPException) as exc_info:
                run_test_default_provider(_=_create_mock_admin())

-            assert exc_info.value.error_code == OnyxErrorCode.VALIDATION_ERROR
-            assert "No LLM Provider setup" in exc_info.value.message
+            assert exc_info.value.status_code == 400
+            assert "No LLM Provider setup" in exc_info.value.detail

        finally:
            db_session.rollback()
@@ -581,11 +581,11 @@ class TestDefaultProviderEndpoint:
            with patch(
                "onyx.server.manage.llm.api.test_llm", side_effect=mock_test_llm_failure
            ):
-                with pytest.raises(OnyxError) as exc_info:
+                with pytest.raises(HTTPException) as exc_info:
                    run_test_default_provider(_=_create_mock_admin())

-                assert exc_info.value.error_code == OnyxErrorCode.VALIDATION_ERROR
-                assert exc_info.value.message == error_message
+                assert exc_info.value.status_code == 400
+                assert exc_info.value.detail == error_message

        finally:
            db_session.rollback()
--- a/backend/tests/external_dependency_unit/llm/test_llm_provider_api_base.py
+++ b/backend/tests/external_dependency_unit/llm/test_llm_provider_api_base.py
@@ -16,14 +16,13 @@ from unittest.mock import patch
 from uuid import uuid4

 import pytest
+from fastapi import HTTPException
 from sqlalchemy.orm import Session

 from onyx.db.llm import fetch_existing_llm_provider
 from onyx.db.llm import remove_llm_provider
 from onyx.db.llm import upsert_llm_provider
 from onyx.db.models import UserRole
-from onyx.error_handling.error_codes import OnyxErrorCode
-from onyx.error_handling.exceptions import OnyxError
 from onyx.llm.constants import LlmProviderNames
 from onyx.server.manage.llm.api import _mask_string
 from onyx.server.manage.llm.api import put_llm_provider
@@ -101,7 +100,7 @@ class TestLLMProviderChanges:
                    api_base="https://attacker.example.com",
                )

-                with pytest.raises(OnyxError) as exc_info:
+                with pytest.raises(HTTPException) as exc_info:
                    put_llm_provider(
                        llm_provider_upsert_request=update_request,
                        is_creation=False,
@@ -109,9 +108,9 @@ class TestLLMProviderChanges:
                        db_session=db_session,
                    )

-                assert exc_info.value.error_code == OnyxErrorCode.VALIDATION_ERROR
+                assert exc_info.value.status_code == 400
                assert "cannot be changed without changing the API key" in str(
-                    exc_info.value.message
+                    exc_info.value.detail
                )
        finally:
            _cleanup_provider(db_session, provider_name)
@@ -237,7 +236,7 @@ class TestLLMProviderChanges:
                    api_base=None,
                )

-                with pytest.raises(OnyxError) as exc_info:
+                with pytest.raises(HTTPException) as exc_info:
                    put_llm_provider(
                        llm_provider_upsert_request=update_request,
                        is_creation=False,
@@ -245,9 +244,9 @@ class TestLLMProviderChanges:
                        db_session=db_session,
                    )

-                assert exc_info.value.error_code == OnyxErrorCode.VALIDATION_ERROR
+                assert exc_info.value.status_code == 400
                assert "cannot be changed without changing the API key" in str(
-                    exc_info.value.message
+                    exc_info.value.detail
                )
        finally:
            _cleanup_provider(db_session, provider_name)
@@ -340,7 +339,7 @@ class TestLLMProviderChanges:
                    custom_config_changed=True,
                )

-                with pytest.raises(OnyxError) as exc_info:
+                with pytest.raises(HTTPException) as exc_info:
                    put_llm_provider(
                        llm_provider_upsert_request=update_request,
                        is_creation=False,
@@ -348,9 +347,9 @@ class TestLLMProviderChanges:
                        db_session=db_session,
                    )

-                assert exc_info.value.error_code == OnyxErrorCode.VALIDATION_ERROR
+                assert exc_info.value.status_code == 400
                assert "cannot be changed without changing the API key" in str(
-                    exc_info.value.message
+                    exc_info.value.detail
                )
        finally:
            _cleanup_provider(db_session, provider_name)
@@ -376,7 +375,7 @@ class TestLLMProviderChanges:
                    custom_config_changed=True,
                )

-                with pytest.raises(OnyxError) as exc_info:
+                with pytest.raises(HTTPException) as exc_info:
                    put_llm_provider(
                        llm_provider_upsert_request=update_request,
                        is_creation=False,
@@ -384,9 +383,9 @@ class TestLLMProviderChanges:
                        db_session=db_session,
                    )

-                assert exc_info.value.error_code == OnyxErrorCode.VALIDATION_ERROR
+                assert exc_info.value.status_code == 400
                assert "cannot be changed without changing the API key" in str(
-                    exc_info.value.message
+                    exc_info.value.detail
                )
        finally:
            _cleanup_provider(db_session, provider_name)
--- a/backend/tests/external_dependency_unit/search_settings/test_search_settings.py
+++ b/backend/tests/external_dependency_unit/search_settings/test_search_settings.py
@@ -11,7 +11,6 @@ from onyx.context.search.models import SavedSearchSettings
 from onyx.context.search.models import SearchSettingsCreationRequest
 from onyx.db.enums import EmbeddingPrecision
 from onyx.db.llm import fetch_default_contextual_rag_model
-from onyx.db.llm import fetch_existing_llm_provider
 from onyx.db.llm import update_default_contextual_model
 from onyx.db.llm import upsert_llm_provider
 from onyx.db.models import IndexModelStatus
@@ -38,8 +37,6 @@ def _create_llm_provider_and_model(
    model_name: str,
 ) -> None:
    """Insert an LLM provider with a single visible model configuration."""
-    if fetch_existing_llm_provider(name=provider_name, db_session=db_session):
-        return
    upsert_llm_provider(
        LLMProviderUpsertRequest(
            name=provider_name,
@@ -149,8 +146,8 @@ def baseline_search_settings(
    )


+@pytest.mark.skip(reason="Set new search settings is temporarily disabled.")
@patch("onyx.db.swap_index.get_all_document_indices")
-@patch("onyx.server.manage.search_settings.get_all_document_indices")
@patch("onyx.server.manage.search_settings.get_default_document_index")
@patch("onyx.indexing.indexing_pipeline.get_llm_for_contextual_rag")
@patch("onyx.indexing.indexing_pipeline.index_doc_batch_with_handler")
@@ -158,7 +155,6 @@ def test_indexing_pipeline_uses_contextual_rag_settings_from_create(
    mock_index_handler: MagicMock,
    mock_get_llm: MagicMock,
    mock_get_doc_index: MagicMock,  # noqa: ARG001
-    mock_get_all_doc_indices_search_settings: MagicMock,  # noqa: ARG001
    mock_get_all_doc_indices: MagicMock,
    baseline_search_settings: None,  # noqa: ARG001
    db_session: Session,
@@ -200,8 +196,8 @@ def test_indexing_pipeline_uses_contextual_rag_settings_from_create(
    )


+@pytest.mark.skip(reason="Set new search settings is temporarily disabled.")
@patch("onyx.db.swap_index.get_all_document_indices")
-@patch("onyx.server.manage.search_settings.get_all_document_indices")
@patch("onyx.server.manage.search_settings.get_default_document_index")
@patch("onyx.indexing.indexing_pipeline.get_llm_for_contextual_rag")
@patch("onyx.indexing.indexing_pipeline.index_doc_batch_with_handler")
@@ -209,7 +205,6 @@ def test_indexing_pipeline_uses_updated_contextual_rag_settings(
    mock_index_handler: MagicMock,
    mock_get_llm: MagicMock,
    mock_get_doc_index: MagicMock,  # noqa: ARG001
-    mock_get_all_doc_indices_search_settings: MagicMock,  # noqa: ARG001
    mock_get_all_doc_indices: MagicMock,
    baseline_search_settings: None,  # noqa: ARG001
    db_session: Session,
@@ -271,7 +266,7 @@ def test_indexing_pipeline_uses_updated_contextual_rag_settings(
    )


-@patch("onyx.server.manage.search_settings.get_all_document_indices")
+@pytest.mark.skip(reason="Set new search settings is temporarily disabled.")
@patch("onyx.server.manage.search_settings.get_default_document_index")
@patch("onyx.indexing.indexing_pipeline.get_llm_for_contextual_rag")
@patch("onyx.indexing.indexing_pipeline.index_doc_batch_with_handler")
@@ -279,7 +274,6 @@ def test_indexing_pipeline_skips_llm_when_contextual_rag_disabled(
    mock_index_handler: MagicMock,
    mock_get_llm: MagicMock,
    mock_get_doc_index: MagicMock,  # noqa: ARG001
-    mock_get_all_doc_indices_search_settings: MagicMock,  # noqa: ARG001
    baseline_search_settings: None,  # noqa: ARG001
    db_session: Session,
 ) -> None:
--- a/backend/tests/external_dependency_unit/tools/test_python_tool.py
+++ b/backend/tests/external_dependency_unit/tools/test_python_tool.py
@@ -1027,13 +1027,6 @@ class _MockCIHandler(BaseHTTPRequestHandler):
        else:
            self._respond_json(404, {"error": "not found"})

-    def do_GET(self) -> None:
-        self._capture("GET", b"")
-        if self.path == "/health":
-            self._respond_json(200, {"status": "ok"})
-        else:
-            self._respond_json(404, {"error": "not found"})
-
    def do_DELETE(self) -> None:
        self._capture("DELETE", b"")
        self.send_response(200)
@@ -1114,14 +1107,6 @@ def mock_ci_server() -> Generator[MockCodeInterpreterServer, None, None]:
    server.shutdown()


-@pytest.fixture(autouse=True)
-def _clear_health_cache() -> None:
-    """Reset the health check cache before every test."""
-    import onyx.tools.tool_implementations.python.code_interpreter_client as mod
-
-    mod._health_cache = {}
-
-
@pytest.fixture()
 def _attach_python_tool_to_default_persona(db_session: Session) -> None:
    """Ensure the default persona (id=0) has the PythonTool attached."""
--- a/backend/tests/integration/tests/llm_provider/test_llm_provider.py
+++ b/backend/tests/integration/tests/llm_provider/test_llm_provider.py
@@ -427,7 +427,7 @@ def test_delete_default_llm_provider_rejected(reset: None) -> None:  # noqa: ARG
        headers=admin_user.headers,
    )
    assert delete_response.status_code == 400
-    assert "Cannot delete the default LLM provider" in delete_response.json()["message"]
+    assert "Cannot delete the default LLM provider" in delete_response.json()["detail"]

    # Verify provider still exists
    provider_data = _get_provider_by_id(admin_user, created_provider["id"])
@@ -673,8 +673,8 @@ def test_duplicate_provider_name_rejected(reset: None) -> None:  # noqa: ARG001
        headers=admin_user.headers,
        json=base_payload,
    )
-    assert response.status_code == 409
-    assert "already exists" in response.json()["message"]
+    assert response.status_code == 400
+    assert "already exists" in response.json()["detail"]


 def test_rename_provider_rejected(reset: None) -> None:  # noqa: ARG001
@@ -711,7 +711,7 @@ def test_rename_provider_rejected(reset: None) -> None:  # noqa: ARG001
        json=update_payload,
    )
    assert response.status_code == 400
-    assert "not currently supported" in response.json()["message"]
+    assert "not currently supported" in response.json()["detail"]

    # Verify no duplicate was created — only the original provider should exist
    provider = _get_provider_by_id(admin_user, provider_id)
--- a/backend/tests/integration/tests/llm_provider/test_llm_provider_persona_access.py
+++ b/backend/tests/integration/tests/llm_provider/test_llm_provider_persona_access.py
@@ -69,7 +69,7 @@ def test_unauthorized_persona_access_returns_403(

    # Should return 403 Forbidden
    assert response.status_code == 403
-    assert "don't have access to this assistant" in response.json()["message"]
+    assert "don't have access to this assistant" in response.json()["detail"]


 def test_authorized_persona_access_returns_filtered_providers(
@@ -245,4 +245,4 @@ def test_nonexistent_persona_returns_404(

    # Should return 404
    assert response.status_code == 404
-    assert "Persona not found" in response.json()["message"]
+    assert "Persona not found" in response.json()["detail"]
--- a/backend/tests/integration/tests/llm_workflows/test_nightly_provider_chat_workflow.py
+++ b/backend/tests/integration/tests/llm_workflows/test_nightly_provider_chat_workflow.py
@@ -42,78 +42,6 @@ class NightlyProviderConfig(BaseModel):
    strict: bool


-def _stringify_custom_config_value(value: object) -> str:
-    if isinstance(value, str):
-        return value
-    if isinstance(value, (dict, list)):
-        return json.dumps(value)
-    return str(value)
-
-
-def _looks_like_vertex_credentials_payload(
-    raw_custom_config: dict[object, object],
-) -> bool:
-    normalized_keys = {str(key).strip().lower() for key in raw_custom_config}
-    provider_specific_keys = {
-        "vertex_credentials",
-        "credentials_file",
-        "vertex_credentials_file",
-        "google_application_credentials",
-        "vertex_location",
-        "location",
-        "vertex_region",
-        "region",
-    }
-    if normalized_keys & provider_specific_keys:
-        return False
-
-    normalized_type = str(raw_custom_config.get("type", "")).strip().lower()
-    if normalized_type not in {"service_account", "external_account"}:
-        return False
-
-    # Service account JSON usually includes private_key/client_email, while external
-    # account JSON includes credential_source. Either shape should be accepted.
-    has_service_account_markers = any(
-        key in normalized_keys for key in {"private_key", "client_email"}
-    )
-    has_external_account_markers = "credential_source" in normalized_keys
-    return has_service_account_markers or has_external_account_markers
-
-
-def _normalize_custom_config(
-    provider: str, raw_custom_config: dict[object, object]
-) -> dict[str, str]:
-    if provider == "vertex_ai" and _looks_like_vertex_credentials_payload(
-        raw_custom_config
-    ):
-        return {"vertex_credentials": json.dumps(raw_custom_config)}
-
-    normalized: dict[str, str] = {}
-    for raw_key, raw_value in raw_custom_config.items():
-        key = str(raw_key).strip()
-        key_lower = key.lower()
-
-        if provider == "vertex_ai":
-            if key_lower in {
-                "vertex_credentials",
-                "credentials_file",
-                "vertex_credentials_file",
-                "google_application_credentials",
-            }:
-                key = "vertex_credentials"
-            elif key_lower in {
-                "vertex_location",
-                "location",
-                "vertex_region",
-                "region",
-            }:
-                key = "vertex_location"
-
-        normalized[key] = _stringify_custom_config_value(raw_value)
-
-    return normalized
-
-
 def _env_true(env_var: str, default: bool = False) -> bool:
    value = os.environ.get(env_var)
    if value is None:
@@ -152,9 +80,7 @@ def _load_provider_config() -> NightlyProviderConfig:
        parsed = json.loads(custom_config_json)
        if not isinstance(parsed, dict):
            raise ValueError(f"{_ENV_CUSTOM_CONFIG_JSON} must be a JSON object")
-        custom_config = _normalize_custom_config(
-            provider=provider, raw_custom_config=parsed
-        )
+        custom_config = {str(key): str(value) for key, value in parsed.items()}

    if provider == "ollama_chat" and api_key and not custom_config:
        custom_config = {"OLLAMA_API_KEY": api_key}
@@ -222,23 +148,6 @@ def _validate_provider_config(config: NightlyProviderConfig) -> None:
                ),
            )

-    if config.provider == "vertex_ai":
-        has_vertex_credentials = bool(
-            config.custom_config and config.custom_config.get("vertex_credentials")
-        )
-        if not has_vertex_credentials:
-            configured_keys = (
-                sorted(config.custom_config.keys()) if config.custom_config else []
-            )
-            _skip_or_fail(
-                strict=config.strict,
-                message=(
-                    f"{_ENV_CUSTOM_CONFIG_JSON} must include 'vertex_credentials' "
-                    f"for provider '{config.provider}'. "
-                    f"Found keys: {configured_keys}"
-                ),
-            )
-

 def _assert_integration_mode_enabled() -> None:
    assert (
@@ -284,7 +193,6 @@ def _create_provider_payload(
    return {
        "name": provider_name,
        "provider": provider,
-        "model": model_name,
        "api_key": api_key,
        "api_base": api_base,
        "api_version": api_version,
@@ -300,23 +208,24 @@ def _create_provider_payload(
    }


-def _ensure_provider_is_default(
-    provider_id: int, model_name: str, admin_user: DATestUser
-) -> None:
+def _ensure_provider_is_default(provider_id: int, admin_user: DATestUser) -> None:
    list_response = requests.get(
        f"{API_SERVER_URL}/admin/llm/provider",
        headers=admin_user.headers,
    )
    list_response.raise_for_status()
-    default_text = list_response.json().get("default_text")
-    assert default_text is not None, "Expected a default provider after setting default"
-    assert default_text.get("provider_id") == provider_id, (
-        f"Expected provider {provider_id} to be default, "
-        f"found {default_text.get('provider_id')}"
+    providers = list_response.json()
+
+    current_default = next(
+        (provider for provider in providers if provider.get("is_default_provider")),
+        None,
    )
    assert (
-        default_text.get("model_name") == model_name
-    ), f"Expected default model {model_name}, found {default_text.get('model_name')}"
+        current_default is not None
+    ), "Expected a default provider after setting provider as default"
+    assert (
+        current_default["id"] == provider_id
+    ), f"Expected provider {provider_id} to be default, found {current_default['id']}"


 def _run_chat_assertions(
@@ -417,9 +326,8 @@ def _create_and_test_provider_for_model(

    try:
        set_default_response = requests.post(
-            f"{API_SERVER_URL}/admin/llm/default",
+            f"{API_SERVER_URL}/admin/llm/provider/{provider_id}/default",
            headers=admin_user.headers,
-            json={"provider_id": provider_id, "model_name": model_name},
        )
        assert set_default_response.status_code == 200, (
            f"Setting default provider failed for provider={config.provider} "
@@ -427,9 +335,7 @@ def _create_and_test_provider_for_model(
            f"{set_default_response.text}"
        )

-        _ensure_provider_is_default(
-            provider_id=provider_id, model_name=model_name, admin_user=admin_user
-        )
+        _ensure_provider_is_default(provider_id=provider_id, admin_user=admin_user)
        _run_chat_assertions(
            admin_user=admin_user,
            search_tool_id=search_tool_id,
--- a/backend/tests/integration/tests/permissions/test_file_connector_permissions.py
+++ b/backend/tests/integration/tests/permissions/test_file_connector_permissions.py
@@ -1,234 +0,0 @@
-import io
-import json
-import os
-
-import pytest
-import requests
-
-from onyx.db.enums import AccessType
-from onyx.db.models import UserRole
-from onyx.server.documents.models import DocumentSource
-from tests.integration.common_utils.constants import API_SERVER_URL
-from tests.integration.common_utils.managers.cc_pair import CCPairManager
-from tests.integration.common_utils.managers.connector import ConnectorManager
-from tests.integration.common_utils.managers.credential import CredentialManager
-from tests.integration.common_utils.managers.user import DATestUser
-from tests.integration.common_utils.managers.user import UserManager
-from tests.integration.common_utils.managers.user_group import UserGroupManager
-
-
-def _upload_connector_file(
-    *,
-    user_performing_action: DATestUser,
-    file_name: str,
-    content: bytes,
-) -> tuple[str, str]:
-    headers = user_performing_action.headers.copy()
-    headers.pop("Content-Type", None)
-
-    response = requests.post(
-        f"{API_SERVER_URL}/manage/admin/connector/file/upload",
-        files=[("files", (file_name, io.BytesIO(content), "text/plain"))],
-        headers=headers,
-    )
-    response.raise_for_status()
-    payload = response.json()
-    return payload["file_paths"][0], payload["file_names"][0]
-
-
-def _update_connector_files(
-    *,
-    connector_id: int,
-    user_performing_action: DATestUser,
-    file_ids_to_remove: list[str],
-    new_file_name: str,
-    new_file_content: bytes,
-) -> requests.Response:
-    headers = user_performing_action.headers.copy()
-    headers.pop("Content-Type", None)
-
-    return requests.post(
-        f"{API_SERVER_URL}/manage/admin/connector/{connector_id}/files/update",
-        data={"file_ids_to_remove": json.dumps(file_ids_to_remove)},
-        files=[("files", (new_file_name, io.BytesIO(new_file_content), "text/plain"))],
-        headers=headers,
-    )
-
-
-def _list_connector_files(
-    *,
-    connector_id: int,
-    user_performing_action: DATestUser,
-) -> requests.Response:
-    return requests.get(
-        f"{API_SERVER_URL}/manage/admin/connector/{connector_id}/files",
-        headers=user_performing_action.headers,
-    )
-
-
-@pytest.mark.skipif(
-    os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() != "true",
-    reason="Curator and user group tests are enterprise only",
-)
-@pytest.mark.usefixtures("reset")
-def test_only_global_curator_can_update_public_file_connector_files() -> None:
-    admin_user = UserManager.create(name="admin_user")
-
-    global_curator_creator = UserManager.create(name="global_curator_creator")
-    global_curator_creator = UserManager.set_role(
-        user_to_set=global_curator_creator,
-        target_role=UserRole.GLOBAL_CURATOR,
-        user_performing_action=admin_user,
-    )
-
-    global_curator_editor = UserManager.create(name="global_curator_editor")
-    global_curator_editor = UserManager.set_role(
-        user_to_set=global_curator_editor,
-        target_role=UserRole.GLOBAL_CURATOR,
-        user_performing_action=admin_user,
-    )
-
-    curator_user = UserManager.create(name="curator_user")
-    curator_group = UserGroupManager.create(
-        name="curator_group",
-        user_ids=[curator_user.id],
-        cc_pair_ids=[],
-        user_performing_action=admin_user,
-    )
-    UserGroupManager.wait_for_sync(
-        user_groups_to_check=[curator_group],
-        user_performing_action=admin_user,
-    )
-    UserGroupManager.set_curator_status(
-        test_user_group=curator_group,
-        user_to_set_as_curator=curator_user,
-        user_performing_action=admin_user,
-    )
-
-    initial_file_id, initial_file_name = _upload_connector_file(
-        user_performing_action=global_curator_creator,
-        file_name="initial-file.txt",
-        content=b"initial file content",
-    )
-
-    connector = ConnectorManager.create(
-        user_performing_action=global_curator_creator,
-        name="public_file_connector",
-        source=DocumentSource.FILE,
-        connector_specific_config={
-            "file_locations": [initial_file_id],
-            "file_names": [initial_file_name],
-            "zip_metadata_file_id": None,
-        },
-        access_type=AccessType.PUBLIC,
-        groups=[],
-    )
-    credential = CredentialManager.create(
-        user_performing_action=global_curator_creator,
-        source=DocumentSource.FILE,
-        curator_public=True,
-        groups=[],
-        name="public_file_connector_credential",
-    )
-    CCPairManager.create(
-        connector_id=connector.id,
-        credential_id=credential.id,
-        user_performing_action=global_curator_creator,
-        access_type=AccessType.PUBLIC,
-        groups=[],
-        name="public_file_connector_cc_pair",
-    )
-
-    curator_list_response = _list_connector_files(
-        connector_id=connector.id,
-        user_performing_action=curator_user,
-    )
-    curator_list_response.raise_for_status()
-    curator_list_payload = curator_list_response.json()
-    assert any(f["file_id"] == initial_file_id for f in curator_list_payload["files"])
-
-    global_curator_list_response = _list_connector_files(
-        connector_id=connector.id,
-        user_performing_action=global_curator_editor,
-    )
-    global_curator_list_response.raise_for_status()
-    global_curator_list_payload = global_curator_list_response.json()
-    assert any(
-        f["file_id"] == initial_file_id for f in global_curator_list_payload["files"]
-    )
-
-    denied_response = _update_connector_files(
-        connector_id=connector.id,
-        user_performing_action=curator_user,
-        file_ids_to_remove=[initial_file_id],
-        new_file_name="curator-file.txt",
-        new_file_content=b"curator updated file",
-    )
-    assert denied_response.status_code == 403
-
-    allowed_response = _update_connector_files(
-        connector_id=connector.id,
-        user_performing_action=global_curator_editor,
-        file_ids_to_remove=[initial_file_id],
-        new_file_name="global-curator-file.txt",
-        new_file_content=b"global curator updated file",
-    )
-    allowed_response.raise_for_status()
-
-    payload = allowed_response.json()
-    assert initial_file_id not in payload["file_paths"]
-    assert "global-curator-file.txt" in payload["file_names"]
-
-    creator_group = UserGroupManager.create(
-        name="creator_group",
-        user_ids=[global_curator_creator.id],
-        cc_pair_ids=[],
-        user_performing_action=admin_user,
-    )
-    UserGroupManager.wait_for_sync(
-        user_groups_to_check=[creator_group],
-        user_performing_action=admin_user,
-    )
-
-    private_file_id, private_file_name = _upload_connector_file(
-        user_performing_action=global_curator_creator,
-        file_name="private-initial-file.txt",
-        content=b"private initial file content",
-    )
-
-    private_connector = ConnectorManager.create(
-        user_performing_action=global_curator_creator,
-        name="private_file_connector",
-        source=DocumentSource.FILE,
-        connector_specific_config={
-            "file_locations": [private_file_id],
-            "file_names": [private_file_name],
-            "zip_metadata_file_id": None,
-        },
-        access_type=AccessType.PRIVATE,
-        groups=[creator_group.id],
-    )
-    private_credential = CredentialManager.create(
-        user_performing_action=global_curator_creator,
-        source=DocumentSource.FILE,
-        curator_public=False,
-        groups=[creator_group.id],
-        name="private_file_connector_credential",
-    )
-    CCPairManager.create(
-        connector_id=private_connector.id,
-        credential_id=private_credential.id,
-        user_performing_action=global_curator_creator,
-        access_type=AccessType.PRIVATE,
-        groups=[creator_group.id],
-        name="private_file_connector_cc_pair",
-    )
-
-    private_denied_response = _update_connector_files(
-        connector_id=private_connector.id,
-        user_performing_action=global_curator_editor,
-        file_ids_to_remove=[private_file_id],
-        new_file_name="global-curator-private-file.txt",
-        new_file_content=b"global curator private update",
-    )
-    assert private_denied_response.status_code == 403
--- a/backend/tests/integration/tests/search_settings/test_search_settings.py
+++ b/backend/tests/integration/tests/search_settings/test_search_settings.py
@@ -1,3 +1,4 @@
+import pytest
 import requests

 from tests.integration.common_utils.constants import API_SERVER_URL
@@ -364,6 +365,7 @@ def test_update_contextual_rag_missing_model_name(
    assert "Provider name and model name are required" in response.json()["detail"]


+@pytest.mark.skip(reason="Set new search settings is temporarily disabled.")
 def test_set_new_search_settings_with_contextual_rag(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
@@ -392,6 +394,7 @@ def test_set_new_search_settings_with_contextual_rag(
    _cancel_new_embedding(admin_user)


+@pytest.mark.skip(reason="Set new search settings is temporarily disabled.")
 def test_set_new_search_settings_without_contextual_rag(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
@@ -416,6 +419,7 @@ def test_set_new_search_settings_without_contextual_rag(
    _cancel_new_embedding(admin_user)


+@pytest.mark.skip(reason="Set new search settings is temporarily disabled.")
 def test_set_new_then_update_inference_settings(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
@@ -453,6 +457,7 @@ def test_set_new_then_update_inference_settings(
    _cancel_new_embedding(admin_user)


+@pytest.mark.skip(reason="Set new search settings is temporarily disabled.")
 def test_set_new_search_settings_replaces_previous_secondary(
    reset: None,  # noqa: ARG001
    admin_user: DATestUser,
--- a/backend/tests/unit/ee/onyx/server/billing/test_billing_api.py
+++ b/backend/tests/unit/ee/onyx/server/billing/test_billing_api.py
@@ -11,8 +11,7 @@ from ee.onyx.server.billing.models import CreateCheckoutSessionResponse
 from ee.onyx.server.billing.models import CreateCustomerPortalSessionResponse
 from ee.onyx.server.billing.models import SeatUpdateResponse
 from ee.onyx.server.billing.models import SubscriptionStatusResponse
-from onyx.error_handling.error_codes import OnyxErrorCode
-from onyx.error_handling.exceptions import OnyxError
+from ee.onyx.server.billing.service import BillingServiceError


 class TestCreateCheckoutSession:
@@ -89,25 +88,22 @@ class TestCreateCheckoutSession:
        mock_get_tenant: MagicMock,
        mock_service: AsyncMock,
    ) -> None:
-        """Should propagate OnyxError when service fails."""
+        """Should raise HTTPException when service fails."""
+        from fastapi import HTTPException
+
        from ee.onyx.server.billing.api import create_checkout_session

        mock_get_license.return_value = None
        mock_get_tenant.return_value = "tenant_123"
-        mock_service.side_effect = OnyxError(
-            OnyxErrorCode.BAD_GATEWAY,
-            "Stripe error",
-            status_code_override=502,
-        )
+        mock_service.side_effect = BillingServiceError("Stripe error", 502)

-        with pytest.raises(OnyxError) as exc_info:
+        with pytest.raises(HTTPException) as exc_info:
            await create_checkout_session(
                request=None, _=MagicMock(), db_session=MagicMock()
            )

        assert exc_info.value.status_code == 502
-        assert exc_info.value.error_code is OnyxErrorCode.BAD_GATEWAY
-        assert exc_info.value.message == "Stripe error"
+        assert "Stripe error" in exc_info.value.detail


 class TestCreateCustomerPortalSession:
@@ -125,19 +121,20 @@ class TestCreateCustomerPortalSession:
        mock_service: AsyncMock,  # noqa: ARG002
    ) -> None:
        """Should reject self-hosted without license."""
+        from fastapi import HTTPException
+
        from ee.onyx.server.billing.api import create_customer_portal_session

        mock_get_license.return_value = None
        mock_get_tenant.return_value = None

-        with pytest.raises(OnyxError) as exc_info:
+        with pytest.raises(HTTPException) as exc_info:
            await create_customer_portal_session(
                request=None, _=MagicMock(), db_session=MagicMock()
            )

        assert exc_info.value.status_code == 400
-        assert exc_info.value.error_code is OnyxErrorCode.VALIDATION_ERROR
-        assert exc_info.value.message == "No license found"
+        assert "No license found" in exc_info.value.detail

    @pytest.mark.asyncio
    @patch("ee.onyx.server.billing.api.create_portal_service")
@@ -230,6 +227,8 @@ class TestUpdateSeats:
        mock_get_tenant: MagicMock,
    ) -> None:
        """Should reject self-hosted without license."""
+        from fastapi import HTTPException
+
        from ee.onyx.server.billing.api import update_seats
        from ee.onyx.server.billing.models import SeatUpdateRequest

@@ -238,12 +237,11 @@ class TestUpdateSeats:

        request = SeatUpdateRequest(new_seat_count=10)

-        with pytest.raises(OnyxError) as exc_info:
+        with pytest.raises(HTTPException) as exc_info:
            await update_seats(request=request, _=MagicMock(), db_session=MagicMock())

        assert exc_info.value.status_code == 400
-        assert exc_info.value.error_code is OnyxErrorCode.VALIDATION_ERROR
-        assert exc_info.value.message == "No license found"
+        assert "No license found" in exc_info.value.detail

    @pytest.mark.asyncio
    @patch("ee.onyx.server.billing.api.get_used_seats")
@@ -297,27 +295,26 @@ class TestUpdateSeats:
        mock_service: AsyncMock,
        mock_get_used_seats: MagicMock,
    ) -> None:
-        """Should propagate OnyxError from service layer."""
+        """Should convert BillingServiceError to HTTPException."""
+        from fastapi import HTTPException
+
        from ee.onyx.server.billing.api import update_seats
        from ee.onyx.server.billing.models import SeatUpdateRequest

        mock_get_license.return_value = "license_blob"
        mock_get_tenant.return_value = None
        mock_get_used_seats.return_value = 0
-        mock_service.side_effect = OnyxError(
-            OnyxErrorCode.BAD_GATEWAY,
-            "Cannot reduce below 10 seats",
-            status_code_override=400,
+        mock_service.side_effect = BillingServiceError(
+            "Cannot reduce below 10 seats", 400
        )

        request = SeatUpdateRequest(new_seat_count=5)

-        with pytest.raises(OnyxError) as exc_info:
+        with pytest.raises(HTTPException) as exc_info:
            await update_seats(request=request, _=MagicMock(), db_session=MagicMock())

        assert exc_info.value.status_code == 400
-        assert exc_info.value.error_code is OnyxErrorCode.BAD_GATEWAY
-        assert exc_info.value.message == "Cannot reduce below 10 seats"
+        assert "Cannot reduce below 10 seats" in exc_info.value.detail


 class TestCircuitBreaker:
@@ -335,18 +332,19 @@ class TestCircuitBreaker:
        mock_circuit_open: MagicMock,
    ) -> None:
        """Should return 503 when circuit breaker is open."""
+        from fastapi import HTTPException
+
        from ee.onyx.server.billing.api import get_billing_information

        mock_get_license.return_value = "license_blob"
        mock_get_tenant.return_value = None
        mock_circuit_open.return_value = True

-        with pytest.raises(OnyxError) as exc_info:
+        with pytest.raises(HTTPException) as exc_info:
            await get_billing_information(_=MagicMock(), db_session=MagicMock())

        assert exc_info.value.status_code == 503
-        assert exc_info.value.error_code is OnyxErrorCode.SERVICE_UNAVAILABLE
-        assert "Connect to Stripe" in exc_info.value.message
+        assert "Connect to Stripe" in exc_info.value.detail

    @pytest.mark.asyncio
    @patch("ee.onyx.server.billing.api.MULTI_TENANT", False)
@@ -364,18 +362,16 @@ class TestCircuitBreaker:
        mock_open_circuit: MagicMock,
    ) -> None:
        """Should open circuit breaker on 502 error."""
+        from fastapi import HTTPException
+
        from ee.onyx.server.billing.api import get_billing_information

        mock_get_license.return_value = "license_blob"
        mock_get_tenant.return_value = None
        mock_circuit_open_check.return_value = False
-        mock_service.side_effect = OnyxError(
-            OnyxErrorCode.BAD_GATEWAY,
-            "Connection failed",
-            status_code_override=502,
-        )
+        mock_service.side_effect = BillingServiceError("Connection failed", 502)

-        with pytest.raises(OnyxError) as exc_info:
+        with pytest.raises(HTTPException) as exc_info:
            await get_billing_information(_=MagicMock(), db_session=MagicMock())

        assert exc_info.value.status_code == 502
@@ -397,18 +393,16 @@ class TestCircuitBreaker:
        mock_open_circuit: MagicMock,
    ) -> None:
        """Should open circuit breaker on 503 error."""
+        from fastapi import HTTPException
+
        from ee.onyx.server.billing.api import get_billing_information

        mock_get_license.return_value = "license_blob"
        mock_get_tenant.return_value = None
        mock_circuit_open_check.return_value = False
-        mock_service.side_effect = OnyxError(
-            OnyxErrorCode.BAD_GATEWAY,
-            "Service unavailable",
-            status_code_override=503,
-        )
+        mock_service.side_effect = BillingServiceError("Service unavailable", 503)

-        with pytest.raises(OnyxError) as exc_info:
+        with pytest.raises(HTTPException) as exc_info:
            await get_billing_information(_=MagicMock(), db_session=MagicMock())

        assert exc_info.value.status_code == 503
@@ -430,18 +424,16 @@ class TestCircuitBreaker:
        mock_open_circuit: MagicMock,
    ) -> None:
        """Should open circuit breaker on 504 error."""
+        from fastapi import HTTPException
+
        from ee.onyx.server.billing.api import get_billing_information

        mock_get_license.return_value = "license_blob"
        mock_get_tenant.return_value = None
        mock_circuit_open_check.return_value = False
-        mock_service.side_effect = OnyxError(
-            OnyxErrorCode.BAD_GATEWAY,
-            "Gateway timeout",
-            status_code_override=504,
-        )
+        mock_service.side_effect = BillingServiceError("Gateway timeout", 504)

-        with pytest.raises(OnyxError) as exc_info:
+        with pytest.raises(HTTPException) as exc_info:
            await get_billing_information(_=MagicMock(), db_session=MagicMock())

        assert exc_info.value.status_code == 504
@@ -463,18 +455,16 @@ class TestCircuitBreaker:
        mock_open_circuit: MagicMock,
    ) -> None:
        """Should NOT open circuit breaker on 400 error (client error)."""
+        from fastapi import HTTPException
+
        from ee.onyx.server.billing.api import get_billing_information

        mock_get_license.return_value = "license_blob"
        mock_get_tenant.return_value = None
        mock_circuit_open_check.return_value = False
-        mock_service.side_effect = OnyxError(
-            OnyxErrorCode.BAD_GATEWAY,
-            "Bad request",
-            status_code_override=400,
-        )
+        mock_service.side_effect = BillingServiceError("Bad request", 400)

-        with pytest.raises(OnyxError) as exc_info:
+        with pytest.raises(HTTPException) as exc_info:
            await get_billing_information(_=MagicMock(), db_session=MagicMock())

        assert exc_info.value.status_code == 400
--- a/backend/tests/unit/ee/onyx/server/billing/test_billing_service.py
+++ b/backend/tests/unit/ee/onyx/server/billing/test_billing_service.py
@@ -14,8 +14,7 @@ from ee.onyx.server.billing.models import CreateCheckoutSessionResponse
 from ee.onyx.server.billing.models import CreateCustomerPortalSessionResponse
 from ee.onyx.server.billing.models import SeatUpdateResponse
 from ee.onyx.server.billing.models import SubscriptionStatusResponse
-from onyx.error_handling.error_codes import OnyxErrorCode
-from onyx.error_handling.exceptions import OnyxError
+from ee.onyx.server.billing.service import BillingServiceError


 class TestMakeBillingRequest:
@@ -79,7 +78,7 @@ class TestMakeBillingRequest:
        mock_base_url: MagicMock,
        mock_headers: MagicMock,
    ) -> None:
-        """Should raise OnyxError on HTTP error."""
+        """Should raise BillingServiceError on HTTP error."""
        from ee.onyx.server.billing.service import _make_billing_request

        mock_base_url.return_value = "https://api.example.com"
@@ -92,7 +91,7 @@ class TestMakeBillingRequest:
        mock_client = make_mock_http_client("post", side_effect=error)

        with patch("httpx.AsyncClient", mock_client):
-            with pytest.raises(OnyxError) as exc_info:
+            with pytest.raises(BillingServiceError) as exc_info:
                await _make_billing_request(
                    method="POST",
                    path="/test",
@@ -100,7 +99,6 @@ class TestMakeBillingRequest:
                )

        assert exc_info.value.status_code == 400
-        assert exc_info.value.error_code is OnyxErrorCode.BAD_GATEWAY
        assert "Bad request" in exc_info.value.message

    @pytest.mark.asyncio
@@ -138,7 +136,7 @@ class TestMakeBillingRequest:
        mock_base_url: MagicMock,
        mock_headers: MagicMock,
    ) -> None:
-        """Should raise OnyxError on connection error."""
+        """Should raise BillingServiceError on connection error."""
        from ee.onyx.server.billing.service import _make_billing_request

        mock_base_url.return_value = "https://api.example.com"
@@ -147,11 +145,10 @@ class TestMakeBillingRequest:
        mock_client = make_mock_http_client("post", side_effect=error)

        with patch("httpx.AsyncClient", mock_client):
-            with pytest.raises(OnyxError) as exc_info:
+            with pytest.raises(BillingServiceError) as exc_info:
                await _make_billing_request(method="POST", path="/test")

        assert exc_info.value.status_code == 502
-        assert exc_info.value.error_code is OnyxErrorCode.BAD_GATEWAY
        assert "Failed to connect" in exc_info.value.message


--- a/backend/tests/unit/ee/onyx/server/settings/test_license_enforcement_settings.py
+++ b/backend/tests/unit/ee/onyx/server/settings/test_license_enforcement_settings.py
@@ -281,10 +281,9 @@ class TestApplyLicenseStatusToSettings:
        }


-class TestSettingsDefaults:
-    """Verify Settings model defaults for CE deployments."""
+class TestSettingsDefaultEEDisabled:
+    """Verify the Settings model defaults ee_features_enabled to False."""

    def test_default_ee_features_disabled(self) -> None:
-        """CE default: ee_features_enabled is False."""
        settings = Settings()
        assert settings.ee_features_enabled is False
--- a/backend/tests/unit/ee/onyx/server/tenants/test_billing_api.py
+++ b/backend/tests/unit/ee/onyx/server/tenants/test_billing_api.py
@@ -7,9 +7,6 @@ from unittest.mock import patch
 import httpx
 import pytest

-from onyx.error_handling.error_codes import OnyxErrorCode
-from onyx.error_handling.exceptions import OnyxError
-

 class TestGetStripePublishableKey:
    """Tests for get_stripe_publishable_key endpoint."""
@@ -65,14 +62,15 @@ class TestGetStripePublishableKey:
    )
    async def test_rejects_invalid_env_var_key_format(self) -> None:
        """Should reject keys that don't start with pk_."""
+        from fastapi import HTTPException
+
        from ee.onyx.server.tenants.billing_api import get_stripe_publishable_key

-        with pytest.raises(OnyxError) as exc_info:
+        with pytest.raises(HTTPException) as exc_info:
            await get_stripe_publishable_key()

        assert exc_info.value.status_code == 500
-        assert exc_info.value.error_code is OnyxErrorCode.INTERNAL_ERROR
-        assert exc_info.value.message == "Invalid Stripe publishable key format"
+        assert "Invalid Stripe publishable key format" in exc_info.value.detail

    @pytest.mark.asyncio
    @patch("ee.onyx.server.tenants.billing_api.STRIPE_PUBLISHABLE_KEY_OVERRIDE", None)
@@ -82,6 +80,8 @@ class TestGetStripePublishableKey:
    )
    async def test_rejects_invalid_s3_key_format(self) -> None:
        """Should reject keys from S3 that don't start with pk_."""
+        from fastapi import HTTPException
+
        from ee.onyx.server.tenants.billing_api import get_stripe_publishable_key

        mock_response = MagicMock()
@@ -92,12 +92,11 @@ class TestGetStripePublishableKey:
            mock_client.return_value.__aenter__.return_value.get = AsyncMock(
                return_value=mock_response
            )
-            with pytest.raises(OnyxError) as exc_info:
+            with pytest.raises(HTTPException) as exc_info:
                await get_stripe_publishable_key()

        assert exc_info.value.status_code == 500
-        assert exc_info.value.error_code is OnyxErrorCode.INTERNAL_ERROR
-        assert exc_info.value.message == "Invalid Stripe publishable key format"
+        assert "Invalid Stripe publishable key format" in exc_info.value.detail

    @pytest.mark.asyncio
    @patch("ee.onyx.server.tenants.billing_api.STRIPE_PUBLISHABLE_KEY_OVERRIDE", None)
@@ -107,32 +106,34 @@ class TestGetStripePublishableKey:
    )
    async def test_handles_s3_fetch_error(self) -> None:
        """Should return error when S3 fetch fails."""
+        from fastapi import HTTPException
+
        from ee.onyx.server.tenants.billing_api import get_stripe_publishable_key

        with patch("httpx.AsyncClient") as mock_client:
            mock_client.return_value.__aenter__.return_value.get = AsyncMock(
                side_effect=httpx.HTTPError("Connection failed")
            )
-            with pytest.raises(OnyxError) as exc_info:
+            with pytest.raises(HTTPException) as exc_info:
                await get_stripe_publishable_key()

        assert exc_info.value.status_code == 500
-        assert exc_info.value.error_code is OnyxErrorCode.INTERNAL_ERROR
-        assert exc_info.value.message == "Failed to fetch Stripe publishable key"
+        assert "Failed to fetch Stripe publishable key" in exc_info.value.detail

    @pytest.mark.asyncio
    @patch("ee.onyx.server.tenants.billing_api.STRIPE_PUBLISHABLE_KEY_OVERRIDE", None)
    @patch("ee.onyx.server.tenants.billing_api.STRIPE_PUBLISHABLE_KEY_URL", None)
    async def test_error_when_no_config(self) -> None:
        """Should return error when neither env var nor S3 URL is configured."""
+        from fastapi import HTTPException
+
        from ee.onyx.server.tenants.billing_api import get_stripe_publishable_key

-        with pytest.raises(OnyxError) as exc_info:
+        with pytest.raises(HTTPException) as exc_info:
            await get_stripe_publishable_key()

        assert exc_info.value.status_code == 500
-        assert exc_info.value.error_code is OnyxErrorCode.INTERNAL_ERROR
-        assert "not configured" in exc_info.value.message
+        assert "not configured" in exc_info.value.detail

    @pytest.mark.asyncio
    @patch(
--- a/backend/tests/unit/onyx/chat/test_save_chat_files.py
+++ b/backend/tests/unit/onyx/chat/test_save_chat_files.py
@@ -1,178 +0,0 @@
-"""Tests for _extract_referenced_file_descriptors in save_chat.py.
-
-Verifies that only code interpreter generated files actually referenced
-in the assistant's message text are extracted as FileDescriptors for
-cross-turn persistence.
-"""
-
-from onyx.chat.save_chat import _extract_referenced_file_descriptors
-from onyx.file_store.models import ChatFileType
-from onyx.tools.models import PythonExecutionFile
-from onyx.tools.models import ToolCallInfo
-
-
-def _make_tool_call_info(
-    generated_files: list[PythonExecutionFile] | None = None,
-    tool_name: str = "python",
-) -> ToolCallInfo:
-    return ToolCallInfo(
-        parent_tool_call_id=None,
-        turn_index=0,
-        tab_index=0,
-        tool_name=tool_name,
-        tool_call_id="tc_1",
-        tool_id=1,
-        reasoning_tokens=None,
-        tool_call_arguments={"code": "print('hi')"},
-        tool_call_response="{}",
-        generated_files=generated_files,
-    )
-
-
-def test_returns_empty_when_no_generated_files() -> None:
-    tool_call = _make_tool_call_info(generated_files=None)
-    result = _extract_referenced_file_descriptors([tool_call], "some message")
-    assert result == []
-
-
-def test_returns_empty_when_file_not_referenced() -> None:
-    files = [
-        PythonExecutionFile(
-            filename="chart.png",
-            file_link="http://localhost/api/chat/file/abc-123",
-        )
-    ]
-    tool_call = _make_tool_call_info(generated_files=files)
-    result = _extract_referenced_file_descriptors([tool_call], "Here is your answer.")
-    assert result == []
-
-
-def test_extracts_referenced_file() -> None:
-    file_id = "abc-123-def"
-    files = [
-        PythonExecutionFile(
-            filename="chart.png",
-            file_link=f"http://localhost/api/chat/file/{file_id}",
-        )
-    ]
-    tool_call = _make_tool_call_info(generated_files=files)
-    message = (
-        f"Here is the chart: [chart.png](http://localhost/api/chat/file/{file_id})"
-    )
-
-    result = _extract_referenced_file_descriptors([tool_call], message)
-
-    assert len(result) == 1
-    assert result[0]["id"] == file_id
-    assert result[0]["type"] == ChatFileType.IMAGE
-    assert result[0]["name"] == "chart.png"
-
-
-def test_filters_unreferenced_files() -> None:
-    referenced_id = "ref-111"
-    unreferenced_id = "unref-222"
-    files = [
-        PythonExecutionFile(
-            filename="chart.png",
-            file_link=f"http://localhost/api/chat/file/{referenced_id}",
-        ),
-        PythonExecutionFile(
-            filename="data.csv",
-            file_link=f"http://localhost/api/chat/file/{unreferenced_id}",
-        ),
-    ]
-    tool_call = _make_tool_call_info(generated_files=files)
-    message = f"Here is the chart: [chart.png](http://localhost/api/chat/file/{referenced_id})"
-
-    result = _extract_referenced_file_descriptors([tool_call], message)
-
-    assert len(result) == 1
-    assert result[0]["id"] == referenced_id
-    assert result[0]["name"] == "chart.png"
-
-
-def test_extracts_from_multiple_tool_calls() -> None:
-    id_1 = "file-aaa"
-    id_2 = "file-bbb"
-    tc1 = _make_tool_call_info(
-        generated_files=[
-            PythonExecutionFile(
-                filename="plot.png",
-                file_link=f"http://localhost/api/chat/file/{id_1}",
-            )
-        ]
-    )
-    tc2 = _make_tool_call_info(
-        generated_files=[
-            PythonExecutionFile(
-                filename="report.csv",
-                file_link=f"http://localhost/api/chat/file/{id_2}",
-            )
-        ]
-    )
-    message = (
-        f"[plot.png](http://localhost/api/chat/file/{id_1}) "
-        f"and [report.csv](http://localhost/api/chat/file/{id_2})"
-    )
-
-    result = _extract_referenced_file_descriptors([tc1, tc2], message)
-
-    assert len(result) == 2
-    ids = {d["id"] for d in result}
-    assert ids == {id_1, id_2}
-
-
-def test_csv_file_type() -> None:
-    file_id = "csv-123"
-    files = [
-        PythonExecutionFile(
-            filename="data.csv",
-            file_link=f"http://localhost/api/chat/file/{file_id}",
-        )
-    ]
-    tool_call = _make_tool_call_info(generated_files=files)
-    message = f"[data.csv](http://localhost/api/chat/file/{file_id})"
-
-    result = _extract_referenced_file_descriptors([tool_call], message)
-
-    assert len(result) == 1
-    assert result[0]["type"] == ChatFileType.CSV
-
-
-def test_unknown_extension_defaults_to_plain_text() -> None:
-    file_id = "bin-456"
-    files = [
-        PythonExecutionFile(
-            filename="output.xyz",
-            file_link=f"http://localhost/api/chat/file/{file_id}",
-        )
-    ]
-    tool_call = _make_tool_call_info(generated_files=files)
-    message = f"[output.xyz](http://localhost/api/chat/file/{file_id})"
-
-    result = _extract_referenced_file_descriptors([tool_call], message)
-
-    assert len(result) == 1
-    assert result[0]["type"] == ChatFileType.PLAIN_TEXT
-
-
-def test_skips_tool_calls_without_generated_files() -> None:
-    file_id = "img-789"
-    tc_no_files = _make_tool_call_info(generated_files=None)
-    tc_empty = _make_tool_call_info(generated_files=[])
-    tc_with_files = _make_tool_call_info(
-        generated_files=[
-            PythonExecutionFile(
-                filename="result.png",
-                file_link=f"http://localhost/api/chat/file/{file_id}",
-            )
-        ]
-    )
-    message = f"[result.png](http://localhost/api/chat/file/{file_id})"
-
-    result = _extract_referenced_file_descriptors(
-        [tc_no_files, tc_empty, tc_with_files], message
-    )
-
-    assert len(result) == 1
-    assert result[0]["id"] == file_id
--- a/backend/tests/unit/onyx/error_handling/__init__.py
+++ b/backend/tests/unit/onyx/error_handling/__init__.py
--- a/backend/tests/unit/onyx/error_handling/test_exceptions.py
+++ b/backend/tests/unit/onyx/error_handling/test_exceptions.py
@@ -1,90 +0,0 @@
-"""Tests for OnyxError and the global exception handler."""
-
-import pytest
-from fastapi import FastAPI
-from fastapi.testclient import TestClient
-
-from onyx.error_handling.error_codes import OnyxErrorCode
-from onyx.error_handling.exceptions import OnyxError
-from onyx.error_handling.exceptions import register_onyx_exception_handlers
-
-
-class TestOnyxError:
-    """Unit tests for OnyxError construction and properties."""
-
-    def test_basic_construction(self) -> None:
-        err = OnyxError(OnyxErrorCode.NOT_FOUND, "Session not found")
-        assert err.error_code is OnyxErrorCode.NOT_FOUND
-        assert err.message == "Session not found"
-        assert err.status_code == 404
-
-    def test_message_defaults_to_code(self) -> None:
-        err = OnyxError(OnyxErrorCode.UNAUTHENTICATED)
-        assert err.message == "UNAUTHENTICATED"
-        assert str(err) == "UNAUTHENTICATED"
-
-    def test_status_code_override(self) -> None:
-        err = OnyxError(
-            OnyxErrorCode.BAD_GATEWAY,
-            "upstream failed",
-            status_code_override=503,
-        )
-        assert err.status_code == 503
-        # error_code still reports its own default
-        assert err.error_code.status_code == 502
-
-    def test_no_override_uses_error_code_status(self) -> None:
-        err = OnyxError(OnyxErrorCode.RATE_LIMITED, "slow down")
-        assert err.status_code == 429
-
-    def test_is_exception(self) -> None:
-        err = OnyxError(OnyxErrorCode.INTERNAL_ERROR)
-        assert isinstance(err, Exception)
-
-
-class TestExceptionHandler:
-    """Integration test: OnyxError → JSON response via FastAPI TestClient."""
-
-    @pytest.fixture()
-    def client(self) -> TestClient:
-        app = FastAPI()
-        register_onyx_exception_handlers(app)
-
-        @app.get("/boom")
-        def _boom() -> None:
-            raise OnyxError(OnyxErrorCode.NOT_FOUND, "Thing not found")
-
-        @app.get("/boom-override")
-        def _boom_override() -> None:
-            raise OnyxError(
-                OnyxErrorCode.BAD_GATEWAY,
-                "upstream 503",
-                status_code_override=503,
-            )
-
-        @app.get("/boom-default-msg")
-        def _boom_default() -> None:
-            raise OnyxError(OnyxErrorCode.UNAUTHENTICATED)
-
-        return TestClient(app, raise_server_exceptions=False)
-
-    def test_returns_correct_status_and_body(self, client: TestClient) -> None:
-        resp = client.get("/boom")
-        assert resp.status_code == 404
-        body = resp.json()
-        assert body["error_code"] == "NOT_FOUND"
-        assert body["message"] == "Thing not found"
-
-    def test_status_code_override_in_response(self, client: TestClient) -> None:
-        resp = client.get("/boom-override")
-        assert resp.status_code == 503
-        body = resp.json()
-        assert body["error_code"] == "BAD_GATEWAY"
-        assert body["message"] == "upstream 503"
-
-    def test_default_message(self, client: TestClient) -> None:
-        resp = client.get("/boom-default-msg")
-        assert resp.status_code == 401
-        body = resp.json()
-        assert body["error_code"] == "UNAUTHENTICATED"
-        assert body["message"] == "UNAUTHENTICATED"
--- a/backend/tests/unit/onyx/onyxbot/test_slack_formatting.py
+++ b/backend/tests/unit/onyx/onyxbot/test_slack_formatting.py
@@ -104,102 +104,3 @@ def test_format_slack_message_ampersand_not_double_escaped() -> None:

    assert "&amp;" in formatted
    assert "&quot;" not in formatted
-
-
-# -- Table rendering tests --
-
-
-def test_table_renders_as_vertical_cards() -> None:
-    message = (
-        "| Feature | Status | Owner |\n"
-        "|---------|--------|-------|\n"
-        "| Auth | Done | Alice |\n"
-        "| Search | In Progress | Bob |\n"
-    )
-
-    formatted = format_slack_message(message)
-
-    assert "*Auth*\n  • Status: Done\n  • Owner: Alice" in formatted
-    assert "*Search*\n  • Status: In Progress\n  • Owner: Bob" in formatted
-    # Cards separated by blank line
-    assert "Owner: Alice\n\n*Search*" in formatted
-    # No raw pipe-and-dash table syntax
-    assert "---|" not in formatted
-
-
-def test_table_single_column() -> None:
-    message = "| Name |\n|------|\n| Alice |\n| Bob |\n"
-
-    formatted = format_slack_message(message)
-
-    assert "*Alice*" in formatted
-    assert "*Bob*" in formatted
-
-
-def test_table_embedded_in_text() -> None:
-    message = (
-        "Here are the results:\n\n"
-        "| Item | Count |\n"
-        "|------|-------|\n"
-        "| Apples | 5 |\n"
-        "\n"
-        "That's all."
-    )
-
-    formatted = format_slack_message(message)
-
-    assert "Here are the results:" in formatted
-    assert "*Apples*\n  • Count: 5" in formatted
-    assert "That's all." in formatted
-
-
-def test_table_with_formatted_cells() -> None:
-    message = (
-        "| Name | Link |\n"
-        "|------|------|\n"
-        "| **Alice** | [profile](https://example.com) |\n"
-    )
-
-    formatted = format_slack_message(message)
-
-    # Bold cell should not double-wrap: *Alice* not **Alice**
-    assert "*Alice*" in formatted
-    assert "**Alice**" not in formatted
-    assert "<https://example.com|profile>" in formatted
-
-
-def test_table_with_alignment_specifiers() -> None:
-    message = (
-        "| Left | Center | Right |\n" "|:-----|:------:|------:|\n" "| a | b | c |\n"
-    )
-
-    formatted = format_slack_message(message)
-
-    assert "*a*\n  • Center: b\n  • Right: c" in formatted
-
-
-def test_two_tables_in_same_message_use_independent_headers() -> None:
-    message = (
-        "| A | B |\n"
-        "|---|---|\n"
-        "| 1 | 2 |\n"
-        "\n"
-        "| X | Y | Z |\n"
-        "|---|---|---|\n"
-        "| p | q | r |\n"
-    )
-
-    formatted = format_slack_message(message)
-
-    assert "*1*\n  • B: 2" in formatted
-    assert "*p*\n  • Y: q\n  • Z: r" in formatted
-
-
-def test_table_empty_first_column_no_bare_asterisks() -> None:
-    message = "| Name | Status |\n" "|------|--------|\n" "| | Done |\n"
-
-    formatted = format_slack_message(message)
-
-    # Empty title should not produce "**" (bare asterisks)
-    assert "**" not in formatted
-    assert "  • Status: Done" in formatted
--- a/backend/tests/unit/onyx/tools/test_python_tool_availability.py
+++ b/backend/tests/unit/onyx/tools/test_python_tool_availability.py
@@ -1,37 +1,25 @@
-"""Tests for PythonTool availability based on server_enabled flag and health check.
+"""Tests for PythonTool availability based on server_enabled flag.

 Verifies that PythonTool reports itself as unavailable when either:
 - CODE_INTERPRETER_BASE_URL is not set, or
- CodeInterpreterServer.server_enabled is False in the database, or
- The Code Interpreter service health check fails.
-
-Also verifies that the health check result is cached with a TTL.
+- CodeInterpreterServer.server_enabled is False in the database.
 """

 from unittest.mock import MagicMock
 from unittest.mock import patch

-import pytest
 from sqlalchemy.orm import Session

-TOOL_MODULE = "onyx.tools.tool_implementations.python.python_tool"
-CLIENT_MODULE = "onyx.tools.tool_implementations.python.code_interpreter_client"
-
-
-@pytest.fixture(autouse=True)
-def _clear_health_cache() -> None:
-    """Reset the health check cache before every test."""
-    import onyx.tools.tool_implementations.python.code_interpreter_client as mod
-
-    mod._health_cache = {}
-

 # ------------------------------------------------------------------
 # Unavailable when CODE_INTERPRETER_BASE_URL is not set
 # ------------------------------------------------------------------


-@patch(f"{TOOL_MODULE}.CODE_INTERPRETER_BASE_URL", None)
+@patch(
+    "onyx.tools.tool_implementations.python.python_tool.CODE_INTERPRETER_BASE_URL",
+    None,
+)
 def test_python_tool_unavailable_without_base_url() -> None:
    from onyx.tools.tool_implementations.python.python_tool import PythonTool

@@ -39,7 +27,10 @@ def test_python_tool_unavailable_without_base_url() -> None:
    assert PythonTool.is_available(db_session) is False


-@patch(f"{TOOL_MODULE}.CODE_INTERPRETER_BASE_URL", "")
+@patch(
+    "onyx.tools.tool_implementations.python.python_tool.CODE_INTERPRETER_BASE_URL",
+    "",
+)
 def test_python_tool_unavailable_with_empty_base_url() -> None:
    from onyx.tools.tool_implementations.python.python_tool import PythonTool

@@ -52,8 +43,13 @@ def test_python_tool_unavailable_with_empty_base_url() -> None:
 # ------------------------------------------------------------------


-@patch(f"{TOOL_MODULE}.CODE_INTERPRETER_BASE_URL", "http://localhost:8000")
-@patch(f"{TOOL_MODULE}.fetch_code_interpreter_server")
+@patch(
+    "onyx.tools.tool_implementations.python.python_tool.CODE_INTERPRETER_BASE_URL",
+    "http://localhost:8000",
+)
+@patch(
+    "onyx.tools.tool_implementations.python.python_tool.fetch_code_interpreter_server",
+)
 def test_python_tool_unavailable_when_server_disabled(
    mock_fetch: MagicMock,
 ) -> None:
@@ -68,15 +64,18 @@ def test_python_tool_unavailable_when_server_disabled(


 # ------------------------------------------------------------------
-# Health check determines availability when URL + server are OK
+# Available when both conditions are met
 # ------------------------------------------------------------------


-@patch(f"{TOOL_MODULE}.CODE_INTERPRETER_BASE_URL", "http://localhost:8000")
-@patch(f"{TOOL_MODULE}.fetch_code_interpreter_server")
-@patch(f"{TOOL_MODULE}.CodeInterpreterClient")
-def test_python_tool_available_when_health_check_passes(
-    mock_client_cls: MagicMock,
+@patch(
+    "onyx.tools.tool_implementations.python.python_tool.CODE_INTERPRETER_BASE_URL",
+    "http://localhost:8000",
+)
+@patch(
+    "onyx.tools.tool_implementations.python.python_tool.fetch_code_interpreter_server",
+)
+def test_python_tool_available_when_server_enabled(
    mock_fetch: MagicMock,
 ) -> None:
    from onyx.tools.tool_implementations.python.python_tool import PythonTool
@@ -85,122 +84,5 @@ def test_python_tool_available_when_health_check_passes(
    mock_server.server_enabled = True
    mock_fetch.return_value = mock_server

-    mock_client = MagicMock()
-    mock_client.health.return_value = True
-    mock_client_cls.return_value.__enter__ = MagicMock(return_value=mock_client)
-    mock_client_cls.return_value.__exit__ = MagicMock(return_value=False)
-
    db_session = MagicMock(spec=Session)
    assert PythonTool.is_available(db_session) is True
-    mock_client.health.assert_called_once_with(use_cache=True)
-
-
-@patch(f"{TOOL_MODULE}.CODE_INTERPRETER_BASE_URL", "http://localhost:8000")
-@patch(f"{TOOL_MODULE}.fetch_code_interpreter_server")
-@patch(f"{TOOL_MODULE}.CodeInterpreterClient")
-def test_python_tool_unavailable_when_health_check_fails(
-    mock_client_cls: MagicMock,
-    mock_fetch: MagicMock,
-) -> None:
-    from onyx.tools.tool_implementations.python.python_tool import PythonTool
-
-    mock_server = MagicMock()
-    mock_server.server_enabled = True
-    mock_fetch.return_value = mock_server
-
-    mock_client = MagicMock()
-    mock_client.health.return_value = False
-    mock_client_cls.return_value.__enter__ = MagicMock(return_value=mock_client)
-    mock_client_cls.return_value.__exit__ = MagicMock(return_value=False)
-
-    db_session = MagicMock(spec=Session)
-    assert PythonTool.is_available(db_session) is False
-    mock_client.health.assert_called_once_with(use_cache=True)
-
-
-# ------------------------------------------------------------------
-# Health check is NOT reached when preconditions fail
-# ------------------------------------------------------------------
-
-
-@patch(f"{TOOL_MODULE}.CODE_INTERPRETER_BASE_URL", "http://localhost:8000")
-@patch(f"{TOOL_MODULE}.fetch_code_interpreter_server")
-@patch(f"{TOOL_MODULE}.CodeInterpreterClient")
-def test_health_check_not_called_when_server_disabled(
-    mock_client_cls: MagicMock,
-    mock_fetch: MagicMock,
-) -> None:
-    from onyx.tools.tool_implementations.python.python_tool import PythonTool
-
-    mock_server = MagicMock()
-    mock_server.server_enabled = False
-    mock_fetch.return_value = mock_server
-
-    db_session = MagicMock(spec=Session)
-    assert PythonTool.is_available(db_session) is False
-    mock_client_cls.assert_not_called()
-
-
-# ------------------------------------------------------------------
-# Health check caching (tested at the client level)
-# ------------------------------------------------------------------
-
-
-def test_health_check_cached_on_second_call() -> None:
-    from onyx.tools.tool_implementations.python.code_interpreter_client import (
-        CodeInterpreterClient,
-    )
-
-    client = CodeInterpreterClient(base_url="http://fake:9000")
-    mock_response = MagicMock()
-    mock_response.json.return_value = {"status": "ok"}
-
-    with patch.object(client.session, "get", return_value=mock_response) as mock_get:
-        assert client.health(use_cache=True) is True
-        assert client.health(use_cache=True) is True
-        # Only one HTTP call — the second used the cache
-        mock_get.assert_called_once()
-
-
-@patch(f"{CLIENT_MODULE}.time")
-def test_health_check_refreshed_after_ttl_expires(mock_time: MagicMock) -> None:
-    from onyx.tools.tool_implementations.python.code_interpreter_client import (
-        CodeInterpreterClient,
-        _HEALTH_CACHE_TTL_SECONDS,
-    )
-
-    client = CodeInterpreterClient(base_url="http://fake:9000")
-    mock_response = MagicMock()
-    mock_response.json.return_value = {"status": "ok"}
-
-    with patch.object(client.session, "get", return_value=mock_response) as mock_get:
-        # First call at t=0 — cache miss
-        mock_time.monotonic.return_value = 0.0
-        assert client.health(use_cache=True) is True
-        assert mock_get.call_count == 1
-
-        # Second call within TTL — cache hit
-        mock_time.monotonic.return_value = float(_HEALTH_CACHE_TTL_SECONDS - 1)
-        assert client.health(use_cache=True) is True
-        assert mock_get.call_count == 1
-
-        # Third call after TTL — cache miss, fresh request
-        mock_time.monotonic.return_value = float(_HEALTH_CACHE_TTL_SECONDS + 1)
-        assert client.health(use_cache=True) is True
-        assert mock_get.call_count == 2
-
-
-def test_health_check_no_cache_by_default() -> None:
-    from onyx.tools.tool_implementations.python.code_interpreter_client import (
-        CodeInterpreterClient,
-    )
-
-    client = CodeInterpreterClient(base_url="http://fake:9000")
-    mock_response = MagicMock()
-    mock_response.json.return_value = {"status": "ok"}
-
-    with patch.object(client.session, "get", return_value=mock_response) as mock_get:
-        assert client.health() is True
-        assert client.health() is True
-        # Both calls hit the network when use_cache=False (default)
-        assert mock_get.call_count == 2
--- a/backend/tests/unit/onyx/tools/tool_implementations/websearch/test_web_search_tool_run.py
+++ b/backend/tests/unit/onyx/tools/tool_implementations/websearch/test_web_search_tool_run.py
@@ -1,164 +0,0 @@
-from __future__ import annotations
-
-from typing import Any
-from typing import cast
-from unittest.mock import MagicMock
-from unittest.mock import patch
-
-import pytest
-
-from onyx.server.query_and_chat.placement import Placement
-from onyx.tools.models import ToolCallException
-from onyx.tools.models import WebSearchToolOverrideKwargs
-from onyx.tools.tool_implementations.web_search.models import WebSearchResult
-from onyx.tools.tool_implementations.web_search.web_search_tool import (
-    _normalize_queries_input,
-)
-from onyx.tools.tool_implementations.web_search.web_search_tool import WebSearchTool
-
-
-def _make_result(
-    title: str = "Title", link: str = "https://example.com"
-) -> WebSearchResult:
-    return WebSearchResult(title=title, link=link, snippet="snippet")
-
-
-def _make_tool(mock_provider: Any) -> WebSearchTool:
-    """Instantiate WebSearchTool with all DB/provider deps mocked out."""
-    provider_model = MagicMock()
-    provider_model.provider_type = "brave"
-    provider_model.api_key = MagicMock()
-    provider_model.api_key.get_value.return_value = "fake-key"
-    provider_model.config = {}
-
-    with (
-        patch(
-            "onyx.tools.tool_implementations.web_search.web_search_tool.get_session_with_current_tenant"
-        ) as mock_session_ctx,
-        patch(
-            "onyx.tools.tool_implementations.web_search.web_search_tool.fetch_active_web_search_provider",
-            return_value=provider_model,
-        ),
-        patch(
-            "onyx.tools.tool_implementations.web_search.web_search_tool.build_search_provider_from_config",
-            return_value=mock_provider,
-        ),
-    ):
-        mock_session_ctx.return_value.__enter__ = MagicMock(return_value=MagicMock())
-        mock_session_ctx.return_value.__exit__ = MagicMock(return_value=False)
-        tool = WebSearchTool(tool_id=1, emitter=MagicMock())
-
-    return tool
-
-
-def _run(tool: WebSearchTool, queries: Any) -> list[str]:
-    """Call tool.run() and return the list of query strings passed to provider.search."""
-    placement = Placement(turn_index=0, tab_index=0)
-    override_kwargs = WebSearchToolOverrideKwargs(starting_citation_num=1)
-    tool.run(placement=placement, override_kwargs=override_kwargs, queries=queries)
-    search_mock = cast(MagicMock, tool._provider.search)  # noqa: SLF001
-    return [call.args[0] for call in search_mock.call_args_list]
-
-
-class TestNormalizeQueriesInput:
-    """Unit tests for _normalize_queries_input (coercion + sanitization)."""
-
-    def test_bare_string_returns_single_element_list(self) -> None:
-        assert _normalize_queries_input("hello") == ["hello"]
-
-    def test_bare_string_stripped_and_sanitized(self) -> None:
-        assert _normalize_queries_input("  hello  ") == ["hello"]
-        # Control chars (e.g. null) removed; no space inserted
-        assert _normalize_queries_input("hello\x00world") == ["helloworld"]
-
-    def test_empty_string_returns_empty_list(self) -> None:
-        assert _normalize_queries_input("") == []
-        assert _normalize_queries_input("   ") == []
-
-    def test_list_of_strings_returned_sanitized(self) -> None:
-        assert _normalize_queries_input(["a", "b"]) == ["a", "b"]
-        # Leading/trailing space stripped; control chars (e.g. tab) removed
-        assert _normalize_queries_input(["  a  ", "b\tb"]) == ["a", "bb"]
-
-    def test_list_none_skipped(self) -> None:
-        assert _normalize_queries_input(["a", None, "b"]) == ["a", "b"]
-
-    def test_list_non_string_coerced(self) -> None:
-        assert _normalize_queries_input([1, "two"]) == ["1", "two"]
-
-    def test_list_whitespace_only_dropped(self) -> None:
-        assert _normalize_queries_input(["a", "", "  ", "b"]) == ["a", "b"]
-
-    def test_non_list_non_string_returns_empty_list(self) -> None:
-        assert _normalize_queries_input(42) == []
-        assert _normalize_queries_input({}) == []
-
-
-class TestWebSearchToolRunQueryCoercion:
-    def test_list_of_strings_dispatches_each_query(self) -> None:
-        """Normal case: list of queries → one search call per query."""
-        mock_provider = MagicMock()
-        mock_provider.search.return_value = [_make_result()]
-        mock_provider.supports_site_filter = False
-        tool = _make_tool(mock_provider)
-
-        dispatched = _run(tool, ["python decorators", "python generators"])
-
-        # run_functions_tuples_in_parallel uses a thread pool; call_args_list order is non-deterministic.
-        assert sorted(dispatched) == ["python decorators", "python generators"]
-
-    def test_bare_string_dispatches_as_single_query(self) -> None:
-        """LLM returns a bare string instead of an array — must NOT be split char-by-char."""
-        mock_provider = MagicMock()
-        mock_provider.search.return_value = [_make_result()]
-        mock_provider.supports_site_filter = False
-        tool = _make_tool(mock_provider)
-
-        dispatched = _run(tool, "what is the capital of France")
-
-        assert len(dispatched) == 1
-        assert dispatched[0] == "what is the capital of France"
-
-    def test_bare_string_does_not_search_individual_characters(self) -> None:
-        """Regression: single-char searches must not occur."""
-        mock_provider = MagicMock()
-        mock_provider.search.return_value = [_make_result()]
-        mock_provider.supports_site_filter = False
-        tool = _make_tool(mock_provider)
-
-        dispatched = _run(tool, "hi")
-        for query_arg in dispatched:
-            assert (
-                len(query_arg) > 1
-            ), f"Single-character query dispatched: {query_arg!r}"
-
-    def test_control_characters_sanitized_before_dispatch(self) -> None:
-        """Queries with control chars have those chars removed before dispatch."""
-        mock_provider = MagicMock()
-        mock_provider.search.return_value = [_make_result()]
-        mock_provider.supports_site_filter = False
-        tool = _make_tool(mock_provider)
-
-        dispatched = _run(tool, ["foo\x00bar", "baz\tbaz"])
-
-        # run_functions_tuples_in_parallel uses a thread pool; call_args_list is in
-        # execution order, not submission order, so compare in sorted order.
-        assert sorted(dispatched) == ["bazbaz", "foobar"]
-
-    def test_all_empty_or_whitespace_raises_tool_call_exception(self) -> None:
-        """When normalization yields no valid queries, run() raises ToolCallException."""
-        mock_provider = MagicMock()
-        mock_provider.supports_site_filter = False
-        tool = _make_tool(mock_provider)
-        placement = Placement(turn_index=0, tab_index=0)
-        override_kwargs = WebSearchToolOverrideKwargs(starting_citation_num=1)
-
-        with pytest.raises(ToolCallException) as exc_info:
-            tool.run(
-                placement=placement,
-                override_kwargs=override_kwargs,
-                queries="   ",
-            )
-
-        assert "No valid" in str(exc_info.value)
-        cast(MagicMock, mock_provider.search).assert_not_called()
--- a/deployment/docker_compose/docker-compose.multitenant-dev.yml
+++ b/deployment/docker_compose/docker-compose.multitenant-dev.yml
@@ -138,6 +138,7 @@ services:
      - indexing_model_server
    restart: unless-stopped
    environment:
+      - USE_LIGHTWEIGHT_BACKGROUND_WORKER=${USE_LIGHTWEIGHT_BACKGROUND_WORKER:-true}
      - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
      - MULTI_TENANT=true
      - LOG_LEVEL=DEBUG
--- a/deployment/docker_compose/docker-compose.no-vectordb.yml
+++ b/deployment/docker_compose/docker-compose.no-vectordb.yml
@@ -1,32 +1,30 @@
 # =============================================================================
-# ONYX LITE — MINIMAL DEPLOYMENT OVERLAY
+# ONYX NO-VECTOR-DB OVERLAY
 # =============================================================================
-# Overlay to run Onyx in a minimal configuration: no vector database (Vespa),
-# no Redis, no model servers, and no background workers. Only PostgreSQL is
-# required. In this mode, connectors and RAG search are disabled, but the core
-# chat experience (LLM conversations, tools, user file uploads, Projects,
-# Agent knowledge, code interpreter) still works.
+# Overlay to run Onyx without a vector database (Vespa), model servers, or
+# code interpreter. In this mode, connectors and RAG search are disabled, but
+# the core chat experience (LLM conversations, tools, user file uploads,
+# Projects, Agent knowledge) still works.
 #
 # Usage:
-#   docker compose -f docker-compose.yml -f docker-compose.onyx-lite.yml up -d
+#   docker compose -f docker-compose.yml -f docker-compose.no-vectordb.yml up -d
 #
 # With dev ports:
-#   docker compose -f docker-compose.yml -f docker-compose.onyx-lite.yml \
+#   docker compose -f docker-compose.yml -f docker-compose.no-vectordb.yml \
 #                  -f docker-compose.dev.yml up -d --wait
 #
 # This overlay:
-#   - Moves Vespa (index), both model servers, code-interpreter, Redis (cache),
-#     and the background worker to profiles so they do not start by default
-#   - Makes depends_on references to removed services optional
+#   - Moves Vespa (index), both model servers, and code-interpreter to profiles
+#     so they do not start by default
+#   - Moves the background worker to the "background" profile (the API server
+#     handles all background work via FastAPI BackgroundTasks)
+#   - Makes the depends_on references to removed services optional
 #   - Sets DISABLE_VECTOR_DB=true on the api_server
-#   - Uses PostgreSQL for caching and auth instead of Redis
-#   - Uses PostgreSQL for file storage instead of S3/MinIO
 #
 # To selectively bring services back:
 #   --profile vectordb          Vespa + indexing model server
 #   --profile inference         Inference model server
-#   --profile background        Background worker (Celery) — also needs redis
-#   --profile redis             Redis cache
+#   --profile background        Background worker (Celery)
 #   --profile code-interpreter  Code interpreter
 # =============================================================================

@@ -38,9 +36,6 @@ services:
      index:
        condition: service_started
        required: false
-      cache:
-        condition: service_started
-        required: false
      inference_model_server:
        condition: service_started
        required: false
@@ -50,11 +45,9 @@ services:
    environment:
      - DISABLE_VECTOR_DB=true
      - FILE_STORE_BACKEND=postgres
-      - CACHE_BACKEND=postgres
-      - AUTH_BACKEND=postgres

  # Move the background worker to a profile so it does not start by default.
-  # The API server handles all background work in lite mode.
+  # The API server handles all background work in NO_VECTOR_DB mode.
  background:
    profiles: ["background"]
    depends_on:
@@ -68,11 +61,6 @@ services:
        condition: service_started
        required: false

-  # Move Redis to a profile so it does not start by default.
-  # The Postgres cache backend replaces Redis in lite mode.
-  cache:
-    profiles: ["redis"]
-
  # Move Vespa and indexing model server to a profile so they do not start.
  index:
    profiles: ["vectordb"]
--- a/deployment/docker_compose/docker-compose.prod-cloud.yml
+++ b/deployment/docker_compose/docker-compose.prod-cloud.yml
@@ -52,6 +52,7 @@ services:
      - indexing_model_server
    restart: unless-stopped
    environment:
+      - USE_LIGHTWEIGHT_BACKGROUND_WORKER=${USE_LIGHTWEIGHT_BACKGROUND_WORKER:-true}
      - AUTH_TYPE=${AUTH_TYPE:-oidc}
      - POSTGRES_HOST=relational_db
      - VESPA_HOST=index
--- a/deployment/docker_compose/docker-compose.prod-no-letsencrypt.yml
+++ b/deployment/docker_compose/docker-compose.prod-no-letsencrypt.yml
@@ -65,6 +65,7 @@ services:
      - indexing_model_server
    restart: unless-stopped
    environment:
+      - USE_LIGHTWEIGHT_BACKGROUND_WORKER=${USE_LIGHTWEIGHT_BACKGROUND_WORKER:-true}
      - AUTH_TYPE=${AUTH_TYPE:-oidc}
      - POSTGRES_HOST=relational_db
      - VESPA_HOST=index
--- a/deployment/docker_compose/docker-compose.prod.yml
+++ b/deployment/docker_compose/docker-compose.prod.yml
@@ -70,6 +70,7 @@ services:
      - indexing_model_server
    restart: unless-stopped
    environment:
+      - USE_LIGHTWEIGHT_BACKGROUND_WORKER=${USE_LIGHTWEIGHT_BACKGROUND_WORKER:-true}
      - AUTH_TYPE=${AUTH_TYPE:-oidc}
      - POSTGRES_HOST=relational_db
      - VESPA_HOST=index
--- a/deployment/docker_compose/docker-compose.search-testing.yml
+++ b/deployment/docker_compose/docker-compose.search-testing.yml
@@ -58,6 +58,7 @@ services:
    env_file:
      - .env_eval
    environment:
+      - USE_LIGHTWEIGHT_BACKGROUND_WORKER=${USE_LIGHTWEIGHT_BACKGROUND_WORKER:-true}
      - AUTH_TYPE=disabled
      - POSTGRES_HOST=relational_db
      - VESPA_HOST=index
--- a/deployment/docker_compose/docker-compose.yml
+++ b/deployment/docker_compose/docker-compose.yml
@@ -146,6 +146,7 @@ services:
      - indexing_model_server
    restart: unless-stopped
    environment:
+      - USE_LIGHTWEIGHT_BACKGROUND_WORKER=${USE_LIGHTWEIGHT_BACKGROUND_WORKER:-true}
      - FILE_STORE_BACKEND=${FILE_STORE_BACKEND:-s3}
      - POSTGRES_HOST=${POSTGRES_HOST:-relational_db}
      - VESPA_HOST=${VESPA_HOST:-index}
--- a/deployment/helm/charts/onyx/values-lite.yaml
+++ b/deployment/helm/charts/onyx/values-lite.yaml
@@ -1,31 +0,0 @@
-# =============================================================================
-# ONYX LITE — MINIMAL DEPLOYMENT VALUES
-# =============================================================================
-# Minimal Onyx deployment: no vector database, no Redis, no model servers.
-# Only PostgreSQL is required. Connectors and RAG search are disabled, but the
-# core chat experience (LLM conversations, tools, user file uploads, Projects,
-# Agent knowledge) still works.
-#
-# Usage:
-#   helm install onyx ./deployment/helm/charts/onyx \
-#     -f ./deployment/helm/charts/onyx/values-lite.yaml
-#
-# Or merged with your own overrides:
-#   helm install onyx ./deployment/helm/charts/onyx \
-#     -f ./deployment/helm/charts/onyx/values-lite.yaml \
-#     -f my-overrides.yaml
-# =============================================================================
-
-vectorDB:
-  enabled: false
-
-vespa:
-  enabled: false
-
-redis:
-  enabled: false
-
-configMap:
-  CACHE_BACKEND: "postgres"
-  AUTH_BACKEND: "postgres"
-  FILE_STORE_BACKEND: "postgres"
--- a/desktop/README.md
+++ b/desktop/README.md
@@ -14,32 +14,30 @@ Built with [Tauri](https://tauri.app) for minimal bundle size (~10MB vs Electron

 ## Keyboard Shortcuts

-| Shortcut | Action           |
-| -------- | ---------------- |
-| `⌘ N`    | New Chat         |
-| `⌘ ⇧ N`  | New Window       |
-| `⌘ R`    | Reload           |
-| `⌘ [`    | Go Back          |
-| `⌘ ]`    | Go Forward       |
-| `⌘ ,`    | Open Config File |
-| `⌘ W`    | Close Window     |
-| `⌘ Q`    | Quit             |
+| Shortcut | Action |
+|----------|--------|
+| `⌘ N` | New Chat |
+| `⌘ ⇧ N` | New Window |
+| `⌘ R` | Reload |
+| `⌘ [` | Go Back |
+| `⌘ ]` | Go Forward |
+| `⌘ ,` | Open Config File |
+| `⌘ W` | Close Window |
+| `⌘ Q` | Quit |

 ## Prerequisites

 1. **Rust** (latest stable)
-
   ```bash
   curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
   source $HOME/.cargo/env
   ```

 2. **Node.js** (18+)
-
   ```bash
   # Using homebrew
   brew install node
-
+   
   # Or using nvm
   nvm install 18
   ```
@@ -57,21 +55,16 @@ npm install

 # Run in development mode
 npm run dev
-
-# Run in debug mode
-npm run debug
 ```

 ## Building

 ### Build for current architecture
-
 ```bash
 npm run build
 ```

 ### Build Universal Binary (Intel + Apple Silicon)
-
 ```bash
 # First, add the targets
 rustup target add x86_64-apple-darwin
@@ -110,7 +103,6 @@ Before building, add your app icons to `src-tauri/icons/`:
 - `icon.ico` (Windows, optional)

 You can generate these from a 1024x1024 source image using:
-
 ```bash
 # Using tauri's icon generator
 npm run tauri icon path/to/your-icon.png
@@ -123,7 +115,6 @@ npm run tauri icon path/to/your-icon.png
 The app defaults to `https://cloud.onyx.app` but supports any Onyx instance.

 **Config file location:**
-
 - macOS: `~/Library/Application Support/app.onyx.desktop/config.json`
 - Linux: `~/.config/app.onyx.desktop/config.json`
 - Windows: `%APPDATA%/app.onyx.desktop/config.json`
@@ -144,7 +135,6 @@ The app defaults to `https://cloud.onyx.app` but supports any Onyx instance.
 4. Restart the app

 **Quick edit via terminal:**
-
 ```bash
 # macOS
 open -t ~/Library/Application\ Support/app.onyx.desktop/config.json
@@ -156,7 +146,6 @@ code ~/Library/Application\ Support/app.onyx.desktop/config.json
 ### Change the default URL in build

 Edit `src-tauri/tauri.conf.json`:
-
 ```json
 {
  "app": {
@@ -176,7 +165,6 @@ Edit `src-tauri/src/main.rs` in the `setup_shortcuts` function.
 ### Window appearance

 Modify the window configuration in `src-tauri/tauri.conf.json`:
-
 - `titleBarStyle`: `"Overlay"` (macOS native) or `"Visible"`
 - `decorations`: Window chrome
 - `transparent`: For custom backgrounds
@@ -184,20 +172,16 @@ Modify the window configuration in `src-tauri/tauri.conf.json`:
 ## Troubleshooting

 ### "Unable to resolve host"
-
 Make sure you have an internet connection. The app loads content from `cloud.onyx.app`.

 ### Build fails on M1/M2 Mac
-
 ```bash
 # Ensure you have the right target
 rustup target add aarch64-apple-darwin
 ```

 ### Code signing for distribution
-
 For distributing outside the App Store, you'll need to:
-
 1. Get an Apple Developer certificate
 2. Sign the app: `codesign --deep --force --sign "Developer ID" target/release/bundle/macos/Onyx.app`
 3. Notarize with Apple
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Raunak Bhagat	4ca8050540	fix: remove unused Content import in NonAdminStep	2026-03-03 10:55:05 -08:00
Raunak Bhagat	85186fc58a	refactor(fe): move onboarding types to interfaces/onboarding.ts — Move shared onboarding type definitions (OnboardingStep, OnboardingState, OnboardingActions, etc.) from sections/onboarding/types.ts to interfaces/onboarding.ts. Update all 20 import sites to use the new path.	2026-03-03 10:51:57 -08:00
Raunak Bhagat	7e37c48fb7	refactor(fe): move useShowOnboarding test to hooks/__tests__ — Co-locate the test with the hook it tests. Update the relative import for OnboardingStep to use the absolute path.	2026-03-03 10:49:34 -08:00
Raunak Bhagat	7122e5a9a7	refactor(fe): consolidate useOnboardingState into useShowOnboarding — Inline useOnboardingState into useShowOnboarding since it was the only consumer. Delete the now-empty useOnboardingState.ts file. Update test mocks to target the underlying dependencies instead of the removed module.	2026-03-03 10:47:16 -08:00
Raunak Bhagat	5ad3dd370f	Remove dead code	2026-03-03 10:38:52 -08:00
Raunak Bhagat	66c464c29a	refactor(fe): migrate onboarding to Content/ContentAction and move to sections — Replace manual icon+title+description flex layouts with opal Content and ContentAction components in LLMStep, NameStep, and NonAdminStep. Move the entire onboarding directory from refresh-components/ to sections/ and update all import paths.	2026-03-03 10:21:33 -08:00