Compare commits


5 Commits

Author     SHA1        Message                    Date
Evan Lohn  d81337e345  mypy                       2025-12-29 16:23:03 -08:00
Evan Lohn  df3c8982a1  mypy                       2025-12-29 16:22:11 -08:00
Evan Lohn  dbb720e7f9  WIP                        2025-12-29 16:20:45 -08:00
Evan Lohn  f68b9526fb  remove future stuff        2025-12-29 16:20:45 -08:00
Evan Lohn  6460b5df4b  refactor: drive connector  2025-12-29 16:20:45 -08:00
1207 changed files with 35366 additions and 98797 deletions


@@ -1,8 +0,0 @@
# Exclude these commits from git blame (e.g. mass reformatting).
# These are ignored by GitHub automatically.
# To enable this locally, run:
#
# git config blame.ignoreRevsFile .git-blame-ignore-revs
3134e5f840c12c8f32613ce520101a047c89dcc2 # refactor(whitespace): rm temporary react fragments (#7161)
ed3f72bc75f3e3a9ae9e4d8cd38278f9c97e78b4 # refactor(whitespace): rm react fragment #7190

.github/CODEOWNERS (7 changed lines)

@@ -1,10 +1,3 @@
* @onyx-dot-app/onyx-core-team
# Helm charts Owners
/helm/ @justin-tahara
# Web standards updates
/web/STANDARDS.md @raunakab @Weves
# Agent context files
/CLAUDE.md.template @Weves
/AGENTS.md.template @Weves


@@ -7,6 +7,14 @@ inputs:
runs:
using: "composite"
steps:
- name: Setup uv
uses: astral-sh/setup-uv@ed21f2f24f8dd64503750218de024bcf64c7250a # ratchet:astral-sh/setup-uv@v7
with:
version: "0.9.9"
# TODO: Enable caching once there is a uv.lock file checked in.
# with:
# enable-cache: true
- name: Compute requirements hash
id: req-hash
shell: bash
@@ -22,8 +30,6 @@ runs:
done <<< "$REQUIREMENTS"
echo "hash=$(echo "$hash" | sha256sum | cut -d' ' -f1)" >> "$GITHUB_OUTPUT"
# NOTE: This comes before Setup uv since clean-ups run in reverse chronological order
# such that Setup uv's prune-cache is able to prune the cache before we upload.
- name: Cache uv cache directory
uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
with:
@@ -32,14 +38,6 @@ runs:
restore-keys: |
${{ runner.os }}-uv-
- name: Setup uv
uses: astral-sh/setup-uv@ed21f2f24f8dd64503750218de024bcf64c7250a # ratchet:astral-sh/setup-uv@v7
with:
version: "0.9.9"
# TODO: Enable caching once there is a uv.lock file checked in.
# with:
# enable-cache: true
- name: Setup Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # ratchet:actions/setup-python@v5
with:

File diff suppressed because it is too large.


@@ -21,7 +21,7 @@ jobs:
timeout-minutes: 45
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3


@@ -21,7 +21,7 @@ jobs:
timeout-minutes: 45
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3


@@ -29,7 +29,6 @@ jobs:
run: |
helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
helm repo add onyx-vespa https://onyx-dot-app.github.io/vespa-helm-charts
helm repo add opensearch https://opensearch-project.github.io/helm-charts
helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
helm repo add minio https://charts.min.io/


@@ -13,7 +13,7 @@ jobs:
runs-on: ubuntu-latest
timeout-minutes: 45
steps:
- uses: actions/stale@997185467fa4f803885201cee163a9f38240193d # ratchet:actions/stale@v10
- uses: actions/stale@5f858e3efba33a5ca4407a664cc011ad407f2008 # ratchet:actions/stale@v10
with:
stale-issue-message: 'This issue is stale because it has been open 75 days with no activity. Remove stale label or comment or this will be closed in 15 days.'
stale-pr-message: 'This PR is stale because it has been open 75 days with no activity. Remove stale label or comment or this will be closed in 15 days.'


@@ -94,7 +94,7 @@ jobs:
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3


@@ -38,16 +38,11 @@ env:
# LLMs
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
VERTEX_CREDENTIALS: ${{ secrets.VERTEX_CREDENTIALS }}
VERTEX_LOCATION: ${{ vars.VERTEX_LOCATION }}
# Code Interpreter
# TODO: debug why this is failing and enable
CODE_INTERPRETER_BASE_URL: http://localhost:8000
# OpenSearch
OPENSEARCH_ADMIN_PASSWORD: "StrongPassword123!"
jobs:
discover-test-dirs:
# NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
@@ -128,13 +123,11 @@ jobs:
docker compose \
-f docker-compose.yml \
-f docker-compose.dev.yml \
-f docker-compose.opensearch.yml \
up -d \
minio \
relational_db \
cache \
index \
opensearch \
code-interpreter
- name: Run migrations
@@ -163,7 +156,7 @@ jobs:
cd deployment/docker_compose
# Get list of running containers
containers=$(docker compose -f docker-compose.yml -f docker-compose.dev.yml -f docker-compose.opensearch.yml ps -q)
containers=$(docker compose -f docker-compose.yml -f docker-compose.dev.yml ps -q)
# Collect logs from each container
for container in $containers; do
@@ -177,7 +170,7 @@ jobs:
- name: Upload Docker logs
if: failure()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v5
with:
name: docker-logs-${{ matrix.test-dir }}
path: docker-logs/


@@ -6,11 +6,11 @@ concurrency:
on:
merge_group:
pull_request:
branches: [main]
branches: [ main ]
push:
tags:
- "v*.*.*"
workflow_dispatch: # Allows manual triggering
workflow_dispatch: # Allows manual triggering
permissions:
contents: read
@@ -18,241 +18,225 @@ permissions:
jobs:
helm-chart-check:
# See https://runs-on.com/runners/linux/
runs-on:
[
runs-on,
runner=8cpu-linux-x64,
hdd=256,
"run-id=${{ github.run_id }}-helm-chart-check",
]
runs-on: [runs-on,runner=8cpu-linux-x64,hdd=256,"run-id=${{ github.run_id }}-helm-chart-check"]
timeout-minutes: 45
# fetch-depth 0 is required for helm/chart-testing-action
steps:
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
fetch-depth: 0
persist-credentials: false
- name: Checkout code
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
with:
fetch-depth: 0
persist-credentials: false
- name: Set up Helm
uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 # ratchet:azure/setup-helm@v4.3.1
with:
version: v3.19.0
- name: Set up Helm
uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 # ratchet:azure/setup-helm@v4.3.1
with:
version: v3.19.0
- name: Set up chart-testing
# NOTE: This is Jamison's patch from https://github.com/helm/chart-testing-action/pull/194
uses: helm/chart-testing-action@8958a6ac472cbd8ee9a8fbb6f1acbc1b0e966e44 # zizmor: ignore[impostor-commit]
with:
uv_version: "0.9.9"
- name: Set up chart-testing
uses: helm/chart-testing-action@6ec842c01de15ebb84c8627d2744a0c2f2755c9f # ratchet:helm/chart-testing-action@v2.8.0
# even though we specify chart-dirs in ct.yaml, it isn't used by ct for the list-changed command...
- name: Run chart-testing (list-changed)
id: list-changed
env:
DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
run: |
echo "default_branch: ${DEFAULT_BRANCH}"
changed=$(ct list-changed --remote origin --target-branch ${DEFAULT_BRANCH} --chart-dirs deployment/helm/charts)
echo "list-changed output: $changed"
if [[ -n "$changed" ]]; then
echo "changed=true" >> "$GITHUB_OUTPUT"
fi
# even though we specify chart-dirs in ct.yaml, it isn't used by ct for the list-changed command...
- name: Run chart-testing (list-changed)
id: list-changed
env:
DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
run: |
echo "default_branch: ${DEFAULT_BRANCH}"
changed=$(ct list-changed --remote origin --target-branch ${DEFAULT_BRANCH} --chart-dirs deployment/helm/charts)
echo "list-changed output: $changed"
if [[ -n "$changed" ]]; then
echo "changed=true" >> "$GITHUB_OUTPUT"
fi
# uncomment to force run chart-testing
# - name: Force run chart-testing (list-changed)
# id: list-changed
# run: echo "changed=true" >> $GITHUB_OUTPUT
# lint all charts if any changes were detected
- name: Run chart-testing (lint)
if: steps.list-changed.outputs.changed == 'true'
run: ct lint --config ct.yaml --all
# the following would lint only changed charts, but linting isn't expensive
# run: ct lint --config ct.yaml --target-branch ${{ github.event.repository.default_branch }}
# uncomment to force run chart-testing
# - name: Force run chart-testing (list-changed)
# id: list-changed
# run: echo "changed=true" >> $GITHUB_OUTPUT
- name: Create kind cluster
if: steps.list-changed.outputs.changed == 'true'
uses: helm/kind-action@92086f6be054225fa813e0a4b13787fc9088faab # ratchet:helm/kind-action@v1.13.0
# lint all charts if any changes were detected
- name: Run chart-testing (lint)
if: steps.list-changed.outputs.changed == 'true'
run: ct lint --config ct.yaml --all
# the following would lint only changed charts, but linting isn't expensive
# run: ct lint --config ct.yaml --target-branch ${{ github.event.repository.default_branch }}
- name: Pre-install cluster status check
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Pre-install Cluster Status ==="
kubectl get nodes -o wide
kubectl get pods --all-namespaces
kubectl get storageclass
- name: Create kind cluster
if: steps.list-changed.outputs.changed == 'true'
uses: helm/kind-action@92086f6be054225fa813e0a4b13787fc9088faab # ratchet:helm/kind-action@v1.13.0
- name: Add Helm repositories and update
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Adding Helm repositories ==="
helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
helm repo add vespa https://onyx-dot-app.github.io/vespa-helm-charts
helm repo add opensearch https://opensearch-project.github.io/helm-charts
helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
helm repo add minio https://charts.min.io/
helm repo add code-interpreter https://onyx-dot-app.github.io/code-interpreter/
helm repo update
- name: Pre-install cluster status check
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Pre-install Cluster Status ==="
kubectl get nodes -o wide
kubectl get pods --all-namespaces
kubectl get storageclass
- name: Install Redis operator
if: steps.list-changed.outputs.changed == 'true'
shell: bash
run: |
echo "=== Installing redis-operator CRDs ==="
helm upgrade --install redis-operator ot-container-kit/redis-operator \
--namespace redis-operator --create-namespace --wait --timeout 300s
- name: Add Helm repositories and update
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Adding Helm repositories ==="
helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
helm repo add vespa https://onyx-dot-app.github.io/vespa-helm-charts
helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
helm repo add minio https://charts.min.io/
helm repo add code-interpreter https://onyx-dot-app.github.io/code-interpreter/
helm repo update
- name: Pre-pull required images
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Pre-pulling required images to avoid timeout ==="
KIND_CLUSTER=$(kubectl config current-context | sed 's/kind-//')
echo "Kind cluster: $KIND_CLUSTER"
- name: Install Redis operator
if: steps.list-changed.outputs.changed == 'true'
shell: bash
run: |
echo "=== Installing redis-operator CRDs ==="
helm upgrade --install redis-operator ot-container-kit/redis-operator \
--namespace redis-operator --create-namespace --wait --timeout 300s
IMAGES=(
"ghcr.io/cloudnative-pg/cloudnative-pg:1.27.0"
"quay.io/opstree/redis:v7.0.15"
"docker.io/onyxdotapp/onyx-web-server:latest"
)
- name: Pre-pull required images
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Pre-pulling required images to avoid timeout ==="
KIND_CLUSTER=$(kubectl config current-context | sed 's/kind-//')
echo "Kind cluster: $KIND_CLUSTER"
for image in "${IMAGES[@]}"; do
echo "Pre-pulling $image"
if docker pull "$image"; then
kind load docker-image "$image" --name "$KIND_CLUSTER" || echo "Failed to load $image into kind"
else
echo "Failed to pull $image"
fi
done
IMAGES=(
"ghcr.io/cloudnative-pg/cloudnative-pg:1.27.0"
"quay.io/opstree/redis:v7.0.15"
"docker.io/onyxdotapp/onyx-web-server:latest"
)
echo "=== Images loaded into Kind cluster ==="
docker exec "$KIND_CLUSTER"-control-plane crictl images | grep -E "(cloudnative-pg|redis|onyx)" || echo "Some images may still be loading..."
- name: Validate chart dependencies
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Validating chart dependencies ==="
cd deployment/helm/charts/onyx
helm dependency update
helm lint .
- name: Run chart-testing (install) with enhanced monitoring
timeout-minutes: 25
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Starting chart installation with monitoring ==="
# Function to monitor cluster state
monitor_cluster() {
while true; do
echo "=== Cluster Status Check at $(date) ==="
# Only show non-running pods to reduce noise
NON_RUNNING_PODS=$(kubectl get pods --all-namespaces --field-selector=status.phase!=Running,status.phase!=Succeeded --no-headers 2>/dev/null | wc -l)
if [ "$NON_RUNNING_PODS" -gt 0 ]; then
echo "Non-running pods:"
kubectl get pods --all-namespaces --field-selector=status.phase!=Running,status.phase!=Succeeded
else
echo "All pods running successfully"
fi
# Only show recent events if there are issues
RECENT_EVENTS=$(kubectl get events --sort-by=.lastTimestamp --all-namespaces --field-selector=type!=Normal 2>/dev/null | tail -5)
if [ -n "$RECENT_EVENTS" ]; then
echo "Recent warnings/errors:"
echo "$RECENT_EVENTS"
fi
sleep 60
done
}
# Start monitoring in background
monitor_cluster &
MONITOR_PID=$!
# Set up cleanup
cleanup() {
echo "=== Cleaning up monitoring process ==="
kill $MONITOR_PID 2>/dev/null || true
echo "=== Final cluster state ==="
kubectl get pods --all-namespaces
kubectl get events --all-namespaces --sort-by=.lastTimestamp | tail -20
}
# Trap cleanup on exit
trap cleanup EXIT
# Run the actual installation with detailed logging
# Note that opensearch.enabled is true whereas others in this install
# are false. There is some work that needs to be done to get this
# entire step working in CI, enabling opensearch here is a small step
# in that direction. If this is causing issues, disabling it in this
# step should be ok in the short term.
echo "=== Starting ct install ==="
set +e
ct install --all \
--helm-extra-set-args="\
--set=nginx.enabled=false \
--set=minio.enabled=false \
--set=vespa.enabled=false \
--set=opensearch.enabled=true \
--set=auth.opensearch.enabled=true \
--set=slackbot.enabled=false \
--set=postgresql.enabled=true \
--set=postgresql.nameOverride=cloudnative-pg \
--set=postgresql.cluster.storage.storageClass=standard \
--set=redis.enabled=true \
--set=redis.storageSpec.volumeClaimTemplate.spec.storageClassName=standard \
--set=webserver.replicaCount=1 \
--set=api.replicaCount=0 \
--set=inferenceCapability.replicaCount=0 \
--set=indexCapability.replicaCount=0 \
--set=celery_beat.replicaCount=0 \
--set=celery_worker_heavy.replicaCount=0 \
--set=celery_worker_docfetching.replicaCount=0 \
--set=celery_worker_docprocessing.replicaCount=0 \
--set=celery_worker_light.replicaCount=0 \
--set=celery_worker_monitoring.replicaCount=0 \
--set=celery_worker_primary.replicaCount=0 \
--set=celery_worker_user_file_processing.replicaCount=0 \
--set=celery_worker_user_files_indexing.replicaCount=0" \
--helm-extra-args="--timeout 900s --debug" \
--debug --config ct.yaml
CT_EXIT=$?
set -e
if [[ $CT_EXIT -ne 0 ]]; then
echo "ct install failed with exit code $CT_EXIT"
exit $CT_EXIT
for image in "${IMAGES[@]}"; do
echo "Pre-pulling $image"
if docker pull "$image"; then
kind load docker-image "$image" --name "$KIND_CLUSTER" || echo "Failed to load $image into kind"
else
echo "=== Installation completed successfully ==="
echo "Failed to pull $image"
fi
done
kubectl get pods --all-namespaces
echo "=== Images loaded into Kind cluster ==="
docker exec "$KIND_CLUSTER"-control-plane crictl images | grep -E "(cloudnative-pg|redis|onyx)" || echo "Some images may still be loading..."
- name: Post-install verification
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Post-install verification ==="
kubectl get pods --all-namespaces
kubectl get services --all-namespaces
# Only show issues if they exist
kubectl describe pods --all-namespaces | grep -A 5 -B 2 "Failed\|Error\|Warning" || echo "No pod issues found"
- name: Validate chart dependencies
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Validating chart dependencies ==="
cd deployment/helm/charts/onyx
helm dependency update
helm lint .
- name: Cleanup on failure
if: failure() && steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Cleanup on failure ==="
- name: Run chart-testing (install) with enhanced monitoring
timeout-minutes: 25
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Starting chart installation with monitoring ==="
# Function to monitor cluster state
monitor_cluster() {
while true; do
echo "=== Cluster Status Check at $(date) ==="
# Only show non-running pods to reduce noise
NON_RUNNING_PODS=$(kubectl get pods --all-namespaces --field-selector=status.phase!=Running,status.phase!=Succeeded --no-headers 2>/dev/null | wc -l)
if [ "$NON_RUNNING_PODS" -gt 0 ]; then
echo "Non-running pods:"
kubectl get pods --all-namespaces --field-selector=status.phase!=Running,status.phase!=Succeeded
else
echo "All pods running successfully"
fi
# Only show recent events if there are issues
RECENT_EVENTS=$(kubectl get events --sort-by=.lastTimestamp --all-namespaces --field-selector=type!=Normal 2>/dev/null | tail -5)
if [ -n "$RECENT_EVENTS" ]; then
echo "Recent warnings/errors:"
echo "$RECENT_EVENTS"
fi
sleep 60
done
}
# Start monitoring in background
monitor_cluster &
MONITOR_PID=$!
# Set up cleanup
cleanup() {
echo "=== Cleaning up monitoring process ==="
kill $MONITOR_PID 2>/dev/null || true
echo "=== Final cluster state ==="
kubectl get pods --all-namespaces
kubectl get events --all-namespaces --sort-by=.lastTimestamp | tail -10
kubectl get events --all-namespaces --sort-by=.lastTimestamp | tail -20
}
echo "=== Pod descriptions for debugging ==="
kubectl describe pods --all-namespaces | grep -A 10 -B 3 "Failed\|Error\|Warning\|Pending" || echo "No problematic pods found"
# Trap cleanup on exit
trap cleanup EXIT
echo "=== Recent logs for debugging ==="
kubectl logs --all-namespaces --tail=50 | grep -i "error\|timeout\|failed\|pull" || echo "No error logs found"
# Run the actual installation with detailed logging
echo "=== Starting ct install ==="
set +e
ct install --all \
--helm-extra-set-args="\
--set=nginx.enabled=false \
--set=minio.enabled=false \
--set=vespa.enabled=false \
--set=slackbot.enabled=false \
--set=postgresql.enabled=true \
--set=postgresql.nameOverride=cloudnative-pg \
--set=postgresql.cluster.storage.storageClass=standard \
--set=redis.enabled=true \
--set=redis.storageSpec.volumeClaimTemplate.spec.storageClassName=standard \
--set=webserver.replicaCount=1 \
--set=api.replicaCount=0 \
--set=inferenceCapability.replicaCount=0 \
--set=indexCapability.replicaCount=0 \
--set=celery_beat.replicaCount=0 \
--set=celery_worker_heavy.replicaCount=0 \
--set=celery_worker_docfetching.replicaCount=0 \
--set=celery_worker_docprocessing.replicaCount=0 \
--set=celery_worker_light.replicaCount=0 \
--set=celery_worker_monitoring.replicaCount=0 \
--set=celery_worker_primary.replicaCount=0 \
--set=celery_worker_user_file_processing.replicaCount=0 \
--set=celery_worker_user_files_indexing.replicaCount=0" \
--helm-extra-args="--timeout 900s --debug" \
--debug --config ct.yaml
CT_EXIT=$?
set -e
echo "=== Helm releases ==="
helm list --all-namespaces
# the following would install only changed charts, but we only have one chart so
# don't worry about that for now
# run: ct install --target-branch ${{ github.event.repository.default_branch }}
if [[ $CT_EXIT -ne 0 ]]; then
echo "ct install failed with exit code $CT_EXIT"
exit $CT_EXIT
else
echo "=== Installation completed successfully ==="
fi
kubectl get pods --all-namespaces
- name: Post-install verification
if: steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Post-install verification ==="
kubectl get pods --all-namespaces
kubectl get services --all-namespaces
# Only show issues if they exist
kubectl describe pods --all-namespaces | grep -A 5 -B 2 "Failed\|Error\|Warning" || echo "No pod issues found"
- name: Cleanup on failure
if: failure() && steps.list-changed.outputs.changed == 'true'
run: |
echo "=== Cleanup on failure ==="
echo "=== Final cluster state ==="
kubectl get pods --all-namespaces
kubectl get events --all-namespaces --sort-by=.lastTimestamp | tail -10
echo "=== Pod descriptions for debugging ==="
kubectl describe pods --all-namespaces | grep -A 10 -B 3 "Failed\|Error\|Warning\|Pending" || echo "No problematic pods found"
echo "=== Recent logs for debugging ==="
kubectl logs --all-namespaces --tail=50 | grep -i "error\|timeout\|failed\|pull" || echo "No error logs found"
echo "=== Helm releases ==="
helm list --all-namespaces
# the following would install only changed charts, but we only have one chart so
# don't worry about that for now
# run: ct install --target-branch ${{ github.event.repository.default_branch }}


@@ -56,7 +56,7 @@ jobs:
id: set-matrix
run: |
# Find all leaf-level directories in both test directories
tests_dirs=$(find backend/tests/integration/tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" ! -name "mcp" -exec basename {} \; | sort)
tests_dirs=$(find backend/tests/integration/tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" -exec basename {} \; | sort)
connector_dirs=$(find backend/tests/integration/connector_job_tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" -exec basename {} \; | sort)
# Create JSON array with directory info
@@ -103,7 +103,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
@@ -163,7 +163,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
@@ -208,7 +208,7 @@ jobs:
persist-credentials: false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling openapitools/openapi-generator-cli
# otherwise, we hit the "Unauthenticated users" limit
@@ -310,9 +310,7 @@ jobs:
ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID}
INTEGRATION_TESTS_MODE=true
CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS=0.001
AUTO_LLM_UPDATE_INTERVAL_SECONDS=10
MCP_SERVER_ENABLED=true
USE_LIGHTWEIGHT_BACKGROUND_WORKER=false
EOF
- name: Start Docker containers
@@ -326,6 +324,7 @@ jobs:
api_server \
inference_model_server \
indexing_model_server \
mcp_server \
background \
-d
id: start_docker
@@ -368,6 +367,12 @@ jobs:
}
wait_for_service "http://localhost:8080/health" "API server"
test_dir="${{ matrix.test-dir.path }}"
if [ "$test_dir" = "tests/mcp" ]; then
wait_for_service "http://localhost:8090/health" "MCP server"
else
echo "Skipping MCP server wait for non-MCP suite: $test_dir"
fi
echo "Finished waiting for services."
- name: Start Mock Services
@@ -397,6 +402,8 @@ jobs:
-e VESPA_HOST=index \
-e REDIS_HOST=cache \
-e API_SERVER_HOST=api_server \
-e MCP_SERVER_HOST=mcp_server \
-e MCP_SERVER_PORT=8090 \
-e OPENAI_API_KEY=${OPENAI_API_KEY} \
-e EXA_API_KEY=${EXA_API_KEY} \
-e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
@@ -439,7 +446,7 @@ jobs:
- name: Upload logs
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: docker-all-logs-${{ matrix.test-dir.name }}
path: ${{ github.workspace }}/docker-compose.log
@@ -481,10 +488,10 @@ jobs:
AUTH_TYPE=cloud \
REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
OPENAI_DEFAULT_API_KEY=${OPENAI_API_KEY} \
ONYX_BACKEND_IMAGE=${ECR_CACHE}:integration-test-backend-test-${RUN_ID} \
ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID} \
DEV_MODE=true \
MCP_SERVER_ENABLED=true \
docker compose -f docker-compose.multitenant-dev.yml up \
relational_db \
index \
@@ -493,6 +500,7 @@ jobs:
api_server \
inference_model_server \
indexing_model_server \
mcp_server \
background \
-d
id: start_docker_multi_tenant
@@ -541,6 +549,8 @@ jobs:
-e VESPA_HOST=index \
-e REDIS_HOST=cache \
-e API_SERVER_HOST=api_server \
-e MCP_SERVER_HOST=mcp_server \
-e MCP_SERVER_PORT=8090 \
-e OPENAI_API_KEY=${OPENAI_API_KEY} \
-e EXA_API_KEY=${EXA_API_KEY} \
-e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
@@ -568,7 +578,7 @@ jobs:
- name: Upload logs (multi-tenant)
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: docker-all-logs-multitenant
path: ${{ github.workspace }}/docker-compose-multitenant.log


@@ -44,7 +44,7 @@ jobs:
- name: Upload coverage reports
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: jest-coverage-${{ github.run_id }}
path: ./web/coverage


@@ -48,7 +48,7 @@ jobs:
id: set-matrix
run: |
# Find all leaf-level directories in both test directories
tests_dirs=$(find backend/tests/integration/tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" ! -name "mcp" -exec basename {} \; | sort)
tests_dirs=$(find backend/tests/integration/tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" -exec basename {} \; | sort)
connector_dirs=$(find backend/tests/integration/connector_job_tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" -exec basename {} \; | sort)
# Create JSON array with directory info
@@ -95,7 +95,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
@@ -155,7 +155,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling Vespa, Redis, Postgres, and Minio images
# otherwise, we hit the "Unauthenticated users" limit
@@ -214,7 +214,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling openapitools/openapi-generator-cli
# otherwise, we hit the "Unauthenticated users" limit
@@ -301,7 +301,6 @@ jobs:
ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID}
INTEGRATION_TESTS_MODE=true
MCP_SERVER_ENABLED=true
AUTO_LLM_UPDATE_INTERVAL_SECONDS=10
EOF
- name: Start Docker containers
@@ -315,6 +314,7 @@ jobs:
api_server \
inference_model_server \
indexing_model_server \
mcp_server \
background \
-d
id: start_docker
@@ -357,6 +357,12 @@ jobs:
}
wait_for_service "http://localhost:8080/health" "API server"
test_dir="${{ matrix.test-dir.path }}"
if [ "$test_dir" = "tests/mcp" ]; then
wait_for_service "http://localhost:8090/health" "MCP server"
else
echo "Skipping MCP server wait for non-MCP suite: $test_dir"
fi
echo "Finished waiting for services."
- name: Start Mock Services
@@ -387,6 +393,8 @@ jobs:
-e VESPA_HOST=index \
-e REDIS_HOST=cache \
-e API_SERVER_HOST=api_server \
-e MCP_SERVER_HOST=mcp_server \
-e MCP_SERVER_PORT=8090 \
-e OPENAI_API_KEY=${OPENAI_API_KEY} \
-e EXA_API_KEY=${EXA_API_KEY} \
-e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
@@ -424,7 +432,7 @@ jobs:
- name: Upload logs
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: docker-all-logs-${{ matrix.test-dir.name }}
path: ${{ github.workspace }}/docker-compose.log


@@ -85,7 +85,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
@@ -146,7 +146,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
@@ -207,7 +207,7 @@ jobs:
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # ratchet:docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # ratchet:docker/setup-buildx-action@v3
# needed for pulling external images otherwise, we hit the "Unauthenticated users" limit
# https://docs.docker.com/docker-hub/usage/
@@ -435,7 +435,7 @@ jobs:
fi
npx playwright test --project ${PROJECT}
- uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
- uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
if: always()
with:
# Includes test results and trace.zip files
@@ -455,7 +455,7 @@ jobs:
- name: Upload logs
if: success() || failure()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: docker-logs-${{ matrix.project }}-${{ github.run_id }}
path: ${{ github.workspace }}/docker-compose.log


@@ -50,9 +50,8 @@ jobs:
uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
with:
path: backend/.mypy_cache
key: mypy-${{ runner.os }}-${{ github.base_ref || github.event.merge_group.base_ref || 'main' }}-${{ hashFiles('**/*.py', '**/*.pyi', 'backend/pyproject.toml') }}
key: mypy-${{ runner.os }}-${{ hashFiles('**/*.py', '**/*.pyi', 'backend/pyproject.toml') }}
restore-keys: |
mypy-${{ runner.os }}-${{ github.base_ref || github.event.merge_group.base_ref || 'main' }}-
mypy-${{ runner.os }}-
- name: Run MyPy


@@ -5,6 +5,11 @@ on:
# This cron expression runs the job daily at 16:00 UTC (9am PT)
- cron: "0 16 * * *"
workflow_dispatch:
inputs:
branch:
description: 'Branch to run the workflow on'
required: false
default: 'main'
permissions:
contents: read
@@ -26,11 +31,7 @@ env:
jobs:
model-check:
# See https://runs-on.com/runners/linux/
runs-on:
- runs-on
- runner=4cpu-linux-arm64
- "run-id=${{ github.run_id }}-model-check"
- "extras=ecr-cache"
runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}-model-check"]
timeout-minutes: 45
env:
@@ -42,87 +43,108 @@ jobs:
with:
persist-credentials: false
- name: Setup Python and Install Dependencies
uses: ./.github/actions/setup-python-and-install-dependencies
with:
requirements: |
backend/requirements/default.txt
backend/requirements/dev.txt
- name: Format branch name for cache
id: format-branch
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
REF_NAME: ${{ github.ref_name }}
run: |
if [ -n "${PR_NUMBER}" ]; then
CACHE_SUFFIX="${PR_NUMBER}"
else
# shellcheck disable=SC2001
CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
fi
echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
- name: Login to Docker Hub
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f
# tag every docker image with "test" so that we can spin up the correct set
# of images during testing
- name: Build and load
uses: docker/bake-action@5be5f02ff8819ecd3092ea6b2e6261c31774f2b4 # ratchet:docker/bake-action@v6
env:
TAG: model-server-${{ github.run_id }}
# We don't need to build the Web Docker image since it's not yet used
# in the integration tests. We have a separate action to verify that it builds
# successfully.
- name: Pull Model Server Docker image
run: |
docker pull onyxdotapp/onyx-model-server:latest
docker tag onyxdotapp/onyx-model-server:latest onyxdotapp/onyx-model-server:test
- name: Set up Python
uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # ratchet:actions/setup-python@v6
with:
load: true
targets: model-server
set: |
model-server.cache-from=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }}
model-server.cache-from=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }}
model-server.cache-from=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache
model-server.cache-from=type=registry,ref=onyxdotapp/onyx-model-server:latest
model-server.cache-to=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
model-server.cache-to=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
model-server.cache-to=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache,mode=max
python-version: "3.11"
cache: "pip"
cache-dependency-path: |
backend/requirements/default.txt
backend/requirements/dev.txt
- name: Install Dependencies
run: |
python -m pip install --upgrade pip
pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
- name: Start Docker containers
id: start_docker
env:
IMAGE_TAG: model-server-${{ github.run_id }}
run: |
cd deployment/docker_compose
docker compose \
-f docker-compose.yml \
-f docker-compose.dev.yml \
up -d --wait \
inference_model_server
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
AUTH_TYPE=basic \
REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
IMAGE_TAG=test \
docker compose -f docker-compose.model-server-test.yml up -d indexing_model_server
id: start_docker
- name: Wait for service to be ready
run: |
echo "Starting wait-for-service script..."
start_time=$(date +%s)
timeout=300 # 5 minutes in seconds
while true; do
current_time=$(date +%s)
elapsed_time=$((current_time - start_time))
if [ $elapsed_time -ge $timeout ]; then
echo "Timeout reached. Service did not become ready in 5 minutes."
exit 1
fi
# Use curl with error handling to ignore specific exit code 56
response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:9000/api/health || echo "curl_error")
if [ "$response" = "200" ]; then
echo "Service is ready!"
break
elif [ "$response" = "curl_error" ]; then
echo "Curl encountered an error, possibly exit code 56. Continuing to retry..."
else
echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
fi
sleep 5
done
echo "Finished waiting for service."
- name: Run Tests
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
run: |
py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/llm
py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/embedding
- name: Alert on Failure
if: failure() && github.event_name == 'schedule'
uses: ./.github/actions/slack-notify
with:
webhook-url: ${{ secrets.SLACK_WEBHOOK }}
failed-jobs: model-check
title: "🚨 Scheduled Model Tests failed!"
ref-name: ${{ github.ref_name }}
env:
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO: ${{ github.repository }}
RUN_ID: ${{ github.run_id }}
run: |
curl -X POST \
-H 'Content-type: application/json' \
--data "{\"text\":\"Scheduled Model Tests failed! Check the run at: https://github.com/${REPO}/actions/runs/${RUN_ID}\"}" \
$SLACK_WEBHOOK
- name: Dump all-container logs (optional)
if: always()
run: |
cd deployment/docker_compose
docker compose logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
docker compose -f docker-compose.model-server-test.yml logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
- name: Upload logs
if: always()
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
with:
name: docker-all-logs
path: ${{ github.workspace }}/docker-compose.log


@@ -21,29 +21,18 @@ jobs:
with:
persist-credentials: false
- name: Detect changes
id: filter
uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # ratchet:dorny/paths-filter@v3
with:
filters: |
zizmor:
- '.github/**'
- name: Install the latest version of uv
if: steps.filter.outputs.zizmor == 'true' || github.ref_name == 'main'
uses: astral-sh/setup-uv@ed21f2f24f8dd64503750218de024bcf64c7250a # ratchet:astral-sh/setup-uv@v7
with:
enable-cache: false
version: "0.9.9"
- name: Run zizmor
if: steps.filter.outputs.zizmor == 'true' || github.ref_name == 'main'
run: uv run --no-sync --with zizmor zizmor --format=sarif . > results.sarif
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Upload SARIF file
if: steps.filter.outputs.zizmor == 'true' || github.ref_name == 'main'
uses: github/codeql-action/upload-sarif@ba454b8ab46733eb6145342877cd148270bb77ab # ratchet:github/codeql-action/upload-sarif@codeql-bundle-v2.23.5
with:
sarif_file: results.sarif

.gitignore (4 changed lines)

@@ -1,8 +1,5 @@
# editors
.vscode
!/.vscode/env_template.txt
!/.vscode/launch.json
!/.vscode/tasks.template.jsonc
.zed
.cursor
@@ -24,7 +21,6 @@ backend/tests/regression/search_quality/*.json
backend/onyx/evals/data/
backend/onyx/evals/one_off/*.json
*.log
*.csv
# secret files
.env


@@ -9,8 +9,9 @@ repos:
rev: d30b4298e4fb63ce8609e29acdbcf4c9018a483c
hooks:
- id: uv-sync
args: ["--locked", "--all-extras"]
args: ["--active", "--locked", "--all-extras"]
- id: uv-lock
files: ^pyproject\.toml$
- id: uv-export
name: uv-export default.txt
args:
@@ -74,13 +75,6 @@ repos:
# pass_filenames: true
# files: ^backend/.*\.py$
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: 3e8a8703264a2f4a69428a0aa4dcb512790b2c8c # frozen: v6.0.0
hooks:
- id: check-added-large-files
name: Check for added large files
args: ["--maxkb=1500"]
- repo: https://github.com/rhysd/actionlint
rev: a443f344ff32813837fa49f7aa6cbc478d770e62 # frozen: v1.7.9
hooks:
@@ -153,22 +147,6 @@ repos:
pass_filenames: false
files: \.tf$
- id: npm-install
name: npm install
description: "Automatically run 'npm install' after a checkout, pull or rebase"
language: system
entry: bash -c 'cd web && npm install --no-save'
pass_filenames: false
files: ^web/package(-lock)?\.json$
stages: [post-checkout, post-merge, post-rewrite]
- id: npm-install-check
name: npm install --package-lock-only
description: "Check the 'web/package-lock.json' is updated"
language: system
entry: bash -c 'cd web && npm install --package-lock-only'
pass_filenames: false
files: ^web/package(-lock)?\.json$
# Uses tsgo (TypeScript's native Go compiler) for ~10x faster type checking.
# This is a preview package - if it breaks:
# 1. Try updating: cd web && npm update @typescript/native-preview


@@ -1,39 +1,36 @@
# Copy this file to .env in the .vscode folder.
# Fill in the <REPLACE THIS> values as needed; it is recommended to set the
# GEN_AI_API_KEY value to avoid having to set up an LLM in the UI.
# Also check out onyx/backend/scripts/restart_containers.sh for a script to
# restart the containers which Onyx relies on outside of VSCode/Cursor
# processes.
# Copy this file to .env in the .vscode folder
# Fill in the <REPLACE THIS> values as needed, it is recommended to set the GEN_AI_API_KEY value to avoid having to set up an LLM in the UI
# Also check out onyx/backend/scripts/restart_containers.sh for a script to restart the containers which Onyx relies on outside of VSCode/Cursor processes
# For local dev, often user Authentication is not needed.
# For local dev, often user Authentication is not needed
AUTH_TYPE=disabled
# Always keep these on for Dev.
# Logs model prompts, reasoning, and answer to stdout.
# Always keep these on for Dev
# Logs model prompts, reasoning, and answer to stdout
LOG_ONYX_MODEL_INTERACTIONS=True
# More verbose logging
LOG_LEVEL=debug
# Useful if you want to toggle auth on/off (google_oauth/OIDC specifically).
# This passes top N results to LLM an additional time for reranking prior to answer generation
# This step is quite heavy on token usage so we disable it for dev generally
DISABLE_LLM_DOC_RELEVANCE=False
# Useful if you want to toggle auth on/off (google_oauth/OIDC specifically)
OAUTH_CLIENT_ID=<REPLACE THIS>
OAUTH_CLIENT_SECRET=<REPLACE THIS>
OPENID_CONFIG_URL=<REPLACE THIS>
SAML_CONF_DIR=/<ABSOLUTE PATH TO ONYX>/onyx/backend/ee/onyx/configs/saml_config
# Generally not useful for dev, we don't generally want to set up an SMTP server
# for dev.
# Generally not useful for dev, we don't generally want to set up an SMTP server for dev
REQUIRE_EMAIL_VERIFICATION=False
# Set these so if you wipe the DB, you don't end up having to go through the UI
# every time.
# Set these so if you wipe the DB, you don't end up having to go through the UI every time
GEN_AI_API_KEY=<REPLACE THIS>
OPENAI_API_KEY=<REPLACE THIS>
# If answer quality isn't important for dev, use gpt-4o-mini since it's cheaper.
# If answer quality isn't important for dev, use gpt-4o-mini since it's cheaper
GEN_AI_MODEL_VERSION=gpt-4o
FAST_GEN_AI_MODEL_VERSION=gpt-4o
@@ -43,36 +40,26 @@ PYTHONPATH=../backend
PYTHONUNBUFFERED=1
# Enable the full set of Danswer Enterprise Edition features.
# NOTE: DO NOT ENABLE THIS UNLESS YOU HAVE A PAID ENTERPRISE LICENSE (or if you
# are using this for local testing/development).
# Enable the full set of Danswer Enterprise Edition features
# NOTE: DO NOT ENABLE THIS UNLESS YOU HAVE A PAID ENTERPRISE LICENSE (or if you are using this for local testing/development)
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=False
# S3 File Store Configuration (MinIO for local development)
S3_ENDPOINT_URL=http://localhost:9004
S3_FILE_STORE_BUCKET_NAME=onyx-file-store-bucket
S3_AWS_ACCESS_KEY_ID=minioadmin
S3_AWS_SECRET_ACCESS_KEY=minioadmin
# Show extra/uncommon connectors.
# Show extra/uncommon connectors
SHOW_EXTRA_CONNECTORS=True
# Local langsmith tracing
LANGSMITH_TRACING="true"
LANGSMITH_ENDPOINT="https://api.smith.langchain.com"
LANGSMITH_API_KEY=<REPLACE_THIS>
LANGSMITH_PROJECT=<REPLACE_THIS>
# Local Confluence OAuth testing
# OAUTH_CONFLUENCE_CLOUD_CLIENT_ID=<REPLACE_THIS>
# OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET=<REPLACE_THIS>
# NEXT_PUBLIC_TEST_ENV=True
# OpenSearch
# Arbitrary password is fine for local development.
OPENSEARCH_INITIAL_ADMIN_PASSWORD=<REPLACE THIS>
# NEXT_PUBLIC_TEST_ENV=True


@@ -1,3 +1,5 @@
/* Copy this file into '.vscode/launch.json' or merge its contents into your existing configurations. */
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
@@ -22,7 +24,7 @@
"Slack Bot",
"Celery primary",
"Celery light",
"Celery heavy",
"Celery background",
"Celery docfetching",
"Celery docprocessing",
"Celery beat"
@@ -149,24 +151,6 @@
},
"consoleTitle": "Slack Bot Console"
},
{
"name": "Discord Bot",
"consoleName": "Discord Bot",
"type": "debugpy",
"request": "launch",
"program": "onyx/onyxbot/discord/client.py",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"presentation": {
"group": "2"
},
"consoleTitle": "Discord Bot Console"
},
{
"name": "MCP Server",
"consoleName": "MCP Server",
@@ -528,21 +512,6 @@
"group": "3"
}
},
{
"name": "Clear and Restart OpenSearch Container",
// Generic debugger type, required arg but has no bearing on bash.
"type": "node",
"request": "launch",
"runtimeExecutable": "bash",
"runtimeArgs": [
"${workspaceFolder}/backend/scripts/restart_opensearch_container.sh"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "3"
}
},
{
"name": "Eval CLI",
"type": "debugpy",
@@ -595,120 +564,6 @@
"group": "3"
}
},
{
// Dummy entry used to label the group
"name": "--- Database ---",
"type": "node",
"request": "launch",
"presentation": {
"group": "4",
"order": 0
}
},
{
"name": "Restore seeded database dump",
"type": "node",
"request": "launch",
"runtimeExecutable": "uv",
"runtimeArgs": [
"run",
"--with",
"onyx-devtools",
"ods",
"db",
"restore",
"--fetch-seeded",
"--yes"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "4"
}
},
{
"name": "Clean restore seeded database dump (destructive)",
"type": "node",
"request": "launch",
"runtimeExecutable": "uv",
"runtimeArgs": [
"run",
"--with",
"onyx-devtools",
"ods",
"db",
"restore",
"--fetch-seeded",
"--clean",
"--yes"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "4"
}
},
{
"name": "Create database snapshot",
"type": "node",
"request": "launch",
"runtimeExecutable": "uv",
"runtimeArgs": [
"run",
"--with",
"onyx-devtools",
"ods",
"db",
"dump",
"backup.dump"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "4"
}
},
{
"name": "Clean restore database snapshot (destructive)",
"type": "node",
"request": "launch",
"runtimeExecutable": "uv",
"runtimeArgs": [
"run",
"--with",
"onyx-devtools",
"ods",
"db",
"restore",
"--clean",
"--yes",
"backup.dump"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "4"
}
},
{
"name": "Upgrade database to head revision",
"type": "node",
"request": "launch",
"runtimeExecutable": "uv",
"runtimeArgs": [
"run",
"--with",
"onyx-devtools",
"ods",
"db",
"upgrade"
],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"presentation": {
"group": "4"
}
},
{
// script to generate the openapi schema
"name": "Onyx OpenAPI Schema Generator",


@@ -1,13 +1,13 @@
# AGENTS.md
This file provides guidance to AI agents when working with code in this repository.
This file provides guidance to Codex when working with code in this repository.
## KEY NOTES
- If you run into any missing python dependency errors, try running your command with `source .venv/bin/activate` \
- If you run into any missing python dependency errors, try running your command with `source backend/.venv/bin/activate` \
to assume the python venv.
- To make tests work, check the `.env` file at the root of the project to find an OpenAI key.
- If using `playwright` to explore the frontend, you can usually log in with username `a@example.com` and password
- If using `playwright` to explore the frontend, you can usually log in with username `a@test.com` and password
`a`. The app can be accessed at `http://localhost:3000`.
- You should assume that all Onyx services are running. To verify, you can check the `backend/log` directory to
make sure we see logs coming out from the relevant service.
@@ -181,286 +181,6 @@ web/
└── src/lib/ # Utilities & business logic
```
## Frontend Standards
### 1. Import Standards
**Always use absolute imports with the `@` prefix.**
**Reason:** Moving files around becomes easier since you don't also have to update those import statements. This makes modifications to the codebase much nicer.
```typescript
// ✅ Good
import { Button } from "@/components/ui/button";
import { useAuth } from "@/hooks/useAuth";
import { Text } from "@/refresh-components/texts/Text";
// ❌ Bad
import { Button } from "../../../components/ui/button";
import { useAuth } from "./hooks/useAuth";
```
### 2. React Component Functions
**Prefer regular functions over arrow functions for React components.**
**Reason:** Functions just become easier to read.
```typescript
// ✅ Good
function UserProfile({ userId }: UserProfileProps) {
return <div>User Profile</div>
}
// ❌ Bad
const UserProfile = ({ userId }: UserProfileProps) => {
return <div>User Profile</div>
}
```
### 3. Props Interface Extraction
**Extract prop types into their own interface definitions.**
**Reason:** Functions just become easier to read.
```typescript
// ✅ Good
interface UserCardProps {
user: User
showActions?: boolean
onEdit?: (userId: string) => void
}
function UserCard({ user, showActions = false, onEdit }: UserCardProps) {
return <div>User Card</div>
}
// ❌ Bad
function UserCard({
user,
showActions = false,
onEdit
}: {
user: User
showActions?: boolean
onEdit?: (userId: string) => void
}) {
return <div>User Card</div>
}
```
### 4. Spacing Guidelines
**Prefer padding over margins for spacing.**
**Reason:** We want to consolidate usage to paddings instead of margins.
```typescript
// ✅ Good
<div className="p-4 space-y-2">
<div className="p-2">Content</div>
</div>
// ❌ Bad
<div className="m-4 space-y-2">
<div className="m-2">Content</div>
</div>
```
### 5. Tailwind Dark Mode
**Strictly forbid using the `dark:` modifier in Tailwind classes, except for logo icon handling.**
**Reason:** The `colors.css` file already, VERY CAREFULLY, defines what the exact opposite colour of each light-mode colour is. Overriding this behaviour is VERY bad and will lead to horrible UI breakages.
**Exception:** The `createLogoIcon` helper in `web/src/components/icons/icons.tsx` uses `dark:` modifiers (`dark:invert`, `dark:hidden`, `dark:block`) to handle third-party logo icons that cannot automatically adapt through `colors.css`. This is the ONLY acceptable use of dark mode modifiers.
```typescript
// ✅ Good - Standard components use `web/tailwind-themes/tailwind.config.js` / `web/src/app/css/colors.css`
<div className="bg-background-neutral-03 text-text-02">
Content
</div>
// ✅ Good - Logo icons with dark mode handling via createLogoIcon
export const GithubIcon = createLogoIcon(githubLightIcon, {
monochromatic: true, // Will apply dark:invert internally
});
export const GitbookIcon = createLogoIcon(gitbookLightIcon, {
darkSrc: gitbookDarkIcon, // Will use dark:hidden/dark:block internally
});
// ❌ Bad - Manual dark mode overrides
<div className="bg-white dark:bg-black text-black dark:text-white">
Content
</div>
```
### 6. Class Name Utilities
**Use the `cn` utility instead of raw string formatting for classNames.**
**Reason:** `cn`s are easier to read. They also allow for more complex types (i.e., string-arrays) to get formatted properly (it flattens each element in that string array down). As a result, it can allow things such as conditionals (i.e., `myCondition && "some-tailwind-class"`, which evaluates to `false` when `myCondition` is `false`) to get filtered out.
```typescript
import { cn } from '@/lib/utils'
// ✅ Good
<div className={cn(
'base-class',
isActive && 'active-class',
className
)}>
Content
</div>
// ❌ Bad
<div className={`base-class ${isActive ? 'active-class' : ''} ${className}`}>
Content
</div>
```
### 7. Custom Hooks Organization
**Follow a "hook-per-file" layout. Each hook should live in its own file within `web/src/hooks`.**
**Reason:** This is just a layout preference. Keeps code clean.
```typescript
// web/src/hooks/useUserData.ts
export function useUserData(userId: string) {
// hook implementation
}
// web/src/hooks/useLocalStorage.ts
export function useLocalStorage<T>(key: string, initialValue: T) {
// hook implementation
}
```
### 8. Icon Usage
**ONLY use icons from the `web/src/icons` directory. Do NOT use icons from `react-icons`, `lucide`, or other external libraries.**
**Reason:** We have a very carefully curated selection of icons that match our Onyx guidelines. We do NOT want to muddy those up with different aesthetic stylings.
```typescript
// ✅ Good
import SvgX from "@/icons/x";
import SvgMoreHorizontal from "@/icons/more-horizontal";
// ❌ Bad
import { User } from "lucide-react";
import { FiSearch } from "react-icons/fi";
```
**Missing Icons**: If an icon is needed but doesn't exist in the `web/src/icons` directory, import it from Figma using the Figma MCP tool and add it to the icons directory.
If you need help with this step, reach out to `raunak@onyx.app`.
### 9. Text Rendering
**Prefer using the `refresh-components/texts/Text` component for all text rendering. Avoid "naked" text nodes.**
**Reason:** The `Text` component is fully compliant with the stylings provided in Figma. It provides easy utilities to specify the text-colour and font-size in the form of flags. Super duper easy.
```typescript
// ✅ Good
import { Text } from '@/refresh-components/texts/Text'
function UserCard({ name }: { name: string }) {
return (
<Text
{/* The `text03` flag makes the text it renders to be coloured the 3rd-scale grey */}
text03
{/* The `mainAction` flag makes the text it renders to be "main-action" font + line-height + weightage, as described in the Figma */}
mainAction
>
{name}
</Text>
)
}
// ❌ Bad
function UserCard({ name }: { name: string }) {
return (
<div>
<h2>{name}</h2>
<p>User details</p>
</div>
)
}
```
### 10. Component Usage
**Heavily avoid raw HTML input components. Always use components from the `web/src/refresh-components` or `web/lib/opal/src` directory.**
**Reason:** We've put in a lot of effort to unify the components that are rendered in the Onyx app. Using raw components breaks the entire UI of the application, and leaves it in a muddier state than before.
```typescript
// ✅ Good
import Button from '@/refresh-components/buttons/Button'
import InputTypeIn from '@/refresh-components/inputs/InputTypeIn'
import SvgPlusCircle from '@/icons/plus-circle'
function ContactForm() {
return (
<form>
<InputTypeIn placeholder="Search..." />
<Button type="submit" leftIcon={SvgPlusCircle}>Submit</Button>
</form>
)
}
// ❌ Bad
function ContactForm() {
return (
<form>
<input placeholder="Name" />
<textarea placeholder="Message" />
<button type="submit">Submit</button>
</form>
)
}
```
### 11. Colors
**Always use custom overrides for colors and borders rather than built in Tailwind CSS colors. These overrides live in `web/tailwind-themes/tailwind.config.js`.**
**Reason:** Our custom color system uses CSS variables that automatically handle dark mode and maintain design consistency across the app. Standard Tailwind colors bypass this system.
**Available color categories:**
- **Text:** `text-01` through `text-05`, `text-inverted-XX`
- **Backgrounds:** `background-neutral-XX`, `background-tint-XX` (and inverted variants)
- **Borders:** `border-01` through `border-05`, `border-inverted-XX`
- **Actions:** `action-link-XX`, `action-danger-XX`
- **Status:** `status-info-XX`, `status-success-XX`, `status-warning-XX`, `status-error-XX`
- **Theme:** `theme-primary-XX`, `theme-red-XX`, `theme-blue-XX`, etc.
```typescript
// ✅ Good - Use custom Onyx color classes
<div className="bg-background-neutral-01 border border-border-02" />
<div className="bg-background-tint-02 border border-border-01" />
<div className="bg-status-success-01" />
<div className="bg-action-link-01" />
<div className="bg-theme-primary-05" />
// ❌ Bad - Do NOT use standard Tailwind colors
<div className="bg-gray-100 border border-gray-300 text-gray-600" />
<div className="bg-white border border-slate-200" />
<div className="bg-green-100 text-green-700" />
<div className="bg-blue-100 text-blue-600" />
<div className="bg-indigo-500" />
```
### 12. Data Fetching
**Prefer using `useSWR` for data fetching. Data should generally be fetched on the client side. Components that need data should display a loader / placeholder while waiting for that data. Prefer loading data within the component that needs it rather than at the top level and passing it down.**
**Reason:** Client side fetching allows us to load the skeleton of the page without waiting for data to load, leading to a snappier UX. Loading data where needed reduces dependencies between a component and its parent component(s).
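A minimal sketch of this pattern (the `/api/projects` endpoint, the `Project` type, and the `fetcher` below are hypothetical, purely for illustration):
```typescript
import useSWR from "swr";
import { Text } from "@/refresh-components/texts/Text";

interface Project {
  id: string;
  name: string;
}

// Hypothetical endpoint + fetcher, for illustration only
const fetcher = (url: string) => fetch(url).then((res) => res.json());

// ✅ Good - the component fetches its own data and shows a placeholder while loading
function ProjectList() {
  const { data, error, isLoading } = useSWR<Project[]>("/api/projects", fetcher);

  if (isLoading) return <Text text03 mainAction>Loading projects...</Text>;
  if (error || !data) return <Text text03 mainAction>Failed to load projects.</Text>;

  return (
    <div className="p-4">
      {data.map((project) => (
        <Text key={project.id} mainAction>
          {project.name}
        </Text>
      ))}
    </div>
  );
}
```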
## Database & Migrations
### Running Migrations
@@ -575,6 +295,14 @@ will be tailing their logs to this file.
- Token management and rate limiting
- Custom prompts and agent actions
## UI/UX Patterns
- Tailwind CSS with design system in `web/src/components/ui/`
- Radix UI and Headless UI for accessible components
- SWR for data fetching and caching
- Form validation with react-hook-form
- Error handling with popup notifications
## Creating a Plan
When creating a plan in the `plans` directory, make sure to include at least these elements:

View File

@@ -7,7 +7,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
- If you run into any missing Python dependency errors, try running your command with `source .venv/bin/activate` \
to activate the Python venv.
- To make tests work, check the `.env` file at the root of the project to find an OpenAI key.
- If using `playwright` to explore the frontend, you can usually log in with username `a@example.com` and password
- If using `playwright` to explore the frontend, you can usually log in with username `a@test.com` and password
`a`. The app can be accessed at `http://localhost:3000`.
- You should assume that all Onyx services are running. To verify, you can check the `backend/log` directory to
make sure we see logs coming out from the relevant service.
@@ -184,286 +184,6 @@ web/
└── src/lib/ # Utilities & business logic
```
## Frontend Standards
### 1. Import Standards
**Always use absolute imports with the `@` prefix.**
**Reason:** Moving files around becomes easier since you don't also have to update those import statements. This makes modifications to the codebase much nicer.
```typescript
// ✅ Good
import { Button } from "@/components/ui/button";
import { useAuth } from "@/hooks/useAuth";
import { Text } from "@/refresh-components/texts/Text";
// ❌ Bad
import { Button } from "../../../components/ui/button";
import { useAuth } from "./hooks/useAuth";
```
### 2. React Component Functions
**Prefer regular functions over arrow functions for React components.**
**Reason:** Functions just become easier to read.
```typescript
// ✅ Good
function UserProfile({ userId }: UserProfileProps) {
return <div>User Profile</div>
}
// ❌ Bad
const UserProfile = ({ userId }: UserProfileProps) => {
return <div>User Profile</div>
}
```
### 3. Props Interface Extraction
**Extract prop types into their own interface definitions.**
**Reason:** Functions just become easier to read.
```typescript
// ✅ Good
interface UserCardProps {
user: User
showActions?: boolean
onEdit?: (userId: string) => void
}
function UserCard({ user, showActions = false, onEdit }: UserCardProps) {
return <div>User Card</div>
}
// ❌ Bad
function UserCard({
user,
showActions = false,
onEdit
}: {
user: User
showActions?: boolean
onEdit?: (userId: string) => void
}) {
return <div>User Card</div>
}
```
### 4. Spacing Guidelines
**Prefer padding over margins for spacing.**
**Reason:** We want to consolidate usage to paddings instead of margins.
```typescript
// ✅ Good
<div className="p-4 space-y-2">
<div className="p-2">Content</div>
</div>
// ❌ Bad
<div className="m-4 space-y-2">
<div className="m-2">Content</div>
</div>
```
### 5. Tailwind Dark Mode
**Strictly forbid using the `dark:` modifier in Tailwind classes, except for logo icon handling.**
**Reason:** The `colors.css` file already, VERY CAREFULLY, defines what the exact opposite colour of each light-mode colour is. Overriding this behaviour is VERY bad and will lead to horrible UI breakages.
**Exception:** The `createLogoIcon` helper in `web/src/components/icons/icons.tsx` uses `dark:` modifiers (`dark:invert`, `dark:hidden`, `dark:block`) to handle third-party logo icons that cannot automatically adapt through `colors.css`. This is the ONLY acceptable use of dark mode modifiers.
```typescript
// ✅ Good - Standard components use `tailwind-themes/tailwind.config.js` / `src/app/css/colors.css`
<div className="bg-background-neutral-03 text-text-02">
Content
</div>
// ✅ Good - Logo icons with dark mode handling via createLogoIcon
export const GithubIcon = createLogoIcon(githubLightIcon, {
monochromatic: true, // Will apply dark:invert internally
});
export const GitbookIcon = createLogoIcon(gitbookLightIcon, {
darkSrc: gitbookDarkIcon, // Will use dark:hidden/dark:block internally
});
// ❌ Bad - Manual dark mode overrides
<div className="bg-white dark:bg-black text-black dark:text-white">
Content
</div>
```
### 6. Class Name Utilities
**Use the `cn` utility instead of raw string formatting for classNames.**
**Reason:** `cn`s are easier to read. They also handle more complex inputs (e.g., string arrays) properly by flattening each element, and they filter out falsy values, so conditionals (e.g., `myCondition && "some-tailwind-class"`, which evaluates to `false` when `myCondition` is `false`) are dropped automatically.
```typescript
import { cn } from '@/lib/utils'
// ✅ Good
<div className={cn(
'base-class',
isActive && 'active-class',
className
)}>
Content
</div>
// ❌ Bad
<div className={`base-class ${isActive ? 'active-class' : ''} ${className}`}>
Content
</div>
```
### 7. Custom Hooks Organization
**Follow a "hook-per-file" layout. Each hook should live in its own file within `web/src/hooks`.**
**Reason:** This is just a layout preference. Keeps code clean.
```typescript
// web/src/hooks/useUserData.ts
export function useUserData(userId: string) {
// hook implementation
}
// web/src/hooks/useLocalStorage.ts
export function useLocalStorage<T>(key: string, initialValue: T) {
// hook implementation
}
```
### 8. Icon Usage
**ONLY use icons from the `web/src/icons` directory. Do NOT use icons from `react-icons`, `lucide`, or other external libraries.**
**Reason:** We have a very carefully curated selection of icons that match our Onyx guidelines. We do NOT want to muddy those up with different aesthetic stylings.
```typescript
// ✅ Good
import SvgX from "@/icons/x";
import SvgMoreHorizontal from "@/icons/more-horizontal";
// ❌ Bad
import { User } from "lucide-react";
import { FiSearch } from "react-icons/fi";
```
**Missing Icons**: If an icon is needed but doesn't exist in the `web/src/icons` directory, import it from Figma using the Figma MCP tool and add it to the icons directory.
If you need help with this step, reach out to `raunak@onyx.app`.
### 9. Text Rendering
**Prefer using the `refresh-components/texts/Text` component for all text rendering. Avoid "naked" text nodes.**
**Reason:** The `Text` component is fully compliant with the stylings provided in Figma. It provides easy utilities to specify the text-colour and font-size in the form of flags. Super duper easy.
```typescript
// ✅ Good
import { Text } from '@/refresh-components/texts/Text'
function UserCard({ name }: { name: string }) {
return (
<Text
// The `text03` flag colours the rendered text with the 3rd-scale grey
text03
// The `mainAction` flag applies the "main-action" font, line-height, and weight described in the Figma
mainAction
>
{name}
</Text>
)
}
// ❌ Bad
function UserCard({ name }: { name: string }) {
return (
<div>
<h2>{name}</h2>
<p>User details</p>
</div>
)
}
```
### 10. Component Usage
**Strongly avoid raw HTML input components. Always use components from the `web/src/refresh-components` or `web/lib/opal/src` directory.**
**Reason:** We've put in a lot of effort to unify the components that are rendered in the Onyx app. Using raw components breaks the entire UI of the application, and leaves it in a muddier state than before.
```typescript
// ✅ Good
import Button from '@/refresh-components/buttons/Button'
import InputTypeIn from '@/refresh-components/inputs/InputTypeIn'
import SvgPlusCircle from '@/icons/plus-circle'
function ContactForm() {
return (
<form>
<InputTypeIn placeholder="Search..." />
<Button type="submit" leftIcon={SvgPlusCircle}>Submit</Button>
</form>
)
}
// ❌ Bad
function ContactForm() {
return (
<form>
<input placeholder="Name" />
<textarea placeholder="Message" />
<button type="submit">Submit</button>
</form>
)
}
```
### 11. Colors
**Always use custom overrides for colors and borders rather than built-in Tailwind CSS colors. These overrides live in `web/tailwind-themes/tailwind.config.js`.**
**Reason:** Our custom color system uses CSS variables that automatically handle dark mode and maintain design consistency across the app. Standard Tailwind colors bypass this system.
**Available color categories:**
- **Text:** `text-01` through `text-05`, `text-inverted-XX`
- **Backgrounds:** `background-neutral-XX`, `background-tint-XX` (and inverted variants)
- **Borders:** `border-01` through `border-05`, `border-inverted-XX`
- **Actions:** `action-link-XX`, `action-danger-XX`
- **Status:** `status-info-XX`, `status-success-XX`, `status-warning-XX`, `status-error-XX`
- **Theme:** `theme-primary-XX`, `theme-red-XX`, `theme-blue-XX`, etc.
```typescript
// ✅ Good - Use custom Onyx color classes
<div className="bg-background-neutral-01 border border-border-02" />
<div className="bg-background-tint-02 border border-border-01" />
<div className="bg-status-success-01" />
<div className="bg-action-link-01" />
<div className="bg-theme-primary-05" />
// ❌ Bad - Do NOT use standard Tailwind colors
<div className="bg-gray-100 border border-gray-300 text-gray-600" />
<div className="bg-white border border-slate-200" />
<div className="bg-green-100 text-green-700" />
<div className="bg-blue-100 text-blue-600" />
<div className="bg-indigo-500" />
```
### 12. Data Fetching
**Prefer using `useSWR` for data fetching. Data should generally be fetched on the client side. Components that need data should display a loader / placeholder while waiting for that data. Prefer loading data within the component that needs it rather than at the top level and passing it down.**
**Reason:** Client side fetching allows us to load the skeleton of the page without waiting for data to load, leading to a snappier UX. Loading data where needed reduces dependencies between a component and its parent component(s).
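A minimal sketch of this pattern (the `/api/projects` endpoint, the `Project` type, and the `fetcher` below are hypothetical, purely for illustration):
```typescript
import useSWR from "swr";
import { Text } from "@/refresh-components/texts/Text";

interface Project {
  id: string;
  name: string;
}

// Hypothetical endpoint + fetcher, for illustration only
const fetcher = (url: string) => fetch(url).then((res) => res.json());

// ✅ Good - the component fetches its own data and shows a placeholder while loading
function ProjectList() {
  const { data, error, isLoading } = useSWR<Project[]>("/api/projects", fetcher);

  if (isLoading) return <Text text03 mainAction>Loading projects...</Text>;
  if (error || !data) return <Text text03 mainAction>Failed to load projects.</Text>;

  return (
    <div className="p-4">
      {data.map((project) => (
        <Text key={project.id} mainAction>
          {project.name}
        </Text>
      ))}
    </div>
  );
}
```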
## Database & Migrations
### Running Migrations
@@ -580,6 +300,14 @@ will be tailing their logs to this file.
- Token management and rate limiting
- Custom prompts and agent actions
## UI/UX Patterns
- Tailwind CSS with design system in `web/src/components/ui/`
- Radix UI and Headless UI for accessible components
- SWR for data fetching and caching
- Form validation with react-hook-form
- Error handling with popup notifications
## Creating a Plan
When creating a plan in the `plans` directory, make sure to include at least these elements:

View File

@@ -1,31 +1,262 @@
<!-- ONYX_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/CONTRIBUTING.md"} -->
# Contributing to Onyx
Hey there! We are so excited that you're interested in Onyx.
As an open source project in a rapidly changing space, we welcome all contributions.
## Contribution Opportunities
The [GitHub Issues](https://github.com/onyx-dot-app/onyx/issues) page is a great place to look for and share contribution ideas.
## 💃 Guidelines
If you have your own feature that you would like to build, please create an issue so community members can provide feedback and
give it a thumbs-up if they share the need.
### Contribution Opportunities
The [GitHub Issues](https://github.com/onyx-dot-app/onyx/issues) page is a great place to start for contribution ideas.
## Contributing Code
Please reference the documents in the `contributing_guides` folder to ensure that the codebase is kept to a high standard.
1. dev_setup.md (start here): a guide to setting up a local development environment.
2. contribution_process.md: how to ensure you are building valuable features that will get reviewed and merged.
3. best_practices.md: before asking for reviews, ensure your changes meet the repo's code quality standards.
To ensure that your contribution is aligned with the project's direction, please reach out to any maintainer on the Onyx team
via [Discord](https://discord.gg/4NA5SbzrWb) or [email](mailto:hello@onyx.app).
To contribute, please follow the
Issues that have been explicitly approved by the maintainers (aligned with the direction of the project)
will be marked with the `approved by maintainers` label.
Issues marked `good first issue` are an especially great place to start.
**Connectors** to other tools are another great place to contribute. For details on how, refer to this
[README.md](https://github.com/onyx-dot-app/onyx/blob/main/backend/onyx/connectors/README.md).
If you have a new/different contribution in mind, we'd love to hear about it!
Your input is vital to making sure that Onyx moves in the right direction.
Before starting on implementation, please raise a GitHub issue.
Also, always feel free to message the founders (Chris Weaver / Yuhong Sun) on
[Discord](https://discord.gg/4NA5SbzrWb) directly about anything at all.
### Contributing Code
To contribute to this project, please follow the
["fork and pull request"](https://docs.github.com/en/get-started/quickstart/contributing-to-projects) workflow.
When opening a pull request, mention related issues and feel free to tag relevant maintainers.
Before creating a pull request, please make sure that the new changes conform to the formatting and linting requirements.
See the [Formatting and Linting](#formatting-and-linting) section for how to run these checks locally.
### Getting Help 🙋
Our goal is to make contributing as easy as possible. If you run into any issues, please don't hesitate to reach out.
That way, we can help future contributors and users avoid the same issue.
We also have support channels and generally interesting discussions on our
[Discord](https://discord.gg/4NA5SbzrWb).
We would love to see you there!
## Get Started 🚀
Onyx, being a fully functional app, relies on some external software, specifically:
- [Postgres](https://www.postgresql.org/) (Relational DB)
- [Vespa](https://vespa.ai/) (Vector DB/Search Engine)
- [Redis](https://redis.io/) (Cache)
- [MinIO](https://min.io/) (File Store)
- [Nginx](https://nginx.org/) (Not needed for development flows generally)
> **Note:**
> This guide provides instructions to build and run Onyx locally from source with Docker containers providing the above external software. We believe this combination is easier for
> development purposes. If you prefer to use pre-built container images, we provide instructions on running the full Onyx stack within Docker below.
### Local Set Up
Be sure to use Python version 3.11. For instructions on installing Python 3.11 on macOS, refer to the [CONTRIBUTING_MACOS.md](./CONTRIBUTING_MACOS.md) readme.
If using a lower version, modifications will have to be made to the code.
If using a higher version, some libraries may not be available (e.g., we had problems with TensorFlow in the past on higher versions of Python).
#### Backend: Python requirements
Currently, we use [uv](https://docs.astral.sh/uv/) and recommend creating a [virtual environment](https://docs.astral.sh/uv/pip/environments/#using-a-virtual-environment).
For convenience here's a command for it:
```bash
uv venv .venv --python 3.11
source .venv/bin/activate
```
_For Windows, activate the virtual environment using Command Prompt:_
```bash
.venv\Scripts\activate
```
If using PowerShell, the command slightly differs:
```powershell
.venv\Scripts\Activate.ps1
```
Install the required python dependencies:
```bash
uv sync --all-extras
```
Install Playwright for Python (headless browser required by the Web Connector):
```bash
uv run playwright install
```
#### Frontend: Node dependencies
Onyx uses Node v22.20.0. We highly recommend you use [Node Version Manager (nvm)](https://github.com/nvm-sh/nvm)
to manage your Node installations. Once installed, you can run
```bash
nvm install 22 && nvm use 22
node -v # verify your active version
```
Navigate to `onyx/web` and run:
```bash
npm i
```
## Formatting and Linting
### Backend
For the backend, you'll need to set up pre-commit hooks (black / reorder-python-imports).
Then run:
```bash
uv run pre-commit install
```
Additionally, we use `mypy` for static type checking.
Onyx is fully type-annotated, and we want to keep it that way!
To run the mypy checks manually, run `uv run mypy .` from the `onyx/backend` directory.
### Web
We use `prettier` for formatting. The desired version will be installed via a `npm i` from the `onyx/web` directory.
To run the formatter, use `npx prettier --write .` from the `onyx/web` directory.
Pre-commit will also run prettier automatically on files you've recently touched. If re-formatted, your commit will fail.
Re-stage your changes and commit again.
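For example, a typical recovery flow after the hook reformats files (pre-commit has already rewritten them, so you only need to re-stage):
```bash
git add -u   # re-stage the files prettier just reformatted
git commit
```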
# Running the application for development
## Developing using VSCode Debugger (recommended)
**We highly recommend using VSCode debugger for development.**
See [CONTRIBUTING_VSCODE.md](./CONTRIBUTING_VSCODE.md) for more details.
Otherwise, you can follow the instructions below to run the application for development.
## Manually running the application for development
### Docker containers for external software
You will need Docker installed to run these containers.
First navigate to `onyx/deployment/docker_compose`, then start up Postgres/Vespa/Redis/MinIO with:
```bash
docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d index relational_db cache minio
```
(index refers to Vespa, relational_db refers to Postgres, and cache refers to Redis)
### Running Onyx locally
To start the frontend, navigate to `onyx/web` and run:
```bash
npm run dev
```
Next, start the model server which runs the local NLP models.
Navigate to `onyx/backend` and run:
```bash
uvicorn model_server.main:app --reload --port 9000
```
_For Windows (for compatibility with both PowerShell and Command Prompt):_
```bash
powershell -Command "uvicorn model_server.main:app --reload --port 9000"
```
The first time running Onyx, you will need to run the DB migrations for Postgres.
After the first time, this is no longer required unless the DB models change.
Navigate to `onyx/backend` and with the venv active, run:
```bash
alembic upgrade head
```
Next, start the task queue, which orchestrates the background jobs.
Jobs that take more time are run asynchronously from the API server.
Still in `onyx/backend`, run:
```bash
python ./scripts/dev_run_background_jobs.py
```
To run the backend API server, navigate back to `onyx/backend` and run:
```bash
AUTH_TYPE=disabled uvicorn onyx.main:app --reload --port 8080
```
_For Windows (for compatibility with both PowerShell and Command Prompt):_
```bash
powershell -Command "
$env:AUTH_TYPE='disabled'
uvicorn onyx.main:app --reload --port 8080
"
```
> **Note:**
> If you need finer logging, add the additional environment variable `LOG_LEVEL=DEBUG` to the relevant services.
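For example, to run the API server with debug logging, prepend the variable to the same command as above:
```bash
LOG_LEVEL=DEBUG AUTH_TYPE=disabled uvicorn onyx.main:app --reload --port 8080
```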
#### Wrapping up
You should now have 4 servers running:
- Web server
- Backend API
- Model server
- Background jobs
Now, visit `http://localhost:3000` in your browser. You should see the Onyx onboarding wizard where you can connect your external LLM provider to Onyx.
You've successfully set up a local Onyx instance! 🏁
#### Running the Onyx application in a container
You can run the full Onyx application stack from pre-built images including all external software dependencies.
Navigate to `onyx/deployment/docker_compose` and run:
```bash
docker compose up -d
```
After Docker pulls and starts these containers, navigate to `http://localhost:3000` to use Onyx.
If you want to make changes to Onyx and run those changes in Docker, you can also build a local version of the Onyx container images that incorporates your changes like so:
```bash
docker compose up -d --build
```
## Getting Help 🙋
We have support channels and generally interesting discussions on our [Discord](https://discord.gg/4NA5SbzrWb).
### Release Process
See you there!
## Release Process
Onyx loosely follows the SemVer versioning standard.
Major changes are released with a "minor" version bump. Currently we use patch release versions to indicate small feature changes.
A set of Docker containers will be pushed automatically to DockerHub with every tag.

View File

@@ -7,6 +7,8 @@ This guide explains how to set up and use VSCode's debugging capabilities with t
1. **Environment Setup**:
- Copy `.vscode/env_template.txt` to `.vscode/.env`
- Fill in the necessary environment variables in `.vscode/.env`
2. **launch.json**:
- Copy `.vscode/launch.template.jsonc` to `.vscode/launch.json`
## Using the Debugger

View File

@@ -37,6 +37,10 @@ CVE-2023-50868
CVE-2023-52425
CVE-2024-28757
# sqlite, only used by NLTK library to grab word lemmatizer and stopwords
# No impact in our settings
CVE-2023-7104
# libharfbuzz0b, O(n^2) growth, worst case is denial of service
# Accept the risk
CVE-2023-25193

View File

@@ -89,6 +89,12 @@ RUN uv pip install --system --no-cache-dir --upgrade \
RUN python -c "from tokenizers import Tokenizer; \
Tokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1')"
# Pre-downloading NLTK for setups with limited egress
RUN python -c "import nltk; \
nltk.download('stopwords', quiet=True); \
nltk.download('punkt_tab', quiet=True);"
# nltk.download('wordnet', quiet=True); introduce this back if lemmatization is needed
# Pre-downloading tiktoken for setups with limited egress
RUN python -c "import tiktoken; \
tiktoken.get_encoding('cl100k_base')"

View File

@@ -225,6 +225,7 @@ def do_run_migrations(
) -> None:
if create_schema:
connection.execute(text(f'CREATE SCHEMA IF NOT EXISTS "{schema_name}"'))
connection.execute(text("COMMIT"))
connection.execute(text(f'SET search_path TO "{schema_name}"'))
@@ -308,7 +309,6 @@ async def run_async_migrations() -> None:
schema_name=schema,
create_schema=create_schema,
)
await connection.commit()
except Exception as e:
logger.error(f"Error migrating schema {schema}: {e}")
if not continue_on_error:
@@ -346,7 +346,6 @@ async def run_async_migrations() -> None:
schema_name=schema,
create_schema=create_schema,
)
await connection.commit()
except Exception as e:
logger.error(f"Error migrating schema {schema}: {e}")
if not continue_on_error:

View File

@@ -1,46 +0,0 @@
"""usage_limits
Revision ID: 2b90f3af54b8
Revises: 9a0296d7421e
Create Date: 2026-01-03 16:55:30.449692
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "2b90f3af54b8"
down_revision = "9a0296d7421e"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.create_table(
"tenant_usage",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column(
"window_start", sa.DateTime(timezone=True), nullable=False, index=True
),
sa.Column("llm_cost_cents", sa.Float(), nullable=False, server_default="0.0"),
sa.Column("chunks_indexed", sa.Integer(), nullable=False, server_default="0"),
sa.Column("api_calls", sa.Integer(), nullable=False, server_default="0"),
sa.Column(
"non_streaming_api_calls", sa.Integer(), nullable=False, server_default="0"
),
sa.Column(
"updated_at",
sa.DateTime(timezone=True),
server_default=sa.func.now(),
nullable=True,
),
sa.PrimaryKeyConstraint("id"),
sa.UniqueConstraint("window_start", name="uq_tenant_usage_window"),
)
def downgrade() -> None:
op.drop_index("ix_tenant_usage_window_start", table_name="tenant_usage")
op.drop_table("tenant_usage")

View File

@@ -1,42 +0,0 @@
"""add_unique_constraint_to_inputprompt_prompt_user_id
Revision ID: 2c2430828bdf
Revises: fb80bdd256de
Create Date: 2026-01-20 16:01:54.314805
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "2c2430828bdf"
down_revision = "fb80bdd256de"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Create unique constraint on (prompt, user_id) for user-owned prompts
# This ensures each user can only have one shortcut with a given name
op.create_unique_constraint(
"uq_inputprompt_prompt_user_id",
"inputprompt",
["prompt", "user_id"],
)
# Create partial unique index for public prompts (where user_id IS NULL)
# PostgreSQL unique constraints don't enforce uniqueness for NULL values,
# so we need a partial index to ensure public prompt names are also unique
op.execute(
"""
CREATE UNIQUE INDEX uq_inputprompt_prompt_public
ON inputprompt (prompt)
WHERE user_id IS NULL
"""
)
def downgrade() -> None:
op.execute("DROP INDEX IF EXISTS uq_inputprompt_prompt_public")
op.drop_constraint("uq_inputprompt_prompt_user_id", "inputprompt", type_="unique")

View File

@@ -1,29 +0,0 @@
"""remove default prompt shortcuts
Revision ID: 41fa44bef321
Revises: 2c2430828bdf
Create Date: 2025-01-21
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "41fa44bef321"
down_revision = "2c2430828bdf"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Delete any user associations for the default prompts first (foreign key constraint)
op.execute(
"DELETE FROM inputprompt__user WHERE input_prompt_id IN (SELECT id FROM inputprompt WHERE id < 0)"
)
# Delete the pre-seeded default prompt shortcuts (they have negative IDs)
op.execute("DELETE FROM inputprompt WHERE id < 0")
def downgrade() -> None:
# We don't restore the default prompts on downgrade
pass

View File

@@ -11,7 +11,7 @@ from pydantic import BaseModel, ConfigDict
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from onyx.llm.well_known_providers.llm_provider_options import (
from onyx.llm.llm_provider_options import (
fetch_model_names_for_provider_as_set,
fetch_visible_model_names_for_provider_as_set,
)

View File

@@ -85,122 +85,103 @@ class UserRow(NamedTuple):
def upgrade() -> None:
conn = op.get_bind()
# Step 1: Create or update the unified assistant (ID 0)
search_assistant = conn.execute(
sa.text("SELECT * FROM persona WHERE id = 0")
).fetchone()
# Start transaction
conn.execute(sa.text("BEGIN"))
if search_assistant:
# Update existing Search assistant to be the unified assistant
try:
# Step 1: Create or update the unified assistant (ID 0)
search_assistant = conn.execute(
sa.text("SELECT * FROM persona WHERE id = 0")
).fetchone()
if search_assistant:
# Update existing Search assistant to be the unified assistant
conn.execute(
sa.text(
"""
UPDATE persona
SET name = :name,
description = :description,
system_prompt = :system_prompt,
num_chunks = :num_chunks,
is_default_persona = true,
is_visible = true,
deleted = false,
display_priority = :display_priority,
llm_filter_extraction = :llm_filter_extraction,
llm_relevance_filter = :llm_relevance_filter,
recency_bias = :recency_bias,
chunks_above = :chunks_above,
chunks_below = :chunks_below,
datetime_aware = :datetime_aware,
starter_messages = null
WHERE id = 0
"""
),
INSERT_DICT,
)
else:
# Create new unified assistant with ID 0
conn.execute(
sa.text(
"""
INSERT INTO persona (
id, name, description, system_prompt, num_chunks,
is_default_persona, is_visible, deleted, display_priority,
llm_filter_extraction, llm_relevance_filter, recency_bias,
chunks_above, chunks_below, datetime_aware, starter_messages,
builtin_persona
) VALUES (
0, :name, :description, :system_prompt, :num_chunks,
true, true, false, :display_priority, :llm_filter_extraction,
:llm_relevance_filter, :recency_bias, :chunks_above, :chunks_below,
:datetime_aware, null, true
)
"""
),
INSERT_DICT,
)
# Step 2: Mark ALL builtin assistants as deleted (except the unified assistant ID 0)
conn.execute(
sa.text(
"""
UPDATE persona
SET name = :name,
description = :description,
system_prompt = :system_prompt,
num_chunks = :num_chunks,
is_default_persona = true,
is_visible = true,
deleted = false,
display_priority = :display_priority,
llm_filter_extraction = :llm_filter_extraction,
llm_relevance_filter = :llm_relevance_filter,
recency_bias = :recency_bias,
chunks_above = :chunks_above,
chunks_below = :chunks_below,
datetime_aware = :datetime_aware,
starter_messages = null
WHERE id = 0
SET deleted = true, is_visible = false, is_default_persona = false
WHERE builtin_persona = true AND id != 0
"""
),
INSERT_DICT,
)
else:
# Create new unified assistant with ID 0
conn.execute(
sa.text(
"""
INSERT INTO persona (
id, name, description, system_prompt, num_chunks,
is_default_persona, is_visible, deleted, display_priority,
llm_filter_extraction, llm_relevance_filter, recency_bias,
chunks_above, chunks_below, datetime_aware, starter_messages,
builtin_persona
) VALUES (
0, :name, :description, :system_prompt, :num_chunks,
true, true, false, :display_priority, :llm_filter_extraction,
:llm_relevance_filter, :recency_bias, :chunks_above, :chunks_below,
:datetime_aware, null, true
)
"""
),
INSERT_DICT,
)
)
# Step 2: Mark ALL builtin assistants as deleted (except the unified assistant ID 0)
conn.execute(
sa.text(
"""
UPDATE persona
SET deleted = true, is_visible = false, is_default_persona = false
WHERE builtin_persona = true AND id != 0
"""
)
)
# Step 3: Add all built-in tools to the unified assistant
# First, get the tool IDs for SearchTool, ImageGenerationTool, and WebSearchTool
search_tool = conn.execute(
sa.text("SELECT id FROM tool WHERE in_code_tool_id = 'SearchTool'")
).fetchone()
# Step 3: Add all built-in tools to the unified assistant
# First, get the tool IDs for SearchTool, ImageGenerationTool, and WebSearchTool
search_tool = conn.execute(
sa.text("SELECT id FROM tool WHERE in_code_tool_id = 'SearchTool'")
).fetchone()
if not search_tool:
raise ValueError(
"SearchTool not found in database. Ensure tools migration has run first."
)
if not search_tool:
raise ValueError(
"SearchTool not found in database. Ensure tools migration has run first."
)
image_gen_tool = conn.execute(
sa.text("SELECT id FROM tool WHERE in_code_tool_id = 'ImageGenerationTool'")
).fetchone()
image_gen_tool = conn.execute(
sa.text("SELECT id FROM tool WHERE in_code_tool_id = 'ImageGenerationTool'")
).fetchone()
if not image_gen_tool:
raise ValueError(
"ImageGenerationTool not found in database. Ensure tools migration has run first."
)
if not image_gen_tool:
raise ValueError(
"ImageGenerationTool not found in database. Ensure tools migration has run first."
)
# WebSearchTool is optional - may not be configured
web_search_tool = conn.execute(
sa.text("SELECT id FROM tool WHERE in_code_tool_id = 'WebSearchTool'")
).fetchone()
# WebSearchTool is optional - may not be configured
web_search_tool = conn.execute(
sa.text("SELECT id FROM tool WHERE in_code_tool_id = 'WebSearchTool'")
).fetchone()
# Clear existing tool associations for persona 0
conn.execute(sa.text("DELETE FROM persona__tool WHERE persona_id = 0"))
# Clear existing tool associations for persona 0
conn.execute(sa.text("DELETE FROM persona__tool WHERE persona_id = 0"))
# Add tools to the unified assistant
conn.execute(
sa.text(
"""
INSERT INTO persona__tool (persona_id, tool_id)
VALUES (0, :tool_id)
ON CONFLICT DO NOTHING
"""
),
{"tool_id": search_tool[0]},
)
conn.execute(
sa.text(
"""
INSERT INTO persona__tool (persona_id, tool_id)
VALUES (0, :tool_id)
ON CONFLICT DO NOTHING
"""
),
{"tool_id": image_gen_tool[0]},
)
if web_search_tool:
# Add tools to the unified assistant
conn.execute(
sa.text(
"""
@@ -209,148 +190,191 @@ def upgrade() -> None:
ON CONFLICT DO NOTHING
"""
),
{"tool_id": web_search_tool[0]},
{"tool_id": search_tool[0]},
)
# Step 4: Migrate existing chat sessions from all builtin assistants to unified assistant
conn.execute(
sa.text(
conn.execute(
sa.text(
"""
INSERT INTO persona__tool (persona_id, tool_id)
VALUES (0, :tool_id)
ON CONFLICT DO NOTHING
"""
UPDATE chat_session
SET persona_id = 0
WHERE persona_id IN (
SELECT id FROM persona WHERE builtin_persona = true AND id != 0
)
"""
),
{"tool_id": image_gen_tool[0]},
)
)
# Step 5: Migrate user preferences - remove references to all builtin assistants
# First, get all builtin assistant IDs (except 0)
builtin_assistants_result = conn.execute(
sa.text(
"""
SELECT id FROM persona
WHERE builtin_persona = true AND id != 0
"""
)
).fetchall()
builtin_assistant_ids = [row[0] for row in builtin_assistants_result]
# Get all users with preferences
users_result = conn.execute(
sa.text(
"""
SELECT id, chosen_assistants, visible_assistants,
hidden_assistants, pinned_assistants
FROM "user"
"""
)
).fetchall()
for user_row in users_result:
user = UserRow(*user_row)
user_id: UUID = user.id
updates: dict[str, Any] = {}
# Remove all builtin assistants from chosen_assistants
if user.chosen_assistants:
new_chosen: list[int] = [
assistant_id
for assistant_id in user.chosen_assistants
if assistant_id not in builtin_assistant_ids
]
if new_chosen != user.chosen_assistants:
updates["chosen_assistants"] = json.dumps(new_chosen)
# Remove all builtin assistants from visible_assistants
if user.visible_assistants:
new_visible: list[int] = [
assistant_id
for assistant_id in user.visible_assistants
if assistant_id not in builtin_assistant_ids
]
if new_visible != user.visible_assistants:
updates["visible_assistants"] = json.dumps(new_visible)
# Add all builtin assistants to hidden_assistants
if user.hidden_assistants:
new_hidden: list[int] = list(user.hidden_assistants)
for old_id in builtin_assistant_ids:
if old_id not in new_hidden:
new_hidden.append(old_id)
if new_hidden != user.hidden_assistants:
updates["hidden_assistants"] = json.dumps(new_hidden)
else:
updates["hidden_assistants"] = json.dumps(builtin_assistant_ids)
# Remove all builtin assistants from pinned_assistants
if user.pinned_assistants:
new_pinned: list[int] = [
assistant_id
for assistant_id in user.pinned_assistants
if assistant_id not in builtin_assistant_ids
]
if new_pinned != user.pinned_assistants:
updates["pinned_assistants"] = json.dumps(new_pinned)
# Apply updates if any
if updates:
set_clause = ", ".join([f"{k} = :{k}" for k in updates.keys()])
updates["user_id"] = str(user_id) # Convert UUID to string for SQL
if web_search_tool:
conn.execute(
sa.text(f'UPDATE "user" SET {set_clause} WHERE id = :user_id'),
updates,
sa.text(
"""
INSERT INTO persona__tool (persona_id, tool_id)
VALUES (0, :tool_id)
ON CONFLICT DO NOTHING
"""
),
{"tool_id": web_search_tool[0]},
)
# Step 4: Migrate existing chat sessions from all builtin assistants to unified assistant
conn.execute(
sa.text(
"""
UPDATE chat_session
SET persona_id = 0
WHERE persona_id IN (
SELECT id FROM persona WHERE builtin_persona = true AND id != 0
)
"""
)
)
# Step 5: Migrate user preferences - remove references to all builtin assistants
# First, get all builtin assistant IDs (except 0)
builtin_assistants_result = conn.execute(
sa.text(
"""
SELECT id FROM persona
WHERE builtin_persona = true AND id != 0
"""
)
).fetchall()
builtin_assistant_ids = [row[0] for row in builtin_assistants_result]
# Get all users with preferences
users_result = conn.execute(
sa.text(
"""
SELECT id, chosen_assistants, visible_assistants,
hidden_assistants, pinned_assistants
FROM "user"
"""
)
).fetchall()
for user_row in users_result:
user = UserRow(*user_row)
user_id: UUID = user.id
updates: dict[str, Any] = {}
# Remove all builtin assistants from chosen_assistants
if user.chosen_assistants:
new_chosen: list[int] = [
assistant_id
for assistant_id in user.chosen_assistants
if assistant_id not in builtin_assistant_ids
]
if new_chosen != user.chosen_assistants:
updates["chosen_assistants"] = json.dumps(new_chosen)
# Remove all builtin assistants from visible_assistants
if user.visible_assistants:
new_visible: list[int] = [
assistant_id
for assistant_id in user.visible_assistants
if assistant_id not in builtin_assistant_ids
]
if new_visible != user.visible_assistants:
updates["visible_assistants"] = json.dumps(new_visible)
# Add all builtin assistants to hidden_assistants
if user.hidden_assistants:
new_hidden: list[int] = list(user.hidden_assistants)
for old_id in builtin_assistant_ids:
if old_id not in new_hidden:
new_hidden.append(old_id)
if new_hidden != user.hidden_assistants:
updates["hidden_assistants"] = json.dumps(new_hidden)
else:
updates["hidden_assistants"] = json.dumps(builtin_assistant_ids)
# Remove all builtin assistants from pinned_assistants
if user.pinned_assistants:
new_pinned: list[int] = [
assistant_id
for assistant_id in user.pinned_assistants
if assistant_id not in builtin_assistant_ids
]
if new_pinned != user.pinned_assistants:
updates["pinned_assistants"] = json.dumps(new_pinned)
# Apply updates if any
if updates:
set_clause = ", ".join([f"{k} = :{k}" for k in updates.keys()])
updates["user_id"] = str(user_id) # Convert UUID to string for SQL
conn.execute(
sa.text(f'UPDATE "user" SET {set_clause} WHERE id = :user_id'),
updates,
)
# Commit transaction
conn.execute(sa.text("COMMIT"))
except Exception as e:
# Rollback on error
conn.execute(sa.text("ROLLBACK"))
raise e
def downgrade() -> None:
conn = op.get_bind()
# Only restore General (ID -1) and Art (ID -3) assistants
# Step 1: Keep Search assistant (ID 0) as default but restore original state
conn.execute(
sa.text(
# Start transaction
conn.execute(sa.text("BEGIN"))
try:
# Only restore General (ID -1) and Art (ID -3) assistants
# Step 1: Keep Search assistant (ID 0) as default but restore original state
conn.execute(
sa.text(
"""
UPDATE persona
SET is_default_persona = true,
is_visible = true,
deleted = false
WHERE id = 0
"""
UPDATE persona
SET is_default_persona = true,
is_visible = true,
deleted = false
WHERE id = 0
"""
)
)
)
# Step 2: Restore General assistant (ID -1)
conn.execute(
sa.text(
# Step 2: Restore General assistant (ID -1)
conn.execute(
sa.text(
"""
UPDATE persona
SET deleted = false,
is_visible = true,
is_default_persona = true
WHERE id = :general_assistant_id
"""
UPDATE persona
SET deleted = false,
is_visible = true,
is_default_persona = true
WHERE id = :general_assistant_id
"""
),
{"general_assistant_id": GENERAL_ASSISTANT_ID},
)
),
{"general_assistant_id": GENERAL_ASSISTANT_ID},
)
# Step 3: Restore Art assistant (ID -3)
conn.execute(
sa.text(
# Step 3: Restore Art assistant (ID -3)
conn.execute(
sa.text(
"""
UPDATE persona
SET deleted = false,
is_visible = true,
is_default_persona = true
WHERE id = :art_assistant_id
"""
UPDATE persona
SET deleted = false,
is_visible = true,
is_default_persona = true
WHERE id = :art_assistant_id
"""
),
{"art_assistant_id": ART_ASSISTANT_ID},
)
),
{"art_assistant_id": ART_ASSISTANT_ID},
)
# Note: We don't restore the original tool associations, names, or descriptions
# as those would require more complex logic to determine original state.
# We also cannot restore original chat session persona_ids as we don't
# have the original mappings.
# Other builtin assistants remain deleted as per the requirement.
# Note: We don't restore the original tool associations, names, or descriptions
# as those would require more complex logic to determine original state.
# We also cannot restore original chat session persona_ids as we don't
# have the original mappings.
# Other builtin assistants remain deleted as per the requirement.
# Commit transaction
conn.execute(sa.text("COMMIT"))
except Exception as e:
# Rollback on error
conn.execute(sa.text("ROLLBACK"))
raise e

View File

@@ -1,35 +0,0 @@
"""backend driven notification details
Revision ID: 5c3dca366b35
Revises: 9087b548dd69
Create Date: 2026-01-06 16:03:11.413724
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "5c3dca366b35"
down_revision = "9087b548dd69"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"notification",
sa.Column(
"title", sa.String(), nullable=False, server_default="New Notification"
),
)
op.add_column(
"notification",
sa.Column("description", sa.String(), nullable=True, server_default=""),
)
def downgrade() -> None:
op.drop_column("notification", "title")
op.drop_column("notification", "description")

View File

@@ -1,75 +0,0 @@
"""nullify_default_task_prompt
Revision ID: 699221885109
Revises: 7e490836d179
Create Date: 2025-12-30 10:00:00.000000
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "699221885109"
down_revision = "7e490836d179"
branch_labels = None
depends_on = None
DEFAULT_PERSONA_ID = 0
def upgrade() -> None:
# Make task_prompt column nullable
# Note: The model had nullable=True but the DB column was NOT NULL until this point
op.alter_column(
"persona",
"task_prompt",
nullable=True,
)
# Set task_prompt to NULL for the default persona
conn = op.get_bind()
conn.execute(
sa.text(
"""
UPDATE persona
SET task_prompt = NULL
WHERE id = :persona_id
"""
),
{"persona_id": DEFAULT_PERSONA_ID},
)
def downgrade() -> None:
# Restore task_prompt to empty string for the default persona
conn = op.get_bind()
conn.execute(
sa.text(
"""
UPDATE persona
SET task_prompt = ''
WHERE id = :persona_id AND task_prompt IS NULL
"""
),
{"persona_id": DEFAULT_PERSONA_ID},
)
# Set any remaining NULL task_prompts to empty string before making non-nullable
conn.execute(
sa.text(
"""
UPDATE persona
SET task_prompt = ''
WHERE task_prompt IS NULL
"""
)
)
# Revert task_prompt column to not nullable
op.alter_column(
"persona",
"task_prompt",
nullable=False,
)

View File

@@ -1,54 +0,0 @@
"""add image generation config table
Revision ID: 7206234e012a
Revises: 699221885109
Create Date: 2025-12-21 00:00:00.000000
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "7206234e012a"
down_revision = "699221885109"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.create_table(
"image_generation_config",
sa.Column("image_provider_id", sa.String(), primary_key=True),
sa.Column("model_configuration_id", sa.Integer(), nullable=False),
sa.Column("is_default", sa.Boolean(), nullable=False),
sa.ForeignKeyConstraint(
["model_configuration_id"],
["model_configuration.id"],
ondelete="CASCADE",
),
)
op.create_index(
"ix_image_generation_config_is_default",
"image_generation_config",
["is_default"],
unique=False,
)
op.create_index(
"ix_image_generation_config_model_configuration_id",
"image_generation_config",
["model_configuration_id"],
unique=False,
)
def downgrade() -> None:
op.drop_index(
"ix_image_generation_config_model_configuration_id",
table_name="image_generation_config",
)
op.drop_index(
"ix_image_generation_config_is_default", table_name="image_generation_config"
)
op.drop_table("image_generation_config")

View File

@@ -1,47 +0,0 @@
"""add_search_query_table
Revision ID: 73e9983e5091
Revises: d1b637d7050a
Create Date: 2026-01-14 14:16:52.837489
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "73e9983e5091"
down_revision = "d1b637d7050a"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.create_table(
"search_query",
sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
sa.Column(
"user_id",
postgresql.UUID(as_uuid=True),
sa.ForeignKey("user.id"),
nullable=False,
),
sa.Column("query", sa.String(), nullable=False),
sa.Column("query_expansions", postgresql.ARRAY(sa.String()), nullable=True),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
nullable=False,
server_default=sa.func.now(),
),
)
op.create_index("ix_search_query_user_id", "search_query", ["user_id"])
op.create_index("ix_search_query_created_at", "search_query", ["created_at"])
def downgrade() -> None:
op.drop_index("ix_search_query_created_at", table_name="search_query")
op.drop_index("ix_search_query_user_id", table_name="search_query")
op.drop_table("search_query")

View File

@@ -10,7 +10,8 @@ from alembic import op
import sqlalchemy as sa
from onyx.db.models import IndexModelStatus
from onyx.context.search.enums import RecencyBiasSetting, SearchType
from onyx.context.search.enums import RecencyBiasSetting
from onyx.context.search.enums import SearchType
# revision identifiers, used by Alembic.
revision = "776b3bbe9092"

View File

@@ -10,7 +10,7 @@ from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from onyx.llm.well_known_providers.llm_provider_options import (
from onyx.llm.llm_provider_options import (
fetch_model_names_for_provider_as_set,
fetch_visible_model_names_for_provider_as_set,
)

View File

@@ -1,80 +0,0 @@
"""nullify_default_system_prompt
Revision ID: 7e490836d179
Revises: c1d2e3f4a5b6
Create Date: 2025-12-29 16:54:36.635574
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "7e490836d179"
down_revision = "c1d2e3f4a5b6"
branch_labels = None
depends_on = None
# This is the default system prompt from the previous migration (87c52ec39f84)
# ruff: noqa: E501, W605 start
PREVIOUS_DEFAULT_SYSTEM_PROMPT = """
You are a highly capable, thoughtful, and precise assistant. Your goal is to deeply understand the user's intent, ask clarifying questions when needed, think step-by-step through complex problems, provide clear and accurate answers, and proactively anticipate helpful follow-up information. Always prioritize being truthful, nuanced, insightful, and efficient.
The current date is [[CURRENT_DATETIME]].[[CITATION_GUIDANCE]]
# Response Style
You use different text styles, bolding, emojis (sparingly), block quotes, and other formatting to make your responses more readable and engaging.
You use proper Markdown and LaTeX to format your responses for math, scientific, and chemical formulas, symbols, etc.: '$$\\n[expression]\\n$$' for standalone cases and '\\( [expression] \\)' when inline.
For code you prefer to use Markdown and specify the language.
You can use horizontal rules (---) to separate sections of your responses.
You can use Markdown tables to format your responses for data, lists, and other structured information.
""".lstrip()
# ruff: noqa: E501, W605 end
def upgrade() -> None:
# Make system_prompt column nullable (model already has nullable=True but DB doesn't)
op.alter_column(
"persona",
"system_prompt",
nullable=True,
)
# Set system_prompt to NULL where it matches the previous default
conn = op.get_bind()
conn.execute(
sa.text(
"""
UPDATE persona
SET system_prompt = NULL
WHERE system_prompt = :previous_default
"""
),
{"previous_default": PREVIOUS_DEFAULT_SYSTEM_PROMPT},
)
def downgrade() -> None:
# Restore the default system prompt for personas that have NULL
# Note: This may restore the prompt to personas that originally had NULL
# before this migration, but there's no way to distinguish them
conn = op.get_bind()
conn.execute(
sa.text(
"""
UPDATE persona
SET system_prompt = :previous_default
WHERE system_prompt IS NULL
"""
),
{"previous_default": PREVIOUS_DEFAULT_SYSTEM_PROMPT},
)
# Revert system_prompt column to not nullable
op.alter_column(
"persona",
"system_prompt",
nullable=False,
)

View File

@@ -1,49 +0,0 @@
"""notifications constraint, sort index, and cleanup old notifications
Revision ID: 8405ca81cc83
Revises: a3c1a7904cd0
Create Date: 2026-01-07 16:43:44.855156
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "8405ca81cc83"
down_revision = "a3c1a7904cd0"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Create unique index for notification deduplication.
# This enables atomic ON CONFLICT DO NOTHING inserts in batch_create_notifications.
#
# Uses COALESCE to handle NULL additional_data (NULLs are normally distinct
# in unique constraints, but we want NULL == NULL for deduplication).
# The '{}' represents an empty JSONB object as the NULL replacement.
# Clean up legacy notifications first
op.execute("DELETE FROM notification WHERE title = 'New Notification'")
op.execute(
"""
CREATE UNIQUE INDEX IF NOT EXISTS ix_notification_user_type_data
ON notification (user_id, notif_type, COALESCE(additional_data, '{}'::jsonb))
"""
)
# Create index for efficient notification sorting by user
# Covers: WHERE user_id = ? ORDER BY dismissed, first_shown DESC
op.execute(
"""
CREATE INDEX IF NOT EXISTS ix_notification_user_sort
ON notification (user_id, dismissed, first_shown DESC)
"""
)
def downgrade() -> None:
op.execute("DROP INDEX IF EXISTS ix_notification_user_type_data")
op.execute("DROP INDEX IF EXISTS ix_notification_user_sort")

View File

@@ -1,116 +0,0 @@
"""Add Discord bot tables
Revision ID: 8b5ce697290e
Revises: a1b2c3d4e5f7
Create Date: 2025-01-14
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "8b5ce697290e"
down_revision = "a1b2c3d4e5f7"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
# DiscordBotConfig (singleton table - one per tenant)
op.create_table(
"discord_bot_config",
sa.Column(
"id",
sa.String(),
primary_key=True,
server_default=sa.text("'SINGLETON'"),
),
sa.Column("bot_token", sa.LargeBinary(), nullable=False), # EncryptedString
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.func.now(),
nullable=False,
),
sa.CheckConstraint("id = 'SINGLETON'", name="ck_discord_bot_config_singleton"),
)
# DiscordGuildConfig
op.create_table(
"discord_guild_config",
sa.Column("id", sa.Integer(), primary_key=True),
sa.Column("guild_id", sa.BigInteger(), nullable=True, unique=True),
sa.Column("guild_name", sa.String(), nullable=True),
sa.Column("registration_key", sa.String(), nullable=False, unique=True),
sa.Column("registered_at", sa.DateTime(timezone=True), nullable=True),
sa.Column(
"default_persona_id",
sa.Integer(),
sa.ForeignKey("persona.id", ondelete="SET NULL"),
nullable=True,
),
sa.Column(
"enabled", sa.Boolean(), server_default=sa.text("true"), nullable=False
),
)
# DiscordChannelConfig
op.create_table(
"discord_channel_config",
sa.Column("id", sa.Integer(), primary_key=True),
sa.Column(
"guild_config_id",
sa.Integer(),
sa.ForeignKey("discord_guild_config.id", ondelete="CASCADE"),
nullable=False,
),
sa.Column("channel_id", sa.BigInteger(), nullable=False),
sa.Column("channel_name", sa.String(), nullable=False),
sa.Column(
"channel_type",
sa.String(20),
server_default=sa.text("'text'"),
nullable=False,
),
sa.Column(
"is_private",
sa.Boolean(),
server_default=sa.text("false"),
nullable=False,
),
sa.Column(
"thread_only_mode",
sa.Boolean(),
server_default=sa.text("false"),
nullable=False,
),
sa.Column(
"require_bot_invocation",
sa.Boolean(),
server_default=sa.text("true"),
nullable=False,
),
sa.Column(
"persona_override_id",
sa.Integer(),
sa.ForeignKey("persona.id", ondelete="SET NULL"),
nullable=True,
),
sa.Column(
"enabled", sa.Boolean(), server_default=sa.text("false"), nullable=False
),
)
# Unique constraint: one config per channel per guild
op.create_unique_constraint(
"uq_discord_channel_guild_channel",
"discord_channel_config",
["guild_config_id", "channel_id"],
)
def downgrade() -> None:
op.drop_table("discord_channel_config")
op.drop_table("discord_guild_config")
op.drop_table("discord_bot_config")

View File

@@ -1,136 +0,0 @@
"""seed_default_image_gen_config
Revision ID: 9087b548dd69
Revises: 2b90f3af54b8
Create Date: 2026-01-05 00:00:00.000000
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "9087b548dd69"
down_revision = "2b90f3af54b8"
branch_labels = None
depends_on = None
# Constants for default image generation config
# Source: web/src/app/admin/configuration/image-generation/constants.ts
IMAGE_PROVIDER_ID = "openai_gpt_image_1"
MODEL_NAME = "gpt-image-1"
PROVIDER_NAME = "openai"
def upgrade() -> None:
conn = op.get_bind()
# Check if image_generation_config table already has records
existing_configs = (
conn.execute(sa.text("SELECT COUNT(*) FROM image_generation_config")).scalar()
or 0
)
if existing_configs > 0:
# Skip if configs already exist - user may have configured manually
return
# Find the first OpenAI LLM provider
openai_provider = conn.execute(
sa.text(
"""
SELECT id, api_key
FROM llm_provider
WHERE provider = :provider
ORDER BY id
LIMIT 1
"""
),
{"provider": PROVIDER_NAME},
).fetchone()
if not openai_provider:
# No OpenAI provider found - nothing to do
return
source_provider_id, api_key = openai_provider
# Create new LLM provider for image generation (clone only api_key)
result = conn.execute(
sa.text(
"""
INSERT INTO llm_provider (
name, provider, api_key, api_base, api_version,
deployment_name, default_model_name, is_public,
is_default_provider, is_default_vision_provider, is_auto_mode
)
VALUES (
:name, :provider, :api_key, NULL, NULL,
NULL, :default_model_name, :is_public,
NULL, NULL, :is_auto_mode
)
RETURNING id
"""
),
{
"name": f"Image Gen - {IMAGE_PROVIDER_ID}",
"provider": PROVIDER_NAME,
"api_key": api_key,
"default_model_name": MODEL_NAME,
"is_public": True,
"is_auto_mode": False,
},
)
new_provider_id = result.scalar()
# Create model configuration
result = conn.execute(
sa.text(
"""
INSERT INTO model_configuration (
llm_provider_id, name, is_visible, max_input_tokens,
supports_image_input, display_name
)
VALUES (
:llm_provider_id, :name, :is_visible, :max_input_tokens,
:supports_image_input, :display_name
)
RETURNING id
"""
),
{
"llm_provider_id": new_provider_id,
"name": MODEL_NAME,
"is_visible": True,
"max_input_tokens": None,
"supports_image_input": False,
"display_name": None,
},
)
model_config_id = result.scalar()
# Create image generation config
conn.execute(
sa.text(
"""
INSERT INTO image_generation_config (
image_provider_id, model_configuration_id, is_default
)
VALUES (
:image_provider_id, :model_configuration_id, :is_default
)
"""
),
{
"image_provider_id": IMAGE_PROVIDER_ID,
"model_configuration_id": model_config_id,
"is_default": True,
},
)
def downgrade() -> None:
# We don't remove the config on downgrade since it's safe to keep around
# If we upgrade again, it will be a no-op due to the existing records check
pass

View File

@@ -1,33 +0,0 @@
"""add_is_auto_mode_to_llm_provider
Revision ID: 9a0296d7421e
Revises: 7206234e012a
Create Date: 2025-12-17 18:14:29.620981
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "9a0296d7421e"
down_revision = "7206234e012a"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"llm_provider",
sa.Column(
"is_auto_mode",
sa.Boolean(),
nullable=False,
server_default="false",
),
)
def downgrade() -> None:
op.drop_column("llm_provider", "is_auto_mode")

View File

@@ -234,8 +234,6 @@ def downgrade() -> None:
if "instructions" in columns:
op.drop_column("user_project", "instructions")
op.execute("ALTER TABLE user_project RENAME TO user_folder")
# Update NULL descriptions to empty string before setting NOT NULL constraint
op.execute("UPDATE user_folder SET description = '' WHERE description IS NULL")
op.alter_column("user_folder", "description", nullable=False)
logger.info("Renamed user_project back to user_folder")

View File

@@ -42,13 +42,20 @@ TOOL_DESCRIPTIONS = {
def upgrade() -> None:
conn = op.get_bind()
for tool_id, description in TOOL_DESCRIPTIONS.items():
conn.execute(
sa.text(
"UPDATE tool SET description = :description WHERE in_code_tool_id = :tool_id"
),
{"description": description, "tool_id": tool_id},
)
conn.execute(sa.text("BEGIN"))
try:
for tool_id, description in TOOL_DESCRIPTIONS.items():
conn.execute(
sa.text(
"UPDATE tool SET description = :description WHERE in_code_tool_id = :tool_id"
),
{"description": description, "tool_id": tool_id},
)
conn.execute(sa.text("COMMIT"))
except Exception as e:
conn.execute(sa.text("ROLLBACK"))
raise e
def downgrade() -> None:

View File

@@ -1,47 +0,0 @@
"""drop agent_search_metrics table
Revision ID: a1b2c3d4e5f7
Revises: 73e9983e5091
Create Date: 2026-01-17
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "a1b2c3d4e5f7"
down_revision = "73e9983e5091"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.drop_table("agent__search_metrics")
def downgrade() -> None:
op.create_table(
"agent__search_metrics",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("user_id", sa.UUID(), nullable=True),
sa.Column("persona_id", sa.Integer(), nullable=True),
sa.Column("agent_type", sa.String(), nullable=False),
sa.Column("start_time", sa.DateTime(timezone=True), nullable=False),
sa.Column("base_duration_s", sa.Float(), nullable=False),
sa.Column("full_duration_s", sa.Float(), nullable=False),
sa.Column("base_metrics", postgresql.JSONB(), nullable=True),
sa.Column("refined_metrics", postgresql.JSONB(), nullable=True),
sa.Column("all_metrics", postgresql.JSONB(), nullable=True),
sa.ForeignKeyConstraint(
["user_id"],
["user.id"],
ondelete="CASCADE",
),
sa.ForeignKeyConstraint(
["persona_id"],
["persona.id"],
),
sa.PrimaryKeyConstraint("id"),
)

View File

@@ -1,39 +0,0 @@
"""remove userfile related deprecated fields
Revision ID: a3c1a7904cd0
Revises: 5c3dca366b35
Create Date: 2026-01-06 13:00:30.634396
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "a3c1a7904cd0"
down_revision = "5c3dca366b35"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.drop_column("user_file", "document_id")
op.drop_column("user_file", "document_id_migrated")
op.drop_column("connector_credential_pair", "is_user_file")
def downgrade() -> None:
op.add_column(
"connector_credential_pair",
sa.Column("is_user_file", sa.Boolean(), nullable=False, server_default="false"),
)
op.add_column(
"user_file",
sa.Column("document_id", sa.String(), nullable=True),
)
op.add_column(
"user_file",
sa.Column(
"document_id_migrated", sa.Boolean(), nullable=False, server_default="true"
),
)

View File

@@ -7,6 +7,7 @@ Create Date: 2025-12-18 16:00:00.000000
"""
from alembic import op
from onyx.deep_research.dr_mock_tools import RESEARCH_AGENT_DB_NAME
import sqlalchemy as sa
@@ -18,7 +19,7 @@ depends_on = None
DEEP_RESEARCH_TOOL = {
"name": "ResearchAgent",
"name": RESEARCH_AGENT_DB_NAME,
"display_name": "Research Agent",
"description": "The Research Agent is a sub-agent that conducts research on a specific topic.",
"in_code_tool_id": "ResearchAgent",

View File

@@ -70,66 +70,80 @@ BUILT_IN_TOOLS = [
def upgrade() -> None:
conn = op.get_bind()
# Get existing tools to check what already exists
existing_tools = conn.execute(
sa.text("SELECT in_code_tool_id FROM tool WHERE in_code_tool_id IS NOT NULL")
).fetchall()
existing_tool_ids = {row[0] for row in existing_tools}
# Start transaction
conn.execute(sa.text("BEGIN"))
# Insert or update built-in tools
for tool in BUILT_IN_TOOLS:
in_code_id = tool["in_code_tool_id"]
try:
# Get existing tools to check what already exists
existing_tools = conn.execute(
sa.text(
"SELECT in_code_tool_id FROM tool WHERE in_code_tool_id IS NOT NULL"
)
).fetchall()
existing_tool_ids = {row[0] for row in existing_tools}
# Handle historical rename: InternetSearchTool -> WebSearchTool
if (
in_code_id == "WebSearchTool"
and "WebSearchTool" not in existing_tool_ids
and "InternetSearchTool" in existing_tool_ids
):
# Rename the existing InternetSearchTool row in place and update fields
conn.execute(
sa.text(
"""
UPDATE tool
SET name = :name,
display_name = :display_name,
description = :description,
in_code_tool_id = :in_code_tool_id
WHERE in_code_tool_id = 'InternetSearchTool'
"""
),
tool,
)
# Keep the local view of existing ids in sync to avoid duplicate insert
existing_tool_ids.discard("InternetSearchTool")
existing_tool_ids.add("WebSearchTool")
continue
# Insert or update built-in tools
for tool in BUILT_IN_TOOLS:
in_code_id = tool["in_code_tool_id"]
if in_code_id in existing_tool_ids:
# Update existing tool
conn.execute(
sa.text(
"""
UPDATE tool
SET name = :name,
display_name = :display_name,
description = :description
WHERE in_code_tool_id = :in_code_tool_id
"""
),
tool,
)
else:
# Insert new tool
conn.execute(
sa.text(
"""
INSERT INTO tool (name, display_name, description, in_code_tool_id)
VALUES (:name, :display_name, :description, :in_code_tool_id)
"""
),
tool,
)
# Handle historical rename: InternetSearchTool -> WebSearchTool
if (
in_code_id == "WebSearchTool"
and "WebSearchTool" not in existing_tool_ids
and "InternetSearchTool" in existing_tool_ids
):
# Rename the existing InternetSearchTool row in place and update fields
conn.execute(
sa.text(
"""
UPDATE tool
SET name = :name,
display_name = :display_name,
description = :description,
in_code_tool_id = :in_code_tool_id
WHERE in_code_tool_id = 'InternetSearchTool'
"""
),
tool,
)
# Keep the local view of existing ids in sync to avoid duplicate insert
existing_tool_ids.discard("InternetSearchTool")
existing_tool_ids.add("WebSearchTool")
continue
if in_code_id in existing_tool_ids:
# Update existing tool
conn.execute(
sa.text(
"""
UPDATE tool
SET name = :name,
display_name = :display_name,
description = :description
WHERE in_code_tool_id = :in_code_tool_id
"""
),
tool,
)
else:
# Insert new tool
conn.execute(
sa.text(
"""
INSERT INTO tool (name, display_name, description, in_code_tool_id)
VALUES (:name, :display_name, :description, :in_code_tool_id)
"""
),
tool,
)
# Commit transaction
conn.execute(sa.text("COMMIT"))
except Exception as e:
# Rollback on error
conn.execute(sa.text("ROLLBACK"))
raise e
def downgrade() -> None:

View File

@@ -1,64 +0,0 @@
"""sync_exa_api_key_to_content_provider
Revision ID: d1b637d7050a
Revises: d25168c2beee
Create Date: 2026-01-09 15:54:15.646249
"""
from alembic import op
from sqlalchemy import text
# revision identifiers, used by Alembic.
revision = "d1b637d7050a"
down_revision = "d25168c2beee"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Exa uses a shared API key between search and content providers.
# For existing Exa search providers with API keys, create the corresponding
# content provider if it doesn't exist yet.
connection = op.get_bind()
# Check if Exa search provider exists with an API key
result = connection.execute(
text(
"""
SELECT api_key FROM internet_search_provider
WHERE provider_type = 'exa' AND api_key IS NOT NULL
LIMIT 1
"""
)
)
row = result.fetchone()
if row:
api_key = row[0]
# Create Exa content provider with the shared key
connection.execute(
text(
"""
INSERT INTO internet_content_provider
(name, provider_type, api_key, is_active)
VALUES ('Exa', 'exa', :api_key, false)
ON CONFLICT (name) DO NOTHING
"""
),
{"api_key": api_key},
)
def downgrade() -> None:
# Remove the Exa content provider that was created by this migration
connection = op.get_bind()
connection.execute(
text(
"""
DELETE FROM internet_content_provider
WHERE provider_type = 'exa'
"""
)
)

View File

@@ -1,86 +0,0 @@
"""tool_name_consistency
Revision ID: d25168c2beee
Revises: 8405ca81cc83
Create Date: 2026-01-11 17:54:40.135777
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "d25168c2beee"
down_revision = "8405ca81cc83"
branch_labels = None
depends_on = None
# Currently the seeded tools have the in_code_tool_id == name
CURRENT_TOOL_NAME_MAPPING = [
"SearchTool",
"WebSearchTool",
"ImageGenerationTool",
"PythonTool",
"OpenURLTool",
"KnowledgeGraphTool",
"ResearchAgent",
]
# Mapping of in_code_tool_id -> name
# These are the expected names that we want in the database
EXPECTED_TOOL_NAME_MAPPING = {
"SearchTool": "internal_search",
"WebSearchTool": "web_search",
"ImageGenerationTool": "generate_image",
"PythonTool": "python",
"OpenURLTool": "open_url",
"KnowledgeGraphTool": "run_kg_search",
"ResearchAgent": "research_agent",
}
def upgrade() -> None:
conn = op.get_bind()
# Mapping of in_code_tool_id to the NAME constant from each tool class
# These match the .name property of each tool implementation
tool_name_mapping = EXPECTED_TOOL_NAME_MAPPING
# Update the name column for each tool based on its in_code_tool_id
for in_code_tool_id, expected_name in tool_name_mapping.items():
conn.execute(
sa.text(
"""
UPDATE tool
SET name = :expected_name
WHERE in_code_tool_id = :in_code_tool_id
"""
),
{
"expected_name": expected_name,
"in_code_tool_id": in_code_tool_id,
},
)
def downgrade() -> None:
conn = op.get_bind()
# Reverse the migration by setting name back to in_code_tool_id
# This matches the original pattern where name was the class name
for in_code_tool_id in CURRENT_TOOL_NAME_MAPPING:
conn.execute(
sa.text(
"""
UPDATE tool
SET name = :current_name
WHERE in_code_tool_id = :in_code_tool_id
"""
),
{
"current_name": in_code_tool_id,
"in_code_tool_id": in_code_tool_id,
},
)

View File

@@ -1,31 +0,0 @@
"""add chat_background to user
Revision ID: fb80bdd256de
Revises: 8b5ce697290e
Create Date: 2026-01-16 16:15:59.222617
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "fb80bdd256de"
down_revision = "8b5ce697290e"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"user",
sa.Column(
"chat_background",
sa.String(),
nullable=True,
),
)
def downgrade() -> None:
op.drop_column("user", "chat_background")

View File

@@ -109,6 +109,11 @@ CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS = float(
STRIPE_SECRET_KEY = os.environ.get("STRIPE_SECRET_KEY")
STRIPE_PRICE_ID = os.environ.get("STRIPE_PRICE")
OPENAI_DEFAULT_API_KEY = os.environ.get("OPENAI_DEFAULT_API_KEY")
ANTHROPIC_DEFAULT_API_KEY = os.environ.get("ANTHROPIC_DEFAULT_API_KEY")
COHERE_DEFAULT_API_KEY = os.environ.get("COHERE_DEFAULT_API_KEY")
# JWT Public Key URL
JWT_PUBLIC_KEY_URL: str | None = os.getenv("JWT_PUBLIC_KEY_URL", None)
@@ -128,8 +133,3 @@ MARKETING_POSTHOG_API_KEY = os.environ.get("MARKETING_POSTHOG_API_KEY")
HUBSPOT_TRACKING_URL = os.environ.get("HUBSPOT_TRACKING_URL")
GATED_TENANTS_KEY = "gated_tenants"
# License enforcement - when True, blocks API access for gated/expired licenses
LICENSE_ENFORCEMENT_ENABLED = (
os.environ.get("LICENSE_ENFORCEMENT_ENABLED", "").lower() == "true"
)

View File

@@ -3,42 +3,30 @@ from uuid import UUID
from sqlalchemy.orm import Session
from onyx.configs.constants import NotificationType
from onyx.db.models import Persona
from onyx.db.models import Persona__User
from onyx.db.models import Persona__UserGroup
from onyx.db.notification import create_notification
from onyx.server.features.persona.models import PersonaSharedNotificationData
def update_persona_access(
def make_persona_private(
persona_id: int,
creator_user_id: UUID | None,
user_ids: list[UUID] | None,
group_ids: list[int] | None,
db_session: Session,
is_public: bool | None = None,
user_ids: list[UUID] | None = None,
group_ids: list[int] | None = None,
) -> None:
"""Updates the access settings for a persona including public status, user shares,
and group shares.
"""NOTE(rkuo): This function batches all updates into a single commit. If we don't
dedupe the inputs, the commit will exception."""
NOTE: This function batches all updates. If we don't dedupe the inputs,
the commit will exception.
NOTE: Callers are responsible for committing."""
if is_public is not None:
persona = db_session.query(Persona).filter(Persona.id == persona_id).first()
if persona:
persona.is_public = is_public
# NOTE: For user-ids and group-ids, `None` means "leave unchanged", `[]` means "clear all shares",
# and a non-empty list means "replace with these shares".
if user_ids is not None:
db_session.query(Persona__User).filter(
Persona__User.persona_id == persona_id
).delete(synchronize_session="fetch")
db_session.query(Persona__User).filter(
Persona__User.persona_id == persona_id
).delete(synchronize_session="fetch")
db_session.query(Persona__UserGroup).filter(
Persona__UserGroup.persona_id == persona_id
).delete(synchronize_session="fetch")
if user_ids:
user_ids_set = set(user_ids)
for user_id in user_ids_set:
db_session.add(Persona__User(persona_id=persona_id, user_id=user_id))
@@ -46,20 +34,17 @@ def update_persona_access(
create_notification(
user_id=user_id,
notif_type=NotificationType.PERSONA_SHARED,
title="A new agent was shared with you!",
db_session=db_session,
additional_data=PersonaSharedNotificationData(
persona_id=persona_id,
).model_dump(),
)
if group_ids is not None:
db_session.query(Persona__UserGroup).filter(
Persona__UserGroup.persona_id == persona_id
).delete(synchronize_session="fetch")
if group_ids:
group_ids_set = set(group_ids)
for group_id in group_ids_set:
db_session.add(
Persona__UserGroup(persona_id=persona_id, user_group_id=group_id)
)
db_session.commit()

View File

@@ -1,64 +0,0 @@
import uuid
from datetime import timedelta
from uuid import UUID
from sqlalchemy import select
from sqlalchemy.orm import Session
from onyx.db.engine.time_utils import get_db_current_time
from onyx.db.models import SearchQuery
def create_search_query(
db_session: Session,
user_id: UUID,
query: str,
query_expansions: list[str] | None = None,
) -> SearchQuery:
"""Create and persist a `SearchQuery` row.
Notes:
- `SearchQuery.id` is a UUID PK without a server-side default, so we generate it.
- `created_at` is filled by the DB (server_default=now()).
"""
search_query = SearchQuery(
id=uuid.uuid4(),
user_id=user_id,
query=query,
query_expansions=query_expansions,
)
db_session.add(search_query)
db_session.commit()
db_session.refresh(search_query)
return search_query
def fetch_search_queries_for_user(
db_session: Session,
user_id: UUID,
filter_days: int | None = None,
limit: int | None = None,
) -> list[SearchQuery]:
"""Fetch `SearchQuery` rows for a user.
Args:
user_id: User UUID.
filter_days: Optional time filter. If provided, only rows created within
the last `filter_days` days are returned.
limit: Optional max number of rows to return.
"""
if filter_days is not None and filter_days <= 0:
raise ValueError("filter_days must be > 0")
stmt = select(SearchQuery).where(SearchQuery.user_id == user_id)
if filter_days is not None and filter_days > 0:
cutoff = get_db_current_time(db_session) - timedelta(days=filter_days)
stmt = stmt.where(SearchQuery.created_at >= cutoff)
stmt = stmt.order_by(SearchQuery.created_at.desc())
if limit is not None:
stmt = stmt.limit(limit)
return list(db_session.scalars(stmt).all())
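A minimal usage sketch for the two helpers above, assuming an open SQLAlchemy session and a known user id (both hypothetical here; the query strings are purely illustrative):
# Hypothetical usage of create_search_query / fetch_search_queries_for_user;
# db_session and user_id are assumed to come from the caller (e.g. a FastAPI dependency).
from uuid import UUID
from sqlalchemy.orm import Session

def record_and_list_recent_searches(db_session: Session, user_id: UUID) -> None:
    # Persist the query along with any keyword expansions
    create_search_query(
        db_session=db_session,
        user_id=user_id,
        query="onboarding process new hire",
        query_expansions=["onboarding checklist", "new hire docs"],
    )
    # Fetch at most 10 queries from the last 7 days, newest first
    for sq in fetch_search_queries_for_user(
        db_session=db_session, user_id=user_id, filter_days=7, limit=10
    ):
        print(sq.created_at, sq.query)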

View File

@@ -16,17 +16,16 @@ from ee.onyx.server.enterprise_settings.api import (
from ee.onyx.server.evals.api import router as evals_router
from ee.onyx.server.license.api import router as license_router
from ee.onyx.server.manage.standard_answer import router as standard_answer_router
from ee.onyx.server.middleware.license_enforcement import (
add_license_enforcement_middleware,
)
from ee.onyx.server.middleware.tenant_tracking import (
add_api_server_tenant_id_middleware,
)
from ee.onyx.server.oauth.api import router as ee_oauth_router
from ee.onyx.server.query_and_chat.chat_backend import (
router as chat_router,
)
from ee.onyx.server.query_and_chat.query_backend import (
basic_router as ee_query_router,
)
from ee.onyx.server.query_and_chat.search_backend import router as search_router
from ee.onyx.server.query_history.api import router as query_history_router
from ee.onyx.server.reporting.usage_export_api import router as usage_export_router
from ee.onyx.server.seeding import seed_db
@@ -86,10 +85,6 @@ def get_application() -> FastAPI:
if MULTI_TENANT:
add_api_server_tenant_id_middleware(application, logger)
# Add license enforcement middleware (runs after tenant tracking)
# This blocks access when license is expired/gated
add_license_enforcement_middleware(application, logger)
if AUTH_TYPE == AuthType.CLOUD:
# For Google OAuth, refresh tokens are requested by:
# 1. Adding the right scopes
@@ -129,7 +124,7 @@ def get_application() -> FastAPI:
# EE only backend APIs
include_router_with_global_prefix_prepended(application, query_router)
include_router_with_global_prefix_prepended(application, ee_query_router)
include_router_with_global_prefix_prepended(application, search_router)
include_router_with_global_prefix_prepended(application, chat_router)
include_router_with_global_prefix_prepended(application, standard_answer_router)
include_router_with_global_prefix_prepended(application, ee_oauth_router)
include_router_with_global_prefix_prepended(application, ee_document_cc_pair_router)

View File

@@ -1,27 +0,0 @@
# Single message is likely most reliable and generally better for this task
# No final reminders at the end since the user query is expected to be short
# If it is not short, it should go into the chat flow so we do not need to account for this.
KEYWORD_EXPANSION_PROMPT = """
Generate a set of keyword-only queries to help find relevant documents for the provided query. \
These queries will be passed to a bm25-based keyword search engine. \
Provide a single query per line (where each query consists of one or more keywords). \
The queries must be purely keywords and not contain any filler natural language. \
Each query should have as few keywords as necessary to represent the user's search intent. \
If there are no useful expansions, simply return the original query with no additional keyword queries. \
CRITICAL: Do not include any additional formatting, comments, or anything aside from the keyword queries.
The user query is:
{user_query}
""".strip()
QUERY_TYPE_PROMPT = """
Determine if the provided query is better suited for a keyword search or a semantic search.
Respond with "keyword" or "semantic" literally and nothing else.
Do not provide any additional text or reasoning to your response.
CRITICAL: It must only be 1 single word - EITHER "keyword" or "semantic".
The user query is:
{user_query}
""".strip()

View File

@@ -1,42 +0,0 @@
# ruff: noqa: E501, W605 start
SEARCH_CLASS = "search"
CHAT_CLASS = "chat"
# Note that with many larger LLMs, the latency of running this prompt via third-party APIs is as high as 2 seconds, which is too slow for many
# use cases.
SEARCH_CHAT_PROMPT = f"""
Determine if the following query is better suited for a search UI or a chat UI. Respond with "{SEARCH_CLASS}" or "{CHAT_CLASS}" literally and nothing else. \
Do not provide any additional text or reasoning to your response. CRITICAL, IT MUST ONLY BE 1 SINGLE WORD - EITHER "{SEARCH_CLASS}" or "{CHAT_CLASS}".
# Classification Guidelines:
## {SEARCH_CLASS}
- If the query consists entirely of keywords or query doesn't require any answer from the AI
- If the query is a short statement that seems like a search query rather than a question
- If the query feels nonsensical or is a short phrase that possibly describes a document or information that could be found in an internal document
### Examples of {SEARCH_CLASS} queries:
- Find me the document that goes over the onboarding process for a new hire
- Pull requests since last week
- Sales Runbook AMEA Region
- Procurement process
- Retrieve the PRD for project X
## {CHAT_CLASS}
- If the query is asking a question that requires an answer rather than a document
- If the query is asking for a solution, suggestion, or general help
- If the query is seeking information that is on the web and likely not in a company internal document
- If the query should be answered without any context from additional documents or searches
### Examples of {CHAT_CLASS} queries:
- What led us to win the deal with company X? (seeking answer)
- Google Drive not sync-ing files to my computer (seeking solution)
- Review my email: <whatever the email is> (general help)
- Write me a script to... (general help)
- Cheap flights Europe to Tokyo (information likely found on the web, not internal)
# User Query:
{{user_query}}
REMEMBER TO ONLY RESPOND WITH "{SEARCH_CLASS}" OR "{CHAT_CLASS}" AND NOTHING ELSE.
""".strip()
# ruff: noqa: E501, W605 end

View File

@@ -1,286 +0,0 @@
from collections.abc import Generator
from sqlalchemy.orm import Session
from ee.onyx.db.search import create_search_query
from ee.onyx.secondary_llm_flows.query_expansion import expand_keywords
from ee.onyx.server.query_and_chat.models import SearchDocWithContent
from ee.onyx.server.query_and_chat.models import SearchFullResponse
from ee.onyx.server.query_and_chat.models import SendSearchQueryRequest
from ee.onyx.server.query_and_chat.streaming_models import LLMSelectedDocsPacket
from ee.onyx.server.query_and_chat.streaming_models import SearchDocsPacket
from ee.onyx.server.query_and_chat.streaming_models import SearchErrorPacket
from ee.onyx.server.query_and_chat.streaming_models import SearchQueriesPacket
from onyx.context.search.models import BaseFilters
from onyx.context.search.models import ChunkSearchRequest
from onyx.context.search.models import InferenceChunk
from onyx.context.search.pipeline import merge_individual_chunks
from onyx.context.search.pipeline import search_pipeline
from onyx.db.models import User
from onyx.db.search_settings import get_current_search_settings
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.interfaces import DocumentIndex
from onyx.llm.factory import get_default_llm
from onyx.secondary_llm_flows.document_filter import select_sections_for_expansion
from onyx.tools.tool_implementations.search.search_utils import (
weighted_reciprocal_rank_fusion,
)
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
logger = setup_logger()
# This is just a heuristic that also happens to work well for the UI/UX
# Users would not find it useful to see a huge list of suggested docs
# but more than 1 is also likely good as many questions may target more than 1 doc.
TARGET_NUM_SECTIONS_FOR_LLM_SELECTION = 3
def _run_single_search(
query: str,
filters: BaseFilters | None,
document_index: DocumentIndex,
user: User | None,
db_session: Session,
num_hits: int | None = None,
) -> list[InferenceChunk]:
"""Execute a single search query and return chunks."""
chunk_search_request = ChunkSearchRequest(
query=query,
user_selected_filters=filters,
limit=num_hits,
)
return search_pipeline(
chunk_search_request=chunk_search_request,
document_index=document_index,
user=user,
persona=None, # No persona for direct search
db_session=db_session,
)
def stream_search_query(
request: SendSearchQueryRequest,
user: User | None,
db_session: Session,
) -> Generator[
SearchQueriesPacket | SearchDocsPacket | LLMSelectedDocsPacket | SearchErrorPacket,
None,
None,
]:
"""
Core search function that yields streaming packets.
Used by both streaming and non-streaming endpoints.
"""
# Get document index
search_settings = get_current_search_settings(db_session)
# This flow is for search so we do not get all indices.
document_index = get_default_document_index(search_settings, None)
# Determine queries to execute
original_query = request.search_query
keyword_expansions: list[str] = []
if request.run_query_expansion:
try:
llm = get_default_llm()
keyword_expansions = expand_keywords(
user_query=original_query,
llm=llm,
)
if keyword_expansions:
logger.debug(
f"Query expansion generated {len(keyword_expansions)} keyword queries"
)
except Exception as e:
logger.warning(f"Query expansion failed: {e}; using original query only.")
keyword_expansions = []
# Build list of all executed queries for tracking
all_executed_queries = [original_query] + keyword_expansions
# TODO remove this check, user should not be None
if user is not None:
create_search_query(
db_session=db_session,
user_id=user.id,
query=request.search_query,
query_expansions=keyword_expansions if keyword_expansions else None,
)
# Execute search(es)
if not keyword_expansions:
# Single query (original only) - no threading needed
chunks = _run_single_search(
query=original_query,
filters=request.filters,
document_index=document_index,
user=user,
db_session=db_session,
num_hits=request.num_hits,
)
else:
# Multiple queries - run in parallel and merge with RRF
# First query is the original (semantic), rest are keyword expansions
search_functions = [
(
_run_single_search,
(
query,
request.filters,
document_index,
user,
db_session,
request.num_hits,
),
)
for query in all_executed_queries
]
# Run all searches in parallel
all_search_results: list[list[InferenceChunk]] = (
run_functions_tuples_in_parallel(
search_functions,
allow_failures=True,
)
)
# Separate original query results from keyword expansion results
# Note that in rare cases, the original query may have failed and so we may be
# just overweighting one set of keyword results, which should not be a big deal.
original_result = all_search_results[0] if all_search_results else []
keyword_results = all_search_results[1:] if len(all_search_results) > 1 else []
# Build valid results and weights
# Original query (semantic): weight 2.0
# Keyword expansions: weight 1.0 each
valid_results: list[list[InferenceChunk]] = []
weights: list[float] = []
if original_result:
valid_results.append(original_result)
weights.append(2.0)
for keyword_result in keyword_results:
if keyword_result:
valid_results.append(keyword_result)
weights.append(1.0)
if not valid_results:
logger.warning("All parallel searches returned empty results")
chunks = []
else:
chunks = weighted_reciprocal_rank_fusion(
ranked_results=valid_results,
weights=weights,
id_extractor=lambda chunk: f"{chunk.document_id}_{chunk.chunk_id}",
)
# Merge chunks into sections
sections = merge_individual_chunks(chunks)
# Truncate to the requested number of hits
sections = sections[: request.num_hits]
# Apply LLM document selection if requested
# num_docs_fed_to_llm_selection specifies how many sections to feed to the LLM for selection
# The LLM will always try to select TARGET_NUM_SECTIONS_FOR_LLM_SELECTION sections from those fed to it
# llm_selected_doc_ids will be:
# - None if LLM selection was not requested or failed
# - Empty list if LLM selection ran but selected nothing
# - List of doc IDs if LLM selection succeeded
run_llm_selection = (
request.num_docs_fed_to_llm_selection is not None
and request.num_docs_fed_to_llm_selection >= 1
)
llm_selected_doc_ids: list[str] | None = None
llm_selection_failed = False
if run_llm_selection and sections:
try:
llm = get_default_llm()
sections_to_evaluate = sections[: request.num_docs_fed_to_llm_selection]
selected_sections, _ = select_sections_for_expansion(
sections=sections_to_evaluate,
user_query=original_query,
llm=llm,
max_sections=TARGET_NUM_SECTIONS_FOR_LLM_SELECTION,
try_to_fill_to_max=True,
)
# Extract unique document IDs from selected sections (may be empty)
llm_selected_doc_ids = list(
dict.fromkeys(
section.center_chunk.document_id for section in selected_sections
)
)
logger.debug(
f"LLM document selection evaluated {len(sections_to_evaluate)} sections, "
f"selected {len(selected_sections)} sections with doc IDs: {llm_selected_doc_ids}"
)
except Exception as e:
# Allowing a blanket exception here as this step is not critical and the rest of the results are still valid
logger.warning(f"LLM document selection failed: {e}")
llm_selection_failed = True
elif run_llm_selection and not sections:
# LLM selection requested but no sections to evaluate
llm_selected_doc_ids = []
# Convert to SearchDocWithContent list, optionally including content
search_docs = SearchDocWithContent.from_inference_sections(
sections,
include_content=request.include_content,
is_internet=False,
)
# Yield queries packet
yield SearchQueriesPacket(all_executed_queries=all_executed_queries)
# Yield docs packet
yield SearchDocsPacket(search_docs=search_docs)
# Yield LLM selected docs packet if LLM selection was requested
# - llm_selected_doc_ids is None if selection failed
# - llm_selected_doc_ids is empty list if no docs were selected
# - llm_selected_doc_ids is list of IDs if docs were selected
if run_llm_selection:
yield LLMSelectedDocsPacket(
llm_selected_doc_ids=None if llm_selection_failed else llm_selected_doc_ids
)
def gather_search_stream(
packets: Generator[
SearchQueriesPacket
| SearchDocsPacket
| LLMSelectedDocsPacket
| SearchErrorPacket,
None,
None,
],
) -> SearchFullResponse:
"""
Aggregate all streaming packets into SearchFullResponse.
"""
all_executed_queries: list[str] = []
search_docs: list[SearchDocWithContent] = []
llm_selected_doc_ids: list[str] | None = None
error: str | None = None
for packet in packets:
if isinstance(packet, SearchQueriesPacket):
all_executed_queries = packet.all_executed_queries
elif isinstance(packet, SearchDocsPacket):
search_docs = packet.search_docs
elif isinstance(packet, LLMSelectedDocsPacket):
llm_selected_doc_ids = packet.llm_selected_doc_ids
elif isinstance(packet, SearchErrorPacket):
error = packet.error
return SearchFullResponse(
all_executed_queries=all_executed_queries,
search_docs=search_docs,
doc_selection_reasoning=None,
llm_selected_doc_ids=llm_selected_doc_ids,
error=error,
)
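The fusion step in stream_search_query relies on weighted_reciprocal_rank_fusion, which is imported rather than defined here. Below is a minimal standalone sketch of the general technique, not the imported implementation; the function name, the k=60 smoothing constant, and the exact scoring formula are assumptions.
# Illustrative sketch of weighted reciprocal rank fusion (RRF); names and the
# k constant are assumptions, not the actual onyx helper.
from collections.abc import Callable
from typing import TypeVar

T = TypeVar("T")

def weighted_rrf_sketch(
    ranked_results: list[list[T]],
    weights: list[float],
    id_extractor: Callable[[T], str],
    k: int = 60,  # conventional RRF smoothing constant
) -> list[T]:
    scores: dict[str, float] = {}
    first_seen: dict[str, T] = {}
    for result_list, weight in zip(ranked_results, weights):
        for rank, item in enumerate(result_list, start=1):
            item_id = id_extractor(item)
            # Each list contributes weight / (k + rank) for every item it ranks
            scores[item_id] = scores.get(item_id, 0.0) + weight / (k + rank)
            first_seen.setdefault(item_id, item)
    # Highest fused score first
    return [first_seen[i] for i in sorted(scores, key=lambda i: scores[i], reverse=True)]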

View File

@@ -1,92 +0,0 @@
import re
from ee.onyx.prompts.query_expansion import KEYWORD_EXPANSION_PROMPT
from onyx.llm.interfaces import LLM
from onyx.llm.models import LanguageModelInput
from onyx.llm.models import ReasoningEffort
from onyx.llm.models import UserMessage
from onyx.llm.utils import llm_response_to_string
from onyx.utils.logger import setup_logger
logger = setup_logger()
# Pattern to remove common LLM artifacts: brackets, quotes, list markers, etc.
CLEANUP_PATTERN = re.compile(r'[\[\]"\'`]')
def _clean_keyword_line(line: str) -> str:
"""Clean a keyword line by removing common LLM artifacts.
Removes brackets, quotes, and other characters that LLMs may accidentally
include in their output.
"""
# Remove common artifacts
cleaned = CLEANUP_PATTERN.sub("", line)
# Remove leading list markers like "1.", "2.", "-", "*"
cleaned = re.sub(r"^\s*(?:\d+[\.\)]\s*|[-*]\s*)", "", cleaned)
return cleaned.strip()
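# Illustrative (assumed) inputs and outputs for the cleanup above:
#   '1. "onboarding checklist"' -> 'onboarding checklist'
#   '- [new hire docs]'         -> 'new hire docs'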
def expand_keywords(
user_query: str,
llm: LLM,
) -> list[str]:
"""Expand a user query into multiple keyword-only queries for BM25 search.
Uses an LLM to generate keyword-based search queries that capture different
aspects of the user's search intent. Returns only the expanded queries,
not the original query.
Args:
user_query: The original search query from the user
llm: Language model to use for keyword expansion
Returns:
List of expanded keyword queries (excluding the original query).
Returns empty list if expansion fails or produces no useful expansions.
"""
messages: LanguageModelInput = [
UserMessage(content=KEYWORD_EXPANSION_PROMPT.format(user_query=user_query))
]
try:
response = llm.invoke(
prompt=messages,
reasoning_effort=ReasoningEffort.OFF,
# Limit output - we only expect a few short keyword queries
max_tokens=150,
)
content = llm_response_to_string(response).strip()
if not content:
logger.warning("Keyword expansion returned empty response.")
return []
# Parse response - each line is a separate keyword query
# Clean each line to remove LLM artifacts and drop empty lines
parsed_queries = []
for line in content.strip().split("\n"):
cleaned = _clean_keyword_line(line)
if cleaned:
parsed_queries.append(cleaned)
if not parsed_queries:
logger.warning("Keyword expansion parsing returned no queries.")
return []
# Filter out duplicates and queries that match the original
expanded_queries: list[str] = []
seen_lower: set[str] = {user_query.lower()}
for query in parsed_queries:
query_lower = query.lower()
if query_lower not in seen_lower:
seen_lower.add(query_lower)
expanded_queries.append(query)
logger.debug(f"Keyword expansion generated {len(expanded_queries)} queries")
return expanded_queries
except Exception as e:
logger.warning(f"Keyword expansion failed: {e}")
return []

View File

@@ -1,50 +0,0 @@
from ee.onyx.prompts.search_flow_classification import CHAT_CLASS
from ee.onyx.prompts.search_flow_classification import SEARCH_CHAT_PROMPT
from ee.onyx.prompts.search_flow_classification import SEARCH_CLASS
from onyx.llm.interfaces import LLM
from onyx.llm.models import LanguageModelInput
from onyx.llm.models import ReasoningEffort
from onyx.llm.models import UserMessage
from onyx.llm.utils import llm_response_to_string
from onyx.utils.logger import setup_logger
from onyx.utils.timing import log_function_time
logger = setup_logger()
@log_function_time(print_only=True)
def classify_is_search_flow(
query: str,
llm: LLM,
) -> bool:
messages: LanguageModelInput = [
UserMessage(content=SEARCH_CHAT_PROMPT.format(user_query=query))
]
response = llm.invoke(
prompt=messages,
reasoning_effort=ReasoningEffort.OFF,
# Nothing can happen in the UI until this call finishes so we need to be aggressive with the timeout
timeout_override=2,
# Well more than necessary, but this ensures completion in case the model classifies
# correctly but then ends up rambling
max_tokens=20,
)
content = llm_response_to_string(response).strip().lower()
if not content:
logger.warning(
"Search flow classification returned empty response; defaulting to chat flow."
)
return False
# Prefer chat if both appear.
if CHAT_CLASS in content:
return False
if SEARCH_CLASS in content:
return True
logger.warning(
"Search flow classification returned unexpected response; defaulting to chat flow. Response=%r",
content,
)
return False

View File

@@ -19,11 +19,10 @@ from ee.onyx.db.analytics import fetch_query_analytics
from ee.onyx.db.analytics import user_can_view_assistant_stats
from onyx.auth.users import current_admin_user
from onyx.auth.users import current_user
from onyx.configs.constants import PUBLIC_API_TAGS
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
router = APIRouter(prefix="/analytics", tags=PUBLIC_API_TAGS)
router = APIRouter(prefix="/analytics")
_DEFAULT_LOOKBACK_DAYS = 30

View File

@@ -10,8 +10,6 @@ EE_PUBLIC_ENDPOINT_SPECS = PUBLIC_ENDPOINT_SPECS + [
("/enterprise-settings/logo", {"GET"}),
("/enterprise-settings/logotype", {"GET"}),
("/enterprise-settings/custom-analytics-script", {"GET"}),
# Stripe publishable key is safe to expose publicly
("/tenants/stripe-publishable-key", {"GET"}),
]

View File

@@ -1,4 +1,3 @@
from enum import Enum
from typing import Any
from typing import List
@@ -24,12 +23,6 @@ class NavigationItem(BaseModel):
return instance
class LogoDisplayStyle(str, Enum):
LOGO_AND_NAME = "logo_and_name"
LOGO_ONLY = "logo_only"
NAME_ONLY = "name_only"
class EnterpriseSettings(BaseModel):
"""General settings that only apply to the Enterprise Edition of Onyx
@@ -38,7 +31,6 @@ class EnterpriseSettings(BaseModel):
application_name: str | None = None
use_custom_logo: bool = False
use_custom_logotype: bool = False
logo_display_style: LogoDisplayStyle | None = None
# custom navigation
custom_nav_items: List[NavigationItem] = Field(default_factory=list)
@@ -50,9 +42,6 @@ class EnterpriseSettings(BaseModel):
custom_popup_header: str | None = None
custom_popup_content: str | None = None
enable_consent_screen: bool | None = None
consent_screen_prompt: str | None = None
show_first_visit_notice: bool | None = None
custom_greeting_message: str | None = None
def check_validity(self) -> None:
return

View File

@@ -1,102 +0,0 @@
"""Middleware to enforce license status application-wide."""
import logging
from collections.abc import Awaitable
from collections.abc import Callable
from fastapi import FastAPI
from fastapi import Request
from fastapi import Response
from fastapi.responses import JSONResponse
from redis.exceptions import RedisError
from ee.onyx.configs.app_configs import LICENSE_ENFORCEMENT_ENABLED
from ee.onyx.db.license import get_cached_license_metadata
from ee.onyx.server.tenants.product_gating import is_tenant_gated
from onyx.server.settings.models import ApplicationStatus
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import get_current_tenant_id
# Paths that are ALWAYS accessible, even when license is expired/gated.
# These enable users to:
# /auth - Log in/out (users can't fix billing if locked out of auth)
# /license - Fetch, upload, or check license status
# /health - Health checks for load balancers/orchestrators
# /me - Basic user info needed for UI rendering
# /settings, /enterprise-settings - View app status and branding
# /tenants/billing-* - Manage subscription to resolve gating
ALLOWED_PATH_PREFIXES = {
"/auth",
"/license",
"/health",
"/me",
"/settings",
"/enterprise-settings",
"/tenants/billing-information",
"/tenants/create-customer-portal-session",
"/tenants/create-subscription-session",
}
def _is_path_allowed(path: str) -> bool:
"""Check if path is in allowlist (prefix match)."""
return any(path.startswith(prefix) for prefix in ALLOWED_PATH_PREFIXES)
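# Illustrative (assumed) evaluations given the allowlist above:
#   "/license/status"              -> True  (prefix "/license")
#   "/tenants/billing-information" -> True
#   "/chat/send-message"           -> False (blocked while gated)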
def add_license_enforcement_middleware(
app: FastAPI, logger: logging.LoggerAdapter
) -> None:
logger.info("License enforcement middleware registered")
@app.middleware("http")
async def enforce_license(
request: Request, call_next: Callable[[Request], Awaitable[Response]]
) -> Response:
"""Block requests when license is expired/gated."""
if not LICENSE_ENFORCEMENT_ENABLED:
return await call_next(request)
path = request.url.path
if path.startswith("/api"):
path = path[4:]
if _is_path_allowed(path):
return await call_next(request)
is_gated = False
tenant_id = get_current_tenant_id()
if MULTI_TENANT:
try:
is_gated = is_tenant_gated(tenant_id)
except RedisError as e:
logger.warning(f"Failed to check tenant gating status: {e}")
# Fail open - don't block users due to Redis connectivity issues
is_gated = False
else:
try:
metadata = get_cached_license_metadata(tenant_id)
if metadata:
if metadata.status == ApplicationStatus.GATED_ACCESS:
is_gated = True
else:
# No license metadata = gated for self-hosted EE
is_gated = True
except RedisError as e:
logger.warning(f"Failed to check license metadata: {e}")
# Fail open - don't block users due to Redis connectivity issues
is_gated = False
if is_gated:
logger.info(f"Blocking request for gated tenant: {tenant_id}, path={path}")
return JSONResponse(
status_code=402,
content={
"detail": {
"error": "license_expired",
"message": "Your subscription has expired. Please update your billing.",
}
},
)
return await call_next(request)

View File

@@ -0,0 +1,216 @@
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from sqlalchemy.orm import Session
from ee.onyx.server.query_and_chat.models import BasicCreateChatMessageRequest
from ee.onyx.server.query_and_chat.models import (
BasicCreateChatMessageWithHistoryRequest,
)
from onyx.auth.users import current_user
from onyx.chat.chat_utils import create_chat_history_chain
from onyx.chat.models import ChatBasicResponse
from onyx.chat.process_message import gather_stream
from onyx.chat.process_message import stream_chat_message_objects
from onyx.configs.constants import MessageType
from onyx.context.search.models import OptionalSearchSetting
from onyx.context.search.models import RetrievalDetails
from onyx.db.chat import create_chat_session
from onyx.db.chat import create_new_chat_message
from onyx.db.chat import get_or_create_root_message
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.llm.factory import get_llm_for_persona
from onyx.natural_language_processing.utils import get_tokenizer
from onyx.server.query_and_chat.models import CreateChatMessageRequest
from onyx.utils.logger import setup_logger
logger = setup_logger()
router = APIRouter(prefix="/chat")
@router.post("/send-message-simple-api")
def handle_simplified_chat_message(
chat_message_req: BasicCreateChatMessageRequest,
user: User | None = Depends(current_user),
db_session: Session = Depends(get_session),
) -> ChatBasicResponse:
"""This is a Non-Streaming version that only gives back a minimal set of information"""
logger.notice(f"Received new simple api chat message: {chat_message_req.message}")
if not chat_message_req.message:
raise HTTPException(status_code=400, detail="Empty chat message is invalid")
# Handle chat session creation if chat_session_id is not provided
if chat_message_req.chat_session_id is None:
if chat_message_req.persona_id is None:
raise HTTPException(
status_code=400,
detail="Either chat_session_id or persona_id must be provided",
)
# Create a new chat session with the provided persona_id
try:
new_chat_session = create_chat_session(
db_session=db_session,
description="", # Leave empty for simple API
user_id=user.id if user else None,
persona_id=chat_message_req.persona_id,
)
chat_session_id = new_chat_session.id
except Exception as e:
logger.exception(e)
raise HTTPException(status_code=400, detail="Invalid Persona provided.")
else:
chat_session_id = chat_message_req.chat_session_id
try:
parent_message = create_chat_history_chain(
chat_session_id=chat_session_id, db_session=db_session
)[-1]
except Exception:
parent_message = get_or_create_root_message(
chat_session_id=chat_session_id, db_session=db_session
)
if (
chat_message_req.retrieval_options is None
and chat_message_req.search_doc_ids is None
):
retrieval_options: RetrievalDetails | None = RetrievalDetails(
run_search=OptionalSearchSetting.ALWAYS,
real_time=False,
)
else:
retrieval_options = chat_message_req.retrieval_options
full_chat_msg_info = CreateChatMessageRequest(
chat_session_id=chat_session_id,
parent_message_id=parent_message.id,
message=chat_message_req.message,
file_descriptors=[],
search_doc_ids=chat_message_req.search_doc_ids,
retrieval_options=retrieval_options,
# Simple API does not support reranking, hide complexity from user
rerank_settings=None,
query_override=chat_message_req.query_override,
# Currently only applies to search flow not chat
chunks_above=0,
chunks_below=0,
full_doc=chat_message_req.full_doc,
structured_response_format=chat_message_req.structured_response_format,
)
packets = stream_chat_message_objects(
new_msg_req=full_chat_msg_info,
user=user,
db_session=db_session,
enforce_chat_session_id_for_search_docs=False,
)
return gather_stream(packets)
@router.post("/send-message-simple-with-history")
def handle_send_message_simple_with_history(
req: BasicCreateChatMessageWithHistoryRequest,
user: User | None = Depends(current_user),
db_session: Session = Depends(get_session),
) -> ChatBasicResponse:
"""This is a Non-Streaming version that only gives back a minimal set of information.
Takes in chat history maintained by the caller
and does query rephrasing similar to answer-with-quote"""
if len(req.messages) == 0:
raise HTTPException(status_code=400, detail="Messages cannot be zero length")
# This is a sanity check to make sure the chat history is valid
# It must start with a user message and alternate between user and assistant
expected_role = MessageType.USER
for msg in req.messages:
if not msg.message:
raise HTTPException(
status_code=400, detail="One or more chat messages were empty"
)
if msg.role != expected_role:
raise HTTPException(
status_code=400,
detail="Message roles must start and end with MessageType.USER and alternate in-between.",
)
if expected_role == MessageType.USER:
expected_role = MessageType.ASSISTANT
else:
expected_role = MessageType.USER
query = req.messages[-1].message
msg_history = req.messages[:-1]
logger.notice(f"Received new simple with history chat message: {query}")
user_id = user.id if user is not None else None
chat_session = create_chat_session(
db_session=db_session,
description="handle_send_message_simple_with_history",
user_id=user_id,
persona_id=req.persona_id,
)
llm = get_llm_for_persona(persona=chat_session.persona, user=user)
llm_tokenizer = get_tokenizer(
model_name=llm.config.model_name,
provider_type=llm.config.model_provider,
)
# Every chat Session begins with an empty root message
root_message = get_or_create_root_message(
chat_session_id=chat_session.id, db_session=db_session
)
chat_message = root_message
for msg in msg_history:
chat_message = create_new_chat_message(
chat_session_id=chat_session.id,
parent_message=chat_message,
message=msg.message,
token_count=len(llm_tokenizer.encode(msg.message)),
message_type=msg.role,
db_session=db_session,
commit=False,
)
db_session.commit()
if req.retrieval_options is None and req.search_doc_ids is None:
retrieval_options: RetrievalDetails | None = RetrievalDetails(
run_search=OptionalSearchSetting.ALWAYS,
real_time=False,
)
else:
retrieval_options = req.retrieval_options
full_chat_msg_info = CreateChatMessageRequest(
chat_session_id=chat_session.id,
parent_message_id=chat_message.id,
message=query,
file_descriptors=[],
search_doc_ids=req.search_doc_ids,
retrieval_options=retrieval_options,
# Simple API does not support reranking, hide complexity from user
rerank_settings=None,
query_override=None,
chunks_above=0,
chunks_below=0,
full_doc=req.full_doc,
structured_response_format=req.structured_response_format,
)
packets = stream_chat_message_objects(
new_msg_req=full_chat_msg_info,
user=user,
db_session=db_session,
enforce_chat_session_id_for_search_docs=False,
)
return gather_stream(packets)
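A hypothetical client-side sketch of the alternating-role history this endpoint expects. The payload shape mirrors BasicCreateChatMessageWithHistoryRequest above; the base URL, API-key header, persona id, message text, and the "user"/"assistant" role strings are assumptions.
# Hypothetical client call; URL, auth, persona_id, and role string values are assumptions.
import requests

payload = {
    "persona_id": 0,
    "messages": [
        # Must start with a user message and alternate user/assistant,
        # ending with the new user message.
        {"message": "What connectors does Onyx support?", "role": "user"},
        {"message": "Onyx supports many connectors, including Google Drive.", "role": "assistant"},
        {"message": "How do I set up the Google Drive connector?", "role": "user"},
    ],
}
resp = requests.post(
    "http://localhost:8080/api/chat/send-message-simple-with-history",
    json=payload,
    headers={"Authorization": "Bearer <api-key>"},
)
resp.raise_for_status()
print(resp.json())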

View File

@@ -1,12 +1,18 @@
from collections.abc import Sequence
from datetime import datetime
from collections import OrderedDict
from typing import Literal
from uuid import UUID
from pydantic import BaseModel
from pydantic import Field
from pydantic import model_validator
from onyx.chat.models import ThreadMessage
from onyx.configs.constants import DocumentSource
from onyx.context.search.models import BaseFilters
from onyx.context.search.models import InferenceSection
from onyx.context.search.models import SearchDoc
from onyx.context.search.models import BasicChunkRequest
from onyx.context.search.models import ChunkContext
from onyx.context.search.models import InferenceChunk
from onyx.context.search.models import RetrievalDetails
from onyx.server.manage.models import StandardAnswer
@@ -19,89 +25,119 @@ class StandardAnswerResponse(BaseModel):
standard_answers: list[StandardAnswer] = Field(default_factory=list)
class SearchFlowClassificationRequest(BaseModel):
user_query: str
class DocumentSearchRequest(BasicChunkRequest):
user_selected_filters: BaseFilters | None = None
class SearchFlowClassificationResponse(BaseModel):
is_search_flow: bool
class DocumentSearchResponse(BaseModel):
top_documents: list[InferenceChunk]
class SendSearchQueryRequest(BaseModel):
search_query: str
filters: BaseFilters | None = None
num_docs_fed_to_llm_selection: int | None = None
run_query_expansion: bool = False
num_hits: int = 50
class BasicCreateChatMessageRequest(ChunkContext):
"""If a chat_session_id is not provided, a persona_id must be provided to automatically create a new chat session
Note, for simplicity this option only allows for a single linear chain of messages
"""
include_content: bool = False
stream: bool = False
chat_session_id: UUID | None = None
# Optional persona_id to create a new chat session if chat_session_id is not provided
persona_id: int | None = None
# New message contents
message: str
# Defaults to using retrieval with no additional filters
retrieval_options: RetrievalDetails | None = None
# Allows the caller to specify the exact search query they want to use
# will disable Query Rewording if specified
query_override: str | None = None
# If search_doc_ids provided, then retrieval options are unused
search_doc_ids: list[int] | None = None
# only works if using an OpenAI model. See the following for more details:
# https://platform.openai.com/docs/guides/structured-outputs/introduction
structured_response_format: dict | None = None
@model_validator(mode="after")
def validate_chat_session_or_persona(self) -> "BasicCreateChatMessageRequest":
if self.chat_session_id is None and self.persona_id is None:
raise ValueError("Either chat_session_id or persona_id must be provided")
return self
class SearchDocWithContent(SearchDoc):
# Allows None because content inclusion is controlled by a flag, but the object
# used in the search-path code is always of this type
content: str | None
class BasicCreateChatMessageWithHistoryRequest(ChunkContext):
# Last element is the new query. All previous elements are historical context
messages: list[ThreadMessage]
persona_id: int
retrieval_options: RetrievalDetails | None = None
query_override: str | None = None
skip_rerank: bool | None = None
# If search_doc_ids provided, then retrieval options are unused
search_doc_ids: list[int] | None = None
# only works if using an OpenAI model. See the following for more details:
# https://platform.openai.com/docs/guides/structured-outputs/introduction
structured_response_format: dict | None = None
@classmethod
def from_inference_sections(
cls,
sections: Sequence[InferenceSection],
include_content: bool = False,
is_internet: bool = False,
) -> list["SearchDocWithContent"]:
"""Convert InferenceSections to SearchDocWithContent objects.
Args:
sections: Sequence of InferenceSection objects
include_content: If True, populate content field with combined_content
is_internet: Whether these are internet search results
class SimpleDoc(BaseModel):
id: str
semantic_identifier: str
link: str | None
blurb: str
match_highlights: list[str]
source_type: DocumentSource
metadata: dict | None
Returns:
List of SearchDocWithContent with optional content
class AgentSubQuestion(BaseModel):
sub_question: str
document_ids: list[str]
class AgentAnswer(BaseModel):
answer: str
answer_type: Literal["agent_sub_answer", "agent_level_answer"]
class AgentSubQuery(BaseModel):
sub_query: str
query_id: int
@staticmethod
def make_dict_by_level_and_question_index(
original_dict: dict[tuple[int, int, int], "AgentSubQuery"],
) -> dict[int, dict[int, list["AgentSubQuery"]]]:
"""Takes a dict of tuple(level, question num, query_id) to sub queries.
returns a dict of level to dict[question num to list of query_id's]
Ordering is asc for readability.
"""
if not sections:
return []
# In this function, when we sort int | None, we deliberately push None to the end
return [
cls(
document_id=(chunk := section.center_chunk).document_id,
chunk_ind=chunk.chunk_id,
semantic_identifier=chunk.semantic_identifier or "Unknown",
link=chunk.source_links[0] if chunk.source_links else None,
blurb=chunk.blurb,
source_type=chunk.source_type,
boost=chunk.boost,
hidden=chunk.hidden,
metadata=chunk.metadata,
score=chunk.score,
match_highlights=chunk.match_highlights,
updated_at=chunk.updated_at,
primary_owners=chunk.primary_owners,
secondary_owners=chunk.secondary_owners,
is_internet=is_internet,
content=section.combined_content if include_content else None,
# map entries to the level_question_dict
level_question_dict: dict[int, dict[int, list["AgentSubQuery"]]] = {}
for k1, obj in original_dict.items():
level = k1[0]
question = k1[1]
if level not in level_question_dict:
level_question_dict[level] = {}
if question not in level_question_dict[level]:
level_question_dict[level][question] = []
level_question_dict[level][question].append(obj)
# sort each query_id list and question_index
for key1, obj1 in level_question_dict.items():
for key2, value2 in obj1.items():
# sort the query_id list of each question_index
level_question_dict[key1][key2] = sorted(
value2, key=lambda o: o.query_id
)
# sort the question_index dict of level
level_question_dict[key1] = OrderedDict(
sorted(level_question_dict[key1].items(), key=lambda x: (x is None, x))
)
for section in sections
]
class SearchFullResponse(BaseModel):
all_executed_queries: list[str]
search_docs: list[SearchDocWithContent]
# Reasoning tokens output by the LLM for the document selection
doc_selection_reasoning: str | None = None
# This a list of document ids that are in the search_docs list
llm_selected_doc_ids: list[str] | None = None
# Error message if the search failed partway through
error: str | None = None
class SearchQueryResponse(BaseModel):
query: str
query_expansions: list[str] | None
created_at: datetime
class SearchHistoryResponse(BaseModel):
search_queries: list[SearchQueryResponse]
# sort the top dict of levels
sorted_dict = OrderedDict(
sorted(level_question_dict.items(), key=lambda x: (x is None, x))
)
return sorted_dict

View File

@@ -1,170 +0,0 @@
from collections.abc import Generator
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from fastapi.responses import StreamingResponse
from sqlalchemy.orm import Session
from ee.onyx.db.search import fetch_search_queries_for_user
from ee.onyx.search.process_search_query import gather_search_stream
from ee.onyx.search.process_search_query import stream_search_query
from ee.onyx.secondary_llm_flows.search_flow_classification import (
classify_is_search_flow,
)
from ee.onyx.server.query_and_chat.models import SearchFlowClassificationRequest
from ee.onyx.server.query_and_chat.models import SearchFlowClassificationResponse
from ee.onyx.server.query_and_chat.models import SearchFullResponse
from ee.onyx.server.query_and_chat.models import SearchHistoryResponse
from ee.onyx.server.query_and_chat.models import SearchQueryResponse
from ee.onyx.server.query_and_chat.models import SendSearchQueryRequest
from ee.onyx.server.query_and_chat.streaming_models import SearchErrorPacket
from onyx.auth.users import current_user
from onyx.db.engine.sql_engine import get_session
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.models import User
from onyx.llm.factory import get_default_llm
from onyx.server.usage_limits import check_llm_cost_limit_for_provider
from onyx.server.utils import get_json_line
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id
logger = setup_logger()
router = APIRouter(prefix="/search")
@router.post("/search-flow-classification")
def search_flow_classification(
request: SearchFlowClassificationRequest,
# This is added just to ensure this endpoint isn't spammed by non-authorized users since there's an LLM call underneath it
_: User | None = Depends(current_user),
db_session: Session = Depends(get_session),
) -> SearchFlowClassificationResponse:
query = request.user_query
# Heuristic: if the user is typing a lot of text, it's unlikely they're looking for some specific document.
# Most likely something needs to be done with the included text, so we just classify it as a chat flow.
if len(query) > 200:
return SearchFlowClassificationResponse(is_search_flow=False)
llm = get_default_llm()
check_llm_cost_limit_for_provider(
db_session=db_session,
tenant_id=get_current_tenant_id(),
llm_provider_api_key=llm.config.api_key,
)
try:
is_search_flow = classify_is_search_flow(query=query, llm=llm)
except Exception as e:
logger.exception(
"Search flow classification failed; defaulting to chat flow",
exc_info=e,
)
is_search_flow = False
return SearchFlowClassificationResponse(is_search_flow=is_search_flow)
@router.post("/send-search-message", response_model=None)
def handle_send_search_message(
request: SendSearchQueryRequest,
user: User | None = Depends(current_user),
db_session: Session = Depends(get_session),
) -> StreamingResponse | SearchFullResponse:
"""
Execute a search query with optional streaming.
When stream=True: Returns StreamingResponse with SSE
When stream=False: Returns SearchFullResponse
"""
logger.debug(f"Received search query: {request.search_query}")
# Non-streaming path
if not request.stream:
try:
packets = stream_search_query(request, user, db_session)
return gather_search_stream(packets)
except NotImplementedError as e:
return SearchFullResponse(
all_executed_queries=[],
search_docs=[],
error=str(e),
)
# Streaming path
def stream_generator() -> Generator[str, None, None]:
try:
with get_session_with_current_tenant() as streaming_db_session:
for packet in stream_search_query(request, user, streaming_db_session):
yield get_json_line(packet.model_dump())
except NotImplementedError as e:
yield get_json_line(SearchErrorPacket(error=str(e)).model_dump())
except HTTPException:
raise
except Exception as e:
logger.exception("Error in search streaming")
yield get_json_line(SearchErrorPacket(error=str(e)).model_dump())
return StreamingResponse(stream_generator(), media_type="text/event-stream")
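A hypothetical client for the streaming path above. Each emitted line is assumed to be a JSON-encoded packet whose "type" field matches the SearchQueriesPacket / SearchDocsPacket / LLMSelectedDocsPacket / SearchErrorPacket models; the base URL and auth header are assumptions.
# Hypothetical streaming client; URL, auth header, and newline-delimited framing are assumptions.
import json
import requests

with requests.post(
    "http://localhost:8080/api/search/send-search-message",
    json={"search_query": "procurement process", "stream": True},
    headers={"Authorization": "Bearer <api-key>"},
    stream=True,
) as resp:
    resp.raise_for_status()
    for line in resp.iter_lines():
        if not line:
            continue
        packet = json.loads(line)
        if packet["type"] == "search_queries":
            print("executed queries:", packet["all_executed_queries"])
        elif packet["type"] == "search_docs":
            print(f"received {len(packet['search_docs'])} docs")
        elif packet["type"] == "search_error":
            print("search error:", packet["error"])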
@router.get("/search-history")
def get_search_history(
limit: int = 100,
filter_days: int | None = None,
user: User | None = Depends(current_user),
db_session: Session = Depends(get_session),
) -> SearchHistoryResponse:
"""
Fetch past search queries for the authenticated user.
Args:
limit: Maximum number of queries to return (default 100)
filter_days: Only return queries from the last N days (optional)
Returns:
SearchHistoryResponse with list of search queries, ordered by most recent first.
"""
# Validate limit
if limit <= 0:
raise HTTPException(
status_code=400,
detail="limit must be greater than 0",
)
if limit > 1000:
raise HTTPException(
status_code=400,
detail="limit must be at most 1000",
)
# Validate filter_days
if filter_days is not None and filter_days <= 0:
raise HTTPException(
status_code=400,
detail="filter_days must be greater than 0",
)
# TODO(yuhong) remove this
if user is None:
# Return empty list for unauthenticated users
return SearchHistoryResponse(search_queries=[])
search_queries = fetch_search_queries_for_user(
db_session=db_session,
user_id=user.id,
filter_days=filter_days,
limit=limit,
)
return SearchHistoryResponse(
search_queries=[
SearchQueryResponse(
query=sq.query,
query_expansions=sq.query_expansions,
created_at=sq.created_at,
)
for sq in search_queries
]
)
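# --- Illustrative client sketch (not part of the diff) ---
# Fetching recent search history via the endpoint above; the base URL and auth
# are assumptions, the query parameters match the handler signature.
import requests

history = requests.get(
    "http://localhost:8080/search-history",  # assumed deployment URL
    params={"limit": 25, "filter_days": 7},
)
print(history.json()["search_queries"])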

View File

@@ -1,35 +0,0 @@
from typing import Literal
from pydantic import BaseModel
from pydantic import ConfigDict
from ee.onyx.server.query_and_chat.models import SearchDocWithContent
class SearchQueriesPacket(BaseModel):
model_config = ConfigDict(frozen=True)
type: Literal["search_queries"] = "search_queries"
all_executed_queries: list[str]
class SearchDocsPacket(BaseModel):
model_config = ConfigDict(frozen=True)
type: Literal["search_docs"] = "search_docs"
search_docs: list[SearchDocWithContent]
class SearchErrorPacket(BaseModel):
model_config = ConfigDict(frozen=True)
type: Literal["search_error"] = "search_error"
error: str
class LLMSelectedDocsPacket(BaseModel):
model_config = ConfigDict(frozen=True)
type: Literal["llm_selected_docs"] = "llm_selected_docs"
# None if LLM selection failed, empty list if no docs selected, list of IDs otherwise
llm_selected_doc_ids: list[str] | None

View File

@@ -32,7 +32,6 @@ from onyx.configs.constants import MessageType
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import PUBLIC_API_TAGS
from onyx.configs.constants import QAFeedbackType
from onyx.configs.constants import QueryHistoryType
from onyx.configs.constants import SessionType
@@ -295,7 +294,7 @@ def list_all_query_history_exports(
)
@router.post("/admin/query-history/start-export", tags=PUBLIC_API_TAGS)
@router.post("/admin/query-history/start-export")
def start_query_history_export(
_: User | None = Depends(current_admin_user),
db_session: Session = Depends(get_session),
@@ -341,7 +340,7 @@ def start_query_history_export(
return {"request_id": task_id}
@router.get("/admin/query-history/export-status", tags=PUBLIC_API_TAGS)
@router.get("/admin/query-history/export-status")
def get_query_history_export_status(
request_id: str,
_: User | None = Depends(current_admin_user),
@@ -375,7 +374,7 @@ def get_query_history_export_status(
return {"status": TaskStatus.SUCCESS}
@router.get("/admin/query-history/download", tags=PUBLIC_API_TAGS)
@router.get("/admin/query-history/download")
def download_query_history_csv(
request_id: str,
_: User | None = Depends(current_admin_user),

View File

@@ -1,54 +0,0 @@
"""EE Settings API - provides license-aware settings override."""
from redis.exceptions import RedisError
from ee.onyx.configs.app_configs import LICENSE_ENFORCEMENT_ENABLED
from ee.onyx.db.license import get_cached_license_metadata
from onyx.server.settings.models import ApplicationStatus
from onyx.server.settings.models import Settings
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import get_current_tenant_id
logger = setup_logger()
# Statuses that indicate a billing/license problem - propagate these to settings
_GATED_STATUSES = frozenset(
{
ApplicationStatus.GATED_ACCESS,
ApplicationStatus.GRACE_PERIOD,
ApplicationStatus.PAYMENT_REMINDER,
}
)
def apply_license_status_to_settings(settings: Settings) -> Settings:
"""EE version: checks license status for self-hosted deployments.
For self-hosted, looks up license metadata and overrides application_status
if the license is missing or indicates a problem (expired, grace period, etc.).
For multi-tenant (cloud), the settings already have the correct status
from the control plane, so no override is needed.
If LICENSE_ENFORCEMENT_ENABLED is false, settings are returned unchanged,
allowing the product to function normally without license checks.
"""
if not LICENSE_ENFORCEMENT_ENABLED:
return settings
if MULTI_TENANT:
return settings
tenant_id = get_current_tenant_id()
try:
metadata = get_cached_license_metadata(tenant_id)
if metadata and metadata.status in _GATED_STATUSES:
settings.application_status = metadata.status
elif not metadata:
# No license = gated access for self-hosted EE
settings.application_status = ApplicationStatus.GATED_ACCESS
except RedisError as e:
logger.warning(f"Failed to check license metadata for settings: {e}")
return settings
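# --- Illustrative sketch (not part of the diff) ---
# How the override above is meant to be applied when serving settings;
# load_settings() is a hypothetical stand-in for however the base Settings
# object is normally obtained.
def get_effective_settings() -> Settings:
    settings = load_settings()  # hypothetical helper, not defined here
    return apply_license_status_to_settings(settings)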

View File

@@ -1,133 +0,0 @@
"""Tenant-specific usage limit overrides from the control plane (EE version)."""
import time
import requests
from ee.onyx.server.tenants.access import generate_data_plane_token
from onyx.configs.app_configs import CONTROL_PLANE_API_BASE_URL
from onyx.configs.app_configs import DEV_MODE
from onyx.server.tenant_usage_limits import TenantUsageLimitOverrides
from onyx.server.usage_limits import NO_LIMIT
from onyx.utils.logger import setup_logger
logger = setup_logger()
# In-memory storage for tenant overrides (populated at startup)
_tenant_usage_limit_overrides: dict[str, TenantUsageLimitOverrides] | None = None
_last_fetch_time: float = 0.0
_FETCH_INTERVAL = 60 * 60 * 24 # 24 hours
_ERROR_FETCH_INTERVAL = 30 * 60 # 30 minutes (if the last fetch failed)
def fetch_usage_limit_overrides() -> dict[str, TenantUsageLimitOverrides] | None:
"""
Fetch tenant-specific usage limit overrides from the control plane.
Returns:
Dictionary mapping tenant_id to their specific limit overrides.
Returns None if the fetch fails or yields no overrides (callers then fall back to defaults).
"""
try:
token = generate_data_plane_token()
headers = {
"Authorization": f"Bearer {token}",
"Content-Type": "application/json",
}
url = f"{CONTROL_PLANE_API_BASE_URL}/usage-limit-overrides"
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
tenant_overrides = response.json()
# Parse each tenant's overrides
result: dict[str, TenantUsageLimitOverrides] = {}
for override_data in tenant_overrides:
tenant_id = override_data["tenant_id"]
try:
result[tenant_id] = TenantUsageLimitOverrides(**override_data)
except Exception as e:
logger.warning(
f"Failed to parse usage limit overrides for tenant {tenant_id}: {e}"
)
return (
result or None
) # if empty dictionary, something went wrong and we shouldn't enforce limits
except requests.exceptions.RequestException as e:
logger.warning(f"Failed to fetch usage limit overrides from control plane: {e}")
return None
except Exception as e:
logger.error(f"Error parsing usage limit overrides: {e}")
return None
def load_usage_limit_overrides() -> None:
"""
Load tenant usage limit overrides from the control plane.
"""
global _tenant_usage_limit_overrides
global _last_fetch_time
logger.info("Loading tenant usage limit overrides from control plane...")
overrides = fetch_usage_limit_overrides()
_last_fetch_time = time.time()
# use the new result if it exists, otherwise use the old result
# (prevents us from updating to a failed fetch result)
_tenant_usage_limit_overrides = overrides or _tenant_usage_limit_overrides
if overrides:
logger.info(f"Loaded usage limit overrides for {len(overrides)} tenants")
else:
logger.info("No tenant-specific usage limit overrides found")
def unlimited(tenant_id: str) -> TenantUsageLimitOverrides:
return TenantUsageLimitOverrides(
tenant_id=tenant_id,
llm_cost_cents_trial=NO_LIMIT,
llm_cost_cents_paid=NO_LIMIT,
chunks_indexed_trial=NO_LIMIT,
chunks_indexed_paid=NO_LIMIT,
api_calls_trial=NO_LIMIT,
api_calls_paid=NO_LIMIT,
non_streaming_calls_trial=NO_LIMIT,
non_streaming_calls_paid=NO_LIMIT,
)
def get_tenant_usage_limit_overrides(
tenant_id: str,
) -> TenantUsageLimitOverrides | None:
"""
Get the usage limit overrides for a specific tenant.
Args:
tenant_id: The tenant ID to look up
Returns:
TenantUsageLimitOverrides if the tenant has overrides, None otherwise.
"""
if DEV_MODE: # in dev mode, we return unlimited limits for all tenants
return unlimited(tenant_id)
global _tenant_usage_limit_overrides
time_since = time.time() - _last_fetch_time
if (
_tenant_usage_limit_overrides is None and time_since > _ERROR_FETCH_INTERVAL
) or (time_since > _FETCH_INTERVAL):
logger.debug(
f"Last fetch time: {_last_fetch_time}, time since last fetch: {time_since}"
)
load_usage_limit_overrides()
# If we failed to fetch from the control plane or we're in dev mode, don't apply usage limits to anyone.
if _tenant_usage_limit_overrides is None or DEV_MODE:
return unlimited(tenant_id)
return _tenant_usage_limit_overrides.get(tenant_id)

View File

@@ -1,9 +1,9 @@
from typing import cast
from typing import Literal
import requests
import stripe
from ee.onyx.configs.app_configs import STRIPE_PRICE_ID
from ee.onyx.configs.app_configs import STRIPE_SECRET_KEY
from ee.onyx.server.tenants.access import generate_data_plane_token
from ee.onyx.server.tenants.models import BillingInformation
@@ -16,21 +16,15 @@ stripe.api_key = STRIPE_SECRET_KEY
logger = setup_logger()
def fetch_stripe_checkout_session(
tenant_id: str,
billing_period: Literal["monthly", "annual"] = "monthly",
) -> str:
def fetch_stripe_checkout_session(tenant_id: str) -> str:
token = generate_data_plane_token()
headers = {
"Authorization": f"Bearer {token}",
"Content-Type": "application/json",
}
url = f"{CONTROL_PLANE_API_BASE_URL}/create-checkout-session"
payload = {
"tenant_id": tenant_id,
"billing_period": billing_period,
}
response = requests.post(url, headers=headers, json=payload)
params = {"tenant_id": tenant_id}
response = requests.post(url, headers=headers, params=params)
response.raise_for_status()
return response.json()["sessionId"]
@@ -76,46 +70,24 @@ def fetch_billing_information(
return BillingInformation(**response_data)
def fetch_customer_portal_session(tenant_id: str, return_url: str | None = None) -> str:
"""
Fetch a Stripe customer portal session URL from the control plane.
NOTE: This is currently only used for multi-tenant (cloud) deployments.
Self-hosted proxy endpoints will be added in a future phase.
"""
token = generate_data_plane_token()
headers = {
"Authorization": f"Bearer {token}",
"Content-Type": "application/json",
}
url = f"{CONTROL_PLANE_API_BASE_URL}/create-customer-portal-session"
payload = {"tenant_id": tenant_id}
if return_url:
payload["return_url"] = return_url
response = requests.post(url, headers=headers, json=payload)
response.raise_for_status()
return response.json()["url"]
def register_tenant_users(tenant_id: str, number_of_users: int) -> stripe.Subscription:
"""
Update the number of seats for a tenant's subscription.
Preserves the existing price (monthly, annual, or grandfathered).
Send a request to the control service to register the number of users for a tenant.
"""
if not STRIPE_PRICE_ID:
raise Exception("STRIPE_PRICE_ID is not set")
response = fetch_tenant_stripe_information(tenant_id)
stripe_subscription_id = cast(str, response.get("stripe_subscription_id"))
subscription = stripe.Subscription.retrieve(stripe_subscription_id)
subscription_item = subscription["items"]["data"][0]
# Use existing price to preserve the customer's current plan
current_price_id = subscription_item.price.id
updated_subscription = stripe.Subscription.modify(
stripe_subscription_id,
items=[
{
"id": subscription_item.id,
"price": current_price_id,
"id": subscription["items"]["data"][0].id,
"price": STRIPE_PRICE_ID,
"quantity": number_of_users,
}
],

View File

@@ -1,41 +1,33 @@
import asyncio
import httpx
import stripe
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from ee.onyx.auth.users import current_admin_user
from ee.onyx.configs.app_configs import STRIPE_SECRET_KEY
from ee.onyx.server.tenants.access import control_plane_dep
from ee.onyx.server.tenants.billing import fetch_billing_information
from ee.onyx.server.tenants.billing import fetch_customer_portal_session
from ee.onyx.server.tenants.billing import fetch_stripe_checkout_session
from ee.onyx.server.tenants.billing import fetch_tenant_stripe_information
from ee.onyx.server.tenants.models import BillingInformation
from ee.onyx.server.tenants.models import CreateSubscriptionSessionRequest
from ee.onyx.server.tenants.models import ProductGatingFullSyncRequest
from ee.onyx.server.tenants.models import ProductGatingRequest
from ee.onyx.server.tenants.models import ProductGatingResponse
from ee.onyx.server.tenants.models import StripePublishableKeyResponse
from ee.onyx.server.tenants.models import SubscriptionSessionResponse
from ee.onyx.server.tenants.models import SubscriptionStatusResponse
from ee.onyx.server.tenants.product_gating import overwrite_full_gated_set
from ee.onyx.server.tenants.product_gating import store_product_gating
from onyx.auth.users import User
from onyx.configs.app_configs import STRIPE_PUBLISHABLE_KEY_OVERRIDE
from onyx.configs.app_configs import STRIPE_PUBLISHABLE_KEY_URL
from onyx.configs.app_configs import WEB_DOMAIN
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
from shared_configs.contextvars import get_current_tenant_id
stripe.api_key = STRIPE_SECRET_KEY
logger = setup_logger()
router = APIRouter(prefix="/tenants")
# Cache for Stripe publishable key to avoid hitting S3 on every request
_stripe_publishable_key_cache: str | None = None
_stripe_key_lock = asyncio.Lock()
@router.post("/product-gating")
def gate_product(
@@ -90,17 +82,21 @@ async def billing_information(
async def create_customer_portal_session(
_: User = Depends(current_admin_user),
) -> dict:
"""
Create a Stripe customer portal session via the control plane.
NOTE: This is currently only used for multi-tenant (cloud) deployments.
Self-hosted proxy endpoints will be added in a future phase.
"""
tenant_id = get_current_tenant_id()
return_url = f"{WEB_DOMAIN}/admin/billing"
try:
portal_url = fetch_customer_portal_session(tenant_id, return_url)
return {"url": portal_url}
stripe_info = fetch_tenant_stripe_information(tenant_id)
stripe_customer_id = stripe_info.get("stripe_customer_id")
if not stripe_customer_id:
raise HTTPException(status_code=400, detail="Stripe customer ID not found")
logger.info(stripe_customer_id)
portal_session = stripe.billing_portal.Session.create(
customer=stripe_customer_id,
return_url=f"{WEB_DOMAIN}/admin/billing",
)
logger.info(portal_session)
return {"url": portal_session.url}
except Exception as e:
logger.exception("Failed to create customer portal session")
raise HTTPException(status_code=500, detail=str(e))
@@ -108,82 +104,15 @@ async def create_customer_portal_session(
@router.post("/create-subscription-session")
async def create_subscription_session(
request: CreateSubscriptionSessionRequest | None = None,
_: User = Depends(current_admin_user),
) -> SubscriptionSessionResponse:
try:
tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
if not tenant_id:
raise HTTPException(status_code=400, detail="Tenant ID not found")
billing_period = request.billing_period if request else "monthly"
session_id = fetch_stripe_checkout_session(tenant_id, billing_period)
session_id = fetch_stripe_checkout_session(tenant_id)
return SubscriptionSessionResponse(sessionId=session_id)
except Exception as e:
logger.exception("Failed to create subscription session")
logger.exception("Failed to create resubscription session")
raise HTTPException(status_code=500, detail=str(e))
@router.get("/stripe-publishable-key")
async def get_stripe_publishable_key() -> StripePublishableKeyResponse:
"""
Fetch the Stripe publishable key.
Priority: env var override (for testing) > S3 bucket (production).
This endpoint is public (no auth required) since publishable keys are safe to expose.
The key is cached in memory to avoid hitting S3 on every request.
"""
global _stripe_publishable_key_cache
# Fast path: return cached value without lock
if _stripe_publishable_key_cache:
return StripePublishableKeyResponse(
publishable_key=_stripe_publishable_key_cache
)
# Use lock to prevent concurrent S3 requests
async with _stripe_key_lock:
# Double-check after acquiring lock (another request may have populated cache)
if _stripe_publishable_key_cache:
return StripePublishableKeyResponse(
publishable_key=_stripe_publishable_key_cache
)
# Check for env var override first (for local testing with pk_test_* keys)
if STRIPE_PUBLISHABLE_KEY_OVERRIDE:
key = STRIPE_PUBLISHABLE_KEY_OVERRIDE.strip()
if not key.startswith("pk_"):
raise HTTPException(
status_code=500,
detail="Invalid Stripe publishable key format",
)
_stripe_publishable_key_cache = key
return StripePublishableKeyResponse(publishable_key=key)
# Fall back to S3 bucket
if not STRIPE_PUBLISHABLE_KEY_URL:
raise HTTPException(
status_code=500,
detail="Stripe publishable key is not configured",
)
try:
async with httpx.AsyncClient() as client:
response = await client.get(STRIPE_PUBLISHABLE_KEY_URL)
response.raise_for_status()
key = response.text.strip()
# Validate key format
if not key.startswith("pk_"):
raise HTTPException(
status_code=500,
detail="Invalid Stripe publishable key format",
)
_stripe_publishable_key_cache = key
return StripePublishableKeyResponse(publishable_key=key)
except httpx.HTTPError:
raise HTTPException(
status_code=500,
detail="Failed to fetch Stripe publishable key",
)

View File

@@ -1,5 +1,4 @@
from datetime import datetime
from typing import Literal
from pydantic import BaseModel
@@ -74,12 +73,6 @@ class SubscriptionSessionResponse(BaseModel):
sessionId: str
class CreateSubscriptionSessionRequest(BaseModel):
"""Request to create a subscription checkout session."""
billing_period: Literal["monthly", "annual"] = "monthly"
class TenantByDomainResponse(BaseModel):
tenant_id: str
number_of_users: int
@@ -105,7 +98,3 @@ class PendingUserSnapshot(BaseModel):
class ApproveUserRequest(BaseModel):
email: str
class StripePublishableKeyResponse(BaseModel):
publishable_key: str

View File

@@ -65,9 +65,3 @@ def get_gated_tenants() -> set[str]:
redis_client = get_redis_replica_client(tenant_id=ONYX_CLOUD_TENANT_ID)
gated_tenants_bytes = cast(set[bytes], redis_client.smembers(GATED_TENANTS_KEY))
return {tenant_id.decode("utf-8") for tenant_id in gated_tenants_bytes}
def is_tenant_gated(tenant_id: str) -> bool:
"""Fast O(1) check if tenant is in gated set (multi-tenant only)."""
redis_client = get_redis_replica_client(tenant_id=ONYX_CLOUD_TENANT_ID)
return bool(redis_client.sismember(GATED_TENANTS_KEY, tenant_id))

View File

@@ -1,4 +1,5 @@
import asyncio
import logging
import uuid
import aiohttp # Async HTTP client
@@ -9,7 +10,10 @@ from fastapi import Request
from sqlalchemy import select
from sqlalchemy.orm import Session
from ee.onyx.configs.app_configs import ANTHROPIC_DEFAULT_API_KEY
from ee.onyx.configs.app_configs import COHERE_DEFAULT_API_KEY
from ee.onyx.configs.app_configs import HUBSPOT_TRACKING_URL
from ee.onyx.configs.app_configs import OPENAI_DEFAULT_API_KEY
from ee.onyx.server.tenants.access import generate_data_plane_token
from ee.onyx.server.tenants.models import TenantByDomainResponse
from ee.onyx.server.tenants.models import TenantCreationPayload
@@ -21,18 +25,11 @@ from ee.onyx.server.tenants.user_mapping import add_users_to_tenant
from ee.onyx.server.tenants.user_mapping import get_tenant_id_for_email
from ee.onyx.server.tenants.user_mapping import user_owns_a_tenant
from onyx.auth.users import exceptions
from onyx.configs.app_configs import ANTHROPIC_DEFAULT_API_KEY
from onyx.configs.app_configs import COHERE_DEFAULT_API_KEY
from onyx.configs.app_configs import CONTROL_PLANE_API_BASE_URL
from onyx.configs.app_configs import DEV_MODE
from onyx.configs.app_configs import OPENAI_DEFAULT_API_KEY
from onyx.configs.app_configs import OPENROUTER_DEFAULT_API_KEY
from onyx.configs.app_configs import VERTEXAI_DEFAULT_CREDENTIALS
from onyx.configs.app_configs import VERTEXAI_DEFAULT_LOCATION
from onyx.configs.constants import MilestoneRecordType
from onyx.db.engine.sql_engine import get_session_with_shared_schema
from onyx.db.engine.sql_engine import get_session_with_tenant
from onyx.db.image_generation import create_default_image_gen_config_from_api_key
from onyx.db.llm import update_default_provider
from onyx.db.llm import upsert_cloud_embedding_provider
from onyx.db.llm import upsert_llm_provider
@@ -40,24 +37,13 @@ from onyx.db.models import AvailableTenant
from onyx.db.models import IndexModelStatus
from onyx.db.models import SearchSettings
from onyx.db.models import UserTenantMapping
from onyx.llm.well_known_providers.auto_update_models import LLMRecommendations
from onyx.llm.well_known_providers.constants import ANTHROPIC_PROVIDER_NAME
from onyx.llm.well_known_providers.constants import OPENAI_PROVIDER_NAME
from onyx.llm.well_known_providers.constants import OPENROUTER_PROVIDER_NAME
from onyx.llm.well_known_providers.constants import VERTEX_CREDENTIALS_FILE_KWARG
from onyx.llm.well_known_providers.constants import VERTEX_LOCATION_KWARG
from onyx.llm.well_known_providers.constants import VERTEXAI_PROVIDER_NAME
from onyx.llm.well_known_providers.llm_provider_options import (
get_recommendations,
)
from onyx.llm.well_known_providers.llm_provider_options import (
model_configurations_for_provider,
)
from onyx.llm.constants import LlmProviderNames
from onyx.llm.llm_provider_options import get_anthropic_model_names
from onyx.llm.llm_provider_options import get_openai_model_names
from onyx.server.manage.embedding.models import CloudEmbeddingProviderCreationRequest
from onyx.server.manage.llm.models import LLMProviderUpsertRequest
from onyx.server.manage.llm.models import ModelConfigurationUpsertRequest
from onyx.setup import setup_onyx
from onyx.utils.logger import setup_logger
from onyx.utils.telemetry import mt_cloud_telemetry
from shared_configs.configs import MULTI_TENANT
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
@@ -66,7 +52,7 @@ from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
from shared_configs.enums import EmbeddingProvider
logger = setup_logger()
logger = logging.getLogger(__name__)
async def get_or_provision_tenant(
@@ -275,173 +261,59 @@ async def rollback_tenant_provisioning(tenant_id: str) -> None:
logger.info(f"Tenant rollback completed successfully for tenant {tenant_id}")
def _build_model_configuration_upsert_requests(
provider_name: str,
recommendations: LLMRecommendations,
) -> list[ModelConfigurationUpsertRequest]:
model_configurations = model_configurations_for_provider(
provider_name, recommendations
)
return [
ModelConfigurationUpsertRequest(
name=model_configuration.name,
is_visible=model_configuration.is_visible,
max_input_tokens=model_configuration.max_input_tokens,
supports_image_input=model_configuration.supports_image_input,
)
for model_configuration in model_configurations
]
def configure_default_api_keys(db_session: Session) -> None:
"""Configure default LLM providers using recommended-models.json for model selection."""
# Load recommendations from JSON config
recommendations = get_recommendations()
has_set_default_provider = False
def _upsert(request: LLMProviderUpsertRequest) -> None:
nonlocal has_set_default_provider
try:
provider = upsert_llm_provider(request, db_session)
if not has_set_default_provider:
update_default_provider(provider.id, db_session)
has_set_default_provider = True
except Exception as e:
logger.error(f"Failed to configure {request.provider} provider: {e}")
# Configure OpenAI provider
if OPENAI_DEFAULT_API_KEY:
default_model = recommendations.get_default_model(OPENAI_PROVIDER_NAME)
if default_model is None:
logger.error(
f"No default model found for {OPENAI_PROVIDER_NAME} in recommendations"
)
default_model_name = default_model.name if default_model else "gpt-5.2"
openai_provider = LLMProviderUpsertRequest(
name="OpenAI",
provider=OPENAI_PROVIDER_NAME,
api_key=OPENAI_DEFAULT_API_KEY,
default_model_name=default_model_name,
model_configurations=_build_model_configuration_upsert_requests(
OPENAI_PROVIDER_NAME, recommendations
),
api_key_changed=True,
is_auto_mode=True,
)
_upsert(openai_provider)
# Create default image generation config using the OpenAI API key
try:
create_default_image_gen_config_from_api_key(
db_session, OPENAI_DEFAULT_API_KEY
)
except Exception as e:
logger.error(f"Failed to create default image gen config: {e}")
else:
logger.info(
"OPENAI_DEFAULT_API_KEY not set, skipping OpenAI provider configuration"
)
# Configure Anthropic provider
if ANTHROPIC_DEFAULT_API_KEY:
default_model = recommendations.get_default_model(ANTHROPIC_PROVIDER_NAME)
if default_model is None:
logger.error(
f"No default model found for {ANTHROPIC_PROVIDER_NAME} in recommendations"
)
default_model_name = (
default_model.name if default_model else "claude-sonnet-4-5"
)
anthropic_provider = LLMProviderUpsertRequest(
name="Anthropic",
provider=ANTHROPIC_PROVIDER_NAME,
provider=LlmProviderNames.ANTHROPIC,
api_key=ANTHROPIC_DEFAULT_API_KEY,
default_model_name=default_model_name,
model_configurations=_build_model_configuration_upsert_requests(
ANTHROPIC_PROVIDER_NAME, recommendations
),
default_model_name="claude-3-7-sonnet-20250219",
model_configurations=[
ModelConfigurationUpsertRequest(
name=name,
is_visible=False,
max_input_tokens=None,
)
for name in get_anthropic_model_names()
],
api_key_changed=True,
is_auto_mode=True,
)
_upsert(anthropic_provider)
try:
full_provider = upsert_llm_provider(anthropic_provider, db_session)
update_default_provider(full_provider.id, db_session)
except Exception as e:
logger.error(f"Failed to configure Anthropic provider: {e}")
else:
logger.info(
logger.error(
"ANTHROPIC_DEFAULT_API_KEY not set, skipping Anthropic provider configuration"
)
# Configure Vertex AI provider
if VERTEXAI_DEFAULT_CREDENTIALS:
default_model = recommendations.get_default_model(VERTEXAI_PROVIDER_NAME)
if default_model is None:
logger.error(
f"No default model found for {VERTEXAI_PROVIDER_NAME} in recommendations"
)
default_model_name = default_model.name if default_model else "gemini-2.5-pro"
# Vertex AI uses custom_config for credentials and location
custom_config = {
VERTEX_CREDENTIALS_FILE_KWARG: VERTEXAI_DEFAULT_CREDENTIALS,
VERTEX_LOCATION_KWARG: VERTEXAI_DEFAULT_LOCATION,
}
vertexai_provider = LLMProviderUpsertRequest(
name="Google Vertex AI",
provider=VERTEXAI_PROVIDER_NAME,
custom_config=custom_config,
default_model_name=default_model_name,
model_configurations=_build_model_configuration_upsert_requests(
VERTEXAI_PROVIDER_NAME, recommendations
),
if OPENAI_DEFAULT_API_KEY:
openai_provider = LLMProviderUpsertRequest(
name="OpenAI",
provider=LlmProviderNames.OPENAI,
api_key=OPENAI_DEFAULT_API_KEY,
default_model_name="gpt-4o",
model_configurations=[
ModelConfigurationUpsertRequest(
name=model_name,
is_visible=False,
max_input_tokens=None,
)
for model_name in get_openai_model_names()
],
api_key_changed=True,
is_auto_mode=True,
)
_upsert(vertexai_provider)
try:
full_provider = upsert_llm_provider(openai_provider, db_session)
update_default_provider(full_provider.id, db_session)
except Exception as e:
logger.error(f"Failed to configure OpenAI provider: {e}")
else:
logger.info(
"VERTEXAI_DEFAULT_CREDENTIALS not set, skipping Vertex AI provider configuration"
logger.error(
"OPENAI_DEFAULT_API_KEY not set, skipping OpenAI provider configuration"
)
# Configure OpenRouter provider
if OPENROUTER_DEFAULT_API_KEY:
default_model = recommendations.get_default_model(OPENROUTER_PROVIDER_NAME)
if default_model is None:
logger.error(
f"No default model found for {OPENROUTER_PROVIDER_NAME} in recommendations"
)
default_model_name = default_model.name if default_model else "z-ai/glm-4.7"
# For OpenRouter, we use the visible models from recommendations as model_configurations
# since OpenRouter models are dynamic (fetched from their API)
visible_models = recommendations.get_visible_models(OPENROUTER_PROVIDER_NAME)
model_configurations = [
ModelConfigurationUpsertRequest(
name=model.name,
is_visible=True,
max_input_tokens=None,
display_name=model.display_name,
)
for model in visible_models
]
openrouter_provider = LLMProviderUpsertRequest(
name="OpenRouter",
provider=OPENROUTER_PROVIDER_NAME,
api_key=OPENROUTER_DEFAULT_API_KEY,
default_model_name=default_model_name,
model_configurations=model_configurations,
api_key_changed=True,
is_auto_mode=True,
)
_upsert(openrouter_provider)
else:
logger.info(
"OPENROUTER_DEFAULT_API_KEY not set, skipping OpenRouter provider configuration"
)
# Configure Cohere embedding provider
if COHERE_DEFAULT_API_KEY:
cloud_embedding_provider = CloudEmbeddingProviderCreationRequest(
provider_type=EmbeddingProvider.COHERE,

View File

@@ -9,7 +9,6 @@ from ee.onyx.db.token_limit import fetch_user_group_token_rate_limits_for_user
from ee.onyx.db.token_limit import insert_user_group_token_rate_limit
from onyx.auth.users import current_admin_user
from onyx.auth.users import current_curator_or_admin_user
from onyx.configs.constants import PUBLIC_API_TAGS
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.db.token_limit import fetch_all_user_token_rate_limits
@@ -18,7 +17,7 @@ from onyx.server.query_and_chat.token_limit import any_rate_limit_exists
from onyx.server.token_rate_limits.models import TokenRateLimitArgs
from onyx.server.token_rate_limits.models import TokenRateLimitDisplay
router = APIRouter(prefix="/admin/token-rate-limits", tags=PUBLIC_API_TAGS)
router = APIRouter(prefix="/admin/token-rate-limits")
"""

View File

@@ -1,38 +0,0 @@
"""EE Usage limits - trial detection via billing information."""
from ee.onyx.server.tenants.billing import fetch_billing_information
from ee.onyx.server.tenants.models import BillingInformation
from ee.onyx.server.tenants.models import SubscriptionStatusResponse
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
logger = setup_logger()
def is_tenant_on_trial(tenant_id: str) -> bool:
"""
Determine if a tenant is currently on a trial subscription.
In multi-tenant mode, we fetch billing information from the control plane
to determine if the tenant has an active trial.
"""
if not MULTI_TENANT:
return False
try:
billing_info = fetch_billing_information(tenant_id)
# If not subscribed at all, check if we have trial information
if isinstance(billing_info, SubscriptionStatusResponse):
# No subscription means they're likely on trial (new tenant)
return True
if isinstance(billing_info, BillingInformation):
return billing_info.status == "trialing"
return False
except Exception as e:
logger.warning(f"Failed to fetch billing info for trial check: {e}")
# Default to trial limits on error (more restrictive = safer)
return True

View File

@@ -18,7 +18,6 @@ from ee.onyx.server.user_group.models import UserGroupCreate
from ee.onyx.server.user_group.models import UserGroupUpdate
from onyx.auth.users import current_admin_user
from onyx.auth.users import current_curator_or_admin_user
from onyx.configs.constants import PUBLIC_API_TAGS
from onyx.db.engine.sql_engine import get_session
from onyx.db.models import User
from onyx.db.models import UserRole
@@ -26,7 +25,7 @@ from onyx.utils.logger import setup_logger
logger = setup_logger()
router = APIRouter(prefix="/manage", tags=PUBLIC_API_TAGS)
router = APIRouter(prefix="/manage")
@router.get("/admin/user-group")

View File

@@ -97,14 +97,10 @@ def get_access_for_documents(
def _get_acl_for_user(user: User | None, db_session: Session) -> set[str]:
"""Returns a list of ACL entries that the user has access to.
This is meant to be used downstream to filter out documents that the user
does not have access to. The user should have access to a document if at
least one entry in the document's ACL matches one entry in the returned set.
NOTE: These strings must be formatted in the same way as the output of
DocumentAccess::to_acl.
"""Returns a list of ACL entries that the user has access to. This is meant to be
used downstream to filter out documents that the user does not have access to. The
user should have access to a document if at least one entry in the document's ACL
matches one entry in the returned set.
"""
if user:
return {prefix_user_email(user.email), PUBLIC_DOC_PAT}

View File

@@ -105,8 +105,6 @@ class DocExternalAccess:
)
# TODO(andrei): First refactor this into a pydantic model, then get rid of
# duplicate fields.
@dataclass(frozen=True, init=False)
class DocumentAccess(ExternalAccess):
# User emails for Onyx users, None indicates admin
@@ -125,11 +123,9 @@ class DocumentAccess(ExternalAccess):
)
def to_acl(self) -> set[str]:
"""Converts the access state to a set of formatted ACL strings.
# the acl's emitted by this function are prefixed by type
# to get the native objects, access the member variables directly
NOTE: When querying for documents, the supplied ACL filter strings must
be formatted in the same way as this function.
"""
acl_set: set[str] = set()
for user_email in self.user_emails:
if user_email:

View File

@@ -1,107 +0,0 @@
"""Captcha verification for user registration."""
import httpx
from pydantic import BaseModel
from pydantic import Field
from onyx.configs.app_configs import CAPTCHA_ENABLED
from onyx.configs.app_configs import RECAPTCHA_SCORE_THRESHOLD
from onyx.configs.app_configs import RECAPTCHA_SECRET_KEY
from onyx.utils.logger import setup_logger
logger = setup_logger()
RECAPTCHA_VERIFY_URL = "https://www.google.com/recaptcha/api/siteverify"
class CaptchaVerificationError(Exception):
"""Raised when captcha verification fails."""
class RecaptchaResponse(BaseModel):
"""Response from Google reCAPTCHA verification API."""
success: bool
score: float | None = None # Only present for reCAPTCHA v3
action: str | None = None
challenge_ts: str | None = None
hostname: str | None = None
error_codes: list[str] | None = Field(default=None, alias="error-codes")
def is_captcha_enabled() -> bool:
"""Check if captcha verification is enabled."""
return CAPTCHA_ENABLED and bool(RECAPTCHA_SECRET_KEY)
async def verify_captcha_token(
token: str,
expected_action: str = "signup",
) -> None:
"""
Verify a reCAPTCHA token with Google's API.
Args:
token: The reCAPTCHA response token from the client
expected_action: Expected action name for v3 verification
Raises:
CaptchaVerificationError: If verification fails
"""
if not is_captcha_enabled():
return
if not token:
raise CaptchaVerificationError("Captcha token is required")
try:
async with httpx.AsyncClient() as client:
response = await client.post(
RECAPTCHA_VERIFY_URL,
data={
"secret": RECAPTCHA_SECRET_KEY,
"response": token,
},
timeout=10.0,
)
response.raise_for_status()
data = response.json()
result = RecaptchaResponse(**data)
if not result.success:
error_codes = result.error_codes or ["unknown-error"]
logger.warning(f"Captcha verification failed: {error_codes}")
raise CaptchaVerificationError(
f"Captcha verification failed: {', '.join(error_codes)}"
)
# For reCAPTCHA v3, also check the score
if result.score is not None:
if result.score < RECAPTCHA_SCORE_THRESHOLD:
logger.warning(
f"Captcha score too low: {result.score} < {RECAPTCHA_SCORE_THRESHOLD}"
)
raise CaptchaVerificationError(
"Captcha verification failed: suspicious activity detected"
)
# Optionally verify the action matches
if result.action and result.action != expected_action:
logger.warning(
f"Captcha action mismatch: {result.action} != {expected_action}"
)
raise CaptchaVerificationError(
"Captcha verification failed: action mismatch"
)
logger.debug(
f"Captcha verification passed: score={result.score}, "
f"action={result.action}"
)
except httpx.HTTPError as e:
logger.error(f"Captcha API request failed: {e}")
# In case of API errors, we might want to allow registration
# to prevent blocking legitimate users. This is a policy decision.
raise CaptchaVerificationError("Captcha verification service unavailable")

View File

@@ -1,192 +0,0 @@
"""
Utility to validate and block disposable/temporary email addresses.
This module fetches a list of known disposable email domains from a remote source
and caches them for performance. It's used during user registration to prevent
abuse from temporary email services.
"""
import threading
import time
from typing import Set
import httpx
from onyx.configs.app_configs import DISPOSABLE_EMAIL_DOMAINS_URL
from onyx.utils.logger import setup_logger
logger = setup_logger()
class DisposableEmailValidator:
"""
Thread-safe singleton validator for disposable email domains.
Fetches and caches the list of disposable domains, with periodic refresh.
"""
_instance: "DisposableEmailValidator | None" = None
_lock = threading.Lock()
def __new__(cls) -> "DisposableEmailValidator":
if cls._instance is None:
with cls._lock:
if cls._instance is None:
cls._instance = super().__new__(cls)
return cls._instance
def __init__(self) -> None:
# Check if already initialized using a try/except to avoid type issues
try:
if self._initialized:
return
except AttributeError:
pass
self._domains: Set[str] = set()
self._last_fetch_time: float = 0
self._fetch_lock = threading.Lock()
# Cache for 1 hour
self._cache_duration = 3600
# Hardcoded fallback list of common disposable domains
# This ensures we block at least these even if the remote fetch fails
self._fallback_domains = {
"trashlify.com",
"10minutemail.com",
"guerrillamail.com",
"mailinator.com",
"tempmail.com",
"throwaway.email",
"yopmail.com",
"temp-mail.org",
"getnada.com",
"maildrop.cc",
}
# Set initialized flag last to prevent race conditions
self._initialized: bool = True
def _should_refresh(self) -> bool:
"""Check if the cached domains should be refreshed."""
return (time.time() - self._last_fetch_time) > self._cache_duration
def _fetch_domains(self) -> Set[str]:
"""
Fetch disposable email domains from the configured URL.
Returns:
Set of domain strings (lowercased)
"""
if not DISPOSABLE_EMAIL_DOMAINS_URL:
logger.debug("DISPOSABLE_EMAIL_DOMAINS_URL not configured")
return self._fallback_domains.copy()
try:
logger.info(
f"Fetching disposable email domains from {DISPOSABLE_EMAIL_DOMAINS_URL}"
)
with httpx.Client(timeout=10.0) as client:
response = client.get(DISPOSABLE_EMAIL_DOMAINS_URL)
response.raise_for_status()
domains_list = response.json()
if not isinstance(domains_list, list):
logger.error(
f"Expected list from disposable domains URL, got {type(domains_list)}"
)
return self._fallback_domains.copy()
# Convert all to lowercase and create set
domains = {domain.lower().strip() for domain in domains_list if domain}
# Always include fallback domains
domains.update(self._fallback_domains)
logger.info(
f"Successfully fetched {len(domains)} disposable email domains"
)
return domains
except httpx.HTTPError as e:
logger.warning(f"Failed to fetch disposable domains (HTTP error): {e}")
except Exception as e:
logger.warning(f"Failed to fetch disposable domains: {e}")
# On error, return fallback domains
return self._fallback_domains.copy()
def get_domains(self) -> Set[str]:
"""
Get the cached set of disposable email domains.
Refreshes the cache if needed.
Returns:
Set of disposable domain strings (lowercased)
"""
# Fast path: return cached domains if still fresh
if self._domains and not self._should_refresh():
return self._domains.copy()
# Slow path: need to refresh
with self._fetch_lock:
# Double-check after acquiring lock
if self._domains and not self._should_refresh():
return self._domains.copy()
self._domains = self._fetch_domains()
self._last_fetch_time = time.time()
return self._domains.copy()
def is_disposable(self, email: str) -> bool:
"""
Check if an email address uses a disposable domain.
Args:
email: The email address to check
Returns:
True if the email domain is disposable, False otherwise
"""
if not email or "@" not in email:
return False
parts = email.split("@")
if len(parts) != 2 or not parts[0]: # Must have user@domain with non-empty user
return False
domain = parts[1].lower().strip()
if not domain: # Domain part must not be empty
return False
disposable_domains = self.get_domains()
return domain in disposable_domains
# Global singleton instance
_validator = DisposableEmailValidator()
def is_disposable_email(email: str) -> bool:
"""
Check if an email address uses a disposable/temporary domain.
This is a convenience function that uses the global validator instance.
Args:
email: The email address to check
Returns:
True if the email uses a disposable domain, False otherwise
"""
return _validator.is_disposable(email)
def refresh_disposable_domains() -> None:
"""
Force a refresh of the disposable domains list.
This can be called manually if you want to update the list
without waiting for the cache to expire.
"""
_validator._last_fetch_time = 0
_validator.get_domains()
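# --- Illustrative sketch (not part of the diff) ---
# Rejecting a throwaway address at registration time with the helper above;
# mailinator.com is one of the hardcoded fallback domains, so the check holds
# even if the remote fetch fails.
if is_disposable_email("someone@mailinator.com"):
    raise ValueError("Disposable email addresses are not allowed")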

View File

@@ -40,8 +40,6 @@ class UserRead(schemas.BaseUser[uuid.UUID]):
class UserCreate(schemas.BaseUserCreate):
role: UserRole = UserRole.BASIC
tenant_id: str | None = None
# Captcha token for cloud signup protection (optional, only used when captcha is enabled)
captcha_token: str | None = None
class UserUpdateWithRole(schemas.BaseUserUpdate):

View File

@@ -11,7 +11,6 @@ from typing import Any
from typing import cast
from typing import Dict
from typing import List
from typing import Literal
from typing import Optional
from typing import Protocol
from typing import Tuple
@@ -61,7 +60,6 @@ from sqlalchemy.exc import IntegrityError
from sqlalchemy.ext.asyncio import AsyncSession
from onyx.auth.api_key import get_hashed_api_key_from_request
from onyx.auth.disposable_email_validator import is_disposable_email
from onyx.auth.email_utils import send_forgot_password_email
from onyx.auth.email_utils import send_user_verification_email
from onyx.auth.invited_users import get_invited_users
@@ -250,23 +248,13 @@ def verify_email_in_whitelist(email: str, tenant_id: str) -> None:
def verify_email_domain(email: str) -> None:
if email.count("@") != 1:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Email is not valid",
)
domain = email.split("@")[-1].lower()
# Check if email uses a disposable/temporary domain
if is_disposable_email(email):
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Disposable email addresses are not allowed. Please use a permanent email address.",
)
# Check domain whitelist if configured
if VALID_EMAIL_DOMAINS:
if email.count("@") != 1:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Email is not valid",
)
domain = email.split("@")[-1].lower()
if domain not in VALID_EMAIL_DOMAINS:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
@@ -304,57 +292,11 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
safe: bool = False,
request: Optional[Request] = None,
) -> User:
# Verify captcha if enabled (for cloud signup protection)
from onyx.auth.captcha import CaptchaVerificationError
from onyx.auth.captcha import is_captcha_enabled
from onyx.auth.captcha import verify_captcha_token
if is_captcha_enabled() and request is not None:
# Get captcha token from request body or headers
captcha_token = None
if hasattr(user_create, "captcha_token"):
captcha_token = getattr(user_create, "captcha_token", None)
# Also check headers as a fallback
if not captcha_token:
captcha_token = request.headers.get("X-Captcha-Token")
try:
await verify_captcha_token(
captcha_token or "", expected_action="signup"
)
except CaptchaVerificationError as e:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail={"reason": str(e)},
)
# We verify the password here to make sure it's valid before we proceed
await self.validate_password(
user_create.password, cast(schemas.UC, user_create)
)
# Check for disposable emails BEFORE provisioning tenant
# This prevents creating tenants for throwaway email addresses
try:
verify_email_domain(user_create.email)
except HTTPException as e:
# Log blocked disposable email attempts
if (
e.status_code == status.HTTP_400_BAD_REQUEST
and "Disposable email" in str(e.detail)
):
domain = (
user_create.email.split("@")[-1]
if "@" in user_create.email
else "unknown"
)
logger.warning(
f"Blocked disposable email registration attempt: {domain}",
extra={"email_domain": domain},
)
raise
user_count: int | None = None
referral_source = (
request.cookies.get("referral_source", None)
@@ -376,17 +318,8 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
try:
async with get_async_session_context_manager(tenant_id) as db_session:
# Check invite list based on deployment mode
if MULTI_TENANT:
# Multi-tenant: Only require invite for existing tenants
# New tenant creation (first user) doesn't require an invite
user_count = await get_user_count()
if user_count > 0:
# Tenant already has users - require invite for new users
verify_email_is_invited(user_create.email)
else:
# Single-tenant: Check invite list (skips if SAML/OIDC or no list configured)
verify_email_is_invited(user_create.email)
verify_email_is_invited(user_create.email)
verify_email_domain(user_create.email)
if MULTI_TENANT:
tenant_user_db = SQLAlchemyUserAdminDB[User, uuid.UUID](
db_session, User, OAuthAccount
@@ -1457,9 +1390,6 @@ def get_default_admin_user_emails_() -> list[str]:
STATE_TOKEN_AUDIENCE = "fastapi-users:oauth-state"
STATE_TOKEN_LIFETIME_SECONDS = 3600
CSRF_TOKEN_KEY = "csrftoken"
CSRF_TOKEN_COOKIE_NAME = "fastapiusersoauthcsrf"
class OAuth2AuthorizeResponse(BaseModel):
@@ -1467,19 +1397,13 @@ class OAuth2AuthorizeResponse(BaseModel):
def generate_state_token(
data: Dict[str, str],
secret: SecretType,
lifetime_seconds: int = STATE_TOKEN_LIFETIME_SECONDS,
data: Dict[str, str], secret: SecretType, lifetime_seconds: int = 3600
) -> str:
data["aud"] = STATE_TOKEN_AUDIENCE
return generate_jwt(data, secret, lifetime_seconds)
def generate_csrf_token() -> str:
return secrets.token_urlsafe(32)
# refer to https://github.com/fastapi-users/fastapi-users/blob/42ddc241b965475390e2bce887b084152ae1a2cd/fastapi_users/fastapi_users.py#L91
def create_onyx_oauth_router(
oauth_client: BaseOAuth2,
@@ -1508,13 +1432,6 @@ def get_oauth_router(
redirect_url: Optional[str] = None,
associate_by_email: bool = False,
is_verified_by_default: bool = False,
*,
csrf_token_cookie_name: str = CSRF_TOKEN_COOKIE_NAME,
csrf_token_cookie_path: str = "/",
csrf_token_cookie_domain: Optional[str] = None,
csrf_token_cookie_secure: Optional[bool] = None,
csrf_token_cookie_httponly: bool = True,
csrf_token_cookie_samesite: Optional[Literal["lax", "strict", "none"]] = "lax",
) -> APIRouter:
"""Generate a router with the OAuth routes."""
router = APIRouter()
@@ -1531,9 +1448,6 @@ def get_oauth_router(
route_name=callback_route_name,
)
if csrf_token_cookie_secure is None:
csrf_token_cookie_secure = WEB_DOMAIN.startswith("https")
@router.get(
"/authorize",
name=f"oauth:{oauth_client.name}.{backend.name}.authorize",
@@ -1541,10 +1455,8 @@ def get_oauth_router(
)
async def authorize(
request: Request,
response: Response,
redirect: bool = Query(False),
scopes: List[str] = Query(None),
) -> Response | OAuth2AuthorizeResponse:
) -> OAuth2AuthorizeResponse:
referral_source = request.cookies.get("referral_source", None)
if redirect_url is not None:
@@ -1554,11 +1466,9 @@ def get_oauth_router(
next_url = request.query_params.get("next", "/")
csrf_token = generate_csrf_token()
state_data: Dict[str, str] = {
"next_url": next_url,
"referral_source": referral_source or "default_referral",
CSRF_TOKEN_KEY: csrf_token,
}
state = generate_state_token(state_data, state_secret)
@@ -1575,31 +1485,6 @@ def get_oauth_router(
authorization_url, {"access_type": "offline", "prompt": "consent"}
)
if redirect:
redirect_response = RedirectResponse(authorization_url, status_code=302)
redirect_response.set_cookie(
key=csrf_token_cookie_name,
value=csrf_token,
max_age=STATE_TOKEN_LIFETIME_SECONDS,
path=csrf_token_cookie_path,
domain=csrf_token_cookie_domain,
secure=csrf_token_cookie_secure,
httponly=csrf_token_cookie_httponly,
samesite=csrf_token_cookie_samesite,
)
return redirect_response
response.set_cookie(
key=csrf_token_cookie_name,
value=csrf_token,
max_age=STATE_TOKEN_LIFETIME_SECONDS,
path=csrf_token_cookie_path,
domain=csrf_token_cookie_domain,
secure=csrf_token_cookie_secure,
httponly=csrf_token_cookie_httponly,
samesite=csrf_token_cookie_samesite,
)
return OAuth2AuthorizeResponse(authorization_url=authorization_url)
@log_function_time(print_only=True)
@@ -1649,33 +1534,7 @@ def get_oauth_router(
try:
state_data = decode_jwt(state, state_secret, [STATE_TOKEN_AUDIENCE])
except jwt.DecodeError:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=getattr(
ErrorCode, "ACCESS_TOKEN_DECODE_ERROR", "ACCESS_TOKEN_DECODE_ERROR"
),
)
except jwt.ExpiredSignatureError:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=getattr(
ErrorCode,
"ACCESS_TOKEN_ALREADY_EXPIRED",
"ACCESS_TOKEN_ALREADY_EXPIRED",
),
)
cookie_csrf_token = request.cookies.get(csrf_token_cookie_name)
state_csrf_token = state_data.get(CSRF_TOKEN_KEY)
if (
not cookie_csrf_token
or not state_csrf_token
or not secrets.compare_digest(cookie_csrf_token, state_csrf_token)
):
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=getattr(ErrorCode, "OAUTH_INVALID_STATE", "OAUTH_INVALID_STATE"),
)
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST)
next_url = state_data.get("next_url", "/")
referral_source = state_data.get("referral_source", None)

View File

@@ -26,13 +26,9 @@ from onyx.background.celery.celery_utils import celery_is_worker_primary
from onyx.background.celery.celery_utils import make_probe_path
from onyx.background.celery.tasks.vespa.document_sync import DOCUMENT_SYNC_PREFIX
from onyx.background.celery.tasks.vespa.document_sync import DOCUMENT_SYNC_TASKSET_KEY
from onyx.configs.app_configs import ENABLE_OPENSEARCH_INDEXING_FOR_ONYX
from onyx.configs.constants import ONYX_CLOUD_CELERY_TASK_PREFIX
from onyx.configs.constants import OnyxRedisLocks
from onyx.db.engine.sql_engine import get_sqlalchemy_engine
from onyx.document_index.opensearch.client import (
wait_for_opensearch_with_timeout,
)
from onyx.document_index.vespa.shared_utils.utils import wait_for_vespa_with_timeout
from onyx.httpx.httpx_pool import HttpxPool
from onyx.redis.redis_connector import RedisConnector
@@ -520,16 +516,10 @@ def wait_for_vespa_or_shutdown(sender: Any, **kwargs: Any) -> None:
Raises WorkerShutdown if the timeout is reached."""
if not wait_for_vespa_with_timeout():
msg = "[Vespa] Readiness probe did not succeed within the timeout. Exiting..."
msg = "Vespa: Readiness probe did not succeed within the timeout. Exiting..."
logger.error(msg)
raise WorkerShutdown(msg)
if ENABLE_OPENSEARCH_INDEXING_FOR_ONYX:
if not wait_for_opensearch_with_timeout():
msg = "[OpenSearch] Readiness probe did not succeed within the timeout. Exiting..."
logger.error(msg)
raise WorkerShutdown(msg)
# File for validating worker liveness
class LivenessProbe(bootsteps.StartStopStep):

View File

@@ -124,7 +124,6 @@ celery_app.autodiscover_tasks(
"onyx.background.celery.tasks.kg_processing",
"onyx.background.celery.tasks.monitoring",
"onyx.background.celery.tasks.user_file_processing",
"onyx.background.celery.tasks.llm_model_update",
# Light worker tasks
"onyx.background.celery.tasks.shared",
"onyx.background.celery.tasks.vespa",

View File

@@ -98,5 +98,8 @@ for bootstep in base_bootsteps:
celery_app.autodiscover_tasks(
[
"onyx.background.celery.tasks.docfetching",
# Ensure the user files indexing worker registers the doc_id migration task
# TODO(subash): remove this once the doc_id migration is complete
"onyx.background.celery.tasks.user_file_processing",
]
)

View File

@@ -2,12 +2,8 @@ import copy
from datetime import timedelta
from typing import Any
from celery.schedules import crontab
from onyx.configs.app_configs import AUTO_LLM_CONFIG_URL
from onyx.configs.app_configs import AUTO_LLM_UPDATE_INTERVAL_SECONDS
from onyx.configs.app_configs import ENTERPRISE_EDITION_ENABLED
from onyx.configs.app_configs import SCHEDULED_EVAL_DATASET_NAMES
from onyx.configs.app_configs import LLM_MODEL_UPDATE_API_URL
from onyx.configs.constants import ONYX_CLOUD_CELERY_TASK_PREFIX
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
@@ -57,6 +53,16 @@ beat_task_templates: list[dict] = [
"expires": BEAT_EXPIRES_DEFAULT,
},
},
{
"name": "user-file-docid-migration",
"task": OnyxCeleryTask.USER_FILE_DOCID_MIGRATION,
"schedule": timedelta(minutes=10),
"options": {
"priority": OnyxCeleryPriority.HIGH,
"expires": BEAT_EXPIRES_DEFAULT,
"queue": OnyxCeleryQueues.USER_FILES_INDEXING,
},
},
{
"name": "check-for-kg-processing",
"task": OnyxCeleryTask.CHECK_KG_PROCESSING,
@@ -165,32 +171,13 @@ if ENTERPRISE_EDITION_ENABLED:
]
)
# Add the Auto LLM update task if the config URL is set (has a default)
if AUTO_LLM_CONFIG_URL:
# Only add the LLM model update task if the API URL is configured
if LLM_MODEL_UPDATE_API_URL:
beat_task_templates.append(
{
"name": "check-for-auto-llm-update",
"task": OnyxCeleryTask.CHECK_FOR_AUTO_LLM_UPDATE,
"schedule": timedelta(seconds=AUTO_LLM_UPDATE_INTERVAL_SECONDS),
"options": {
"priority": OnyxCeleryPriority.LOW,
"expires": BEAT_EXPIRES_DEFAULT,
},
}
)
# Add scheduled eval task if datasets are configured
if SCHEDULED_EVAL_DATASET_NAMES:
beat_task_templates.append(
{
"name": "scheduled-eval-pipeline",
"task": OnyxCeleryTask.SCHEDULED_EVAL_TASK,
# run every Sunday at midnight UTC
"schedule": crontab(
hour=0,
minute=0,
day_of_week=0,
),
"name": "check-for-llm-model-update",
"task": OnyxCeleryTask.CHECK_FOR_LLM_MODEL_UPDATE,
"schedule": timedelta(hours=1), # Check every hour
"options": {
"priority": OnyxCeleryPriority.LOW,
"expires": BEAT_EXPIRES_DEFAULT,

View File

@@ -72,6 +72,15 @@ def try_creating_docfetching_task(
# Another indexing attempt is already running
return None
# Determine which queue to use based on whether this is a user file
# TODO: at the moment the indexing pipeline is
# shared between user files and connectors
queue = (
OnyxCeleryQueues.USER_FILES_INDEXING
if cc_pair.is_user_file
else OnyxCeleryQueues.CONNECTOR_DOC_FETCHING
)
# Use higher priority for first-time indexing to ensure new connectors
# get processed before re-indexing of existing connectors
has_successful_attempt = cc_pair.last_successful_index_time is not None
@@ -90,7 +99,7 @@ def try_creating_docfetching_task(
search_settings_id=search_settings.id,
tenant_id=tenant_id,
),
queue=OnyxCeleryQueues.CONNECTOR_DOC_FETCHING,
queue=queue,
task_id=custom_task_id,
priority=priority,
)

View File

@@ -12,7 +12,6 @@ from celery import Celery
from celery import shared_task
from celery import Task
from celery.exceptions import SoftTimeLimitExceeded
from fastapi import HTTPException
from pydantic import BaseModel
from redis import Redis
from redis.lock import Lock as RedisLock
@@ -41,11 +40,9 @@ from onyx.background.indexing.checkpointing_utils import (
)
from onyx.background.indexing.index_attempt_utils import cleanup_index_attempts
from onyx.background.indexing.index_attempt_utils import get_old_index_attempts
from onyx.configs.app_configs import AUTH_TYPE
from onyx.configs.app_configs import MANAGED_VESPA
from onyx.configs.app_configs import VESPA_CLOUD_CERT_PATH
from onyx.configs.app_configs import VESPA_CLOUD_KEY_PATH
from onyx.configs.constants import AuthType
from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import CELERY_INDEXING_LOCK_TIMEOUT
from onyx.configs.constants import MilestoneRecordType
@@ -62,9 +59,11 @@ from onyx.db.connector import mark_ccpair_with_indexing_trigger
from onyx.db.connector_credential_pair import (
fetch_indexable_standard_connector_credential_pair_ids,
)
from onyx.db.connector_credential_pair import (
fetch_indexable_user_file_connector_credential_pair_ids,
)
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
from onyx.db.connector_credential_pair import set_cc_pair_repeated_error_state
from onyx.db.connector_credential_pair import update_connector_credential_pair_from_id
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.db.engine.time_utils import get_db_current_time
from onyx.db.enums import ConnectorCredentialPairStatus
@@ -87,7 +86,7 @@ from onyx.db.models import SearchSettings
from onyx.db.search_settings import get_current_search_settings
from onyx.db.search_settings import get_secondary_search_settings
from onyx.db.swap_index import check_and_perform_index_swap
from onyx.document_index.factory import get_all_document_indices
from onyx.document_index.factory import get_default_document_index
from onyx.file_store.document_batch_storage import DocumentBatchStorage
from onyx.file_store.document_batch_storage import get_document_batch_storage
from onyx.httpx.httpx_pool import HttpxPool
@@ -113,7 +112,6 @@ from onyx.utils.telemetry import RecordType
from shared_configs.configs import INDEXING_MODEL_SERVER_HOST
from shared_configs.configs import INDEXING_MODEL_SERVER_PORT
from shared_configs.configs import MULTI_TENANT
from shared_configs.configs import USAGE_LIMITS_ENABLED
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
from shared_configs.contextvars import INDEX_ATTEMPT_INFO_CONTEXTVAR
@@ -540,7 +538,12 @@ def check_indexing_completion(
]:
# User file connectors must be paused on success
# NOTE: _run_indexing doesn't update connectors if the index attempt is for the future embedding model
cc_pair.status = ConnectorCredentialPairStatus.ACTIVE
# TODO: figure out why this doesn't pause connectors during swap
cc_pair.status = (
ConnectorCredentialPairStatus.PAUSED
if cc_pair.is_user_file
else ConnectorCredentialPairStatus.ACTIVE
)
db_session.commit()
mt_cloud_telemetry(
@@ -806,8 +809,13 @@ def check_for_indexing(self: Task, *, tenant_id: str) -> int | None:
db_session, active_cc_pairs_only=True
)
)
user_file_cc_pair_ids = (
fetch_indexable_user_file_connector_credential_pair_ids(
db_session, search_settings_id=current_search_settings.id
)
)
primary_cc_pair_ids = standard_cc_pair_ids
primary_cc_pair_ids = standard_cc_pair_ids + user_file_cc_pair_ids
# Get CC pairs for secondary search settings
secondary_cc_pair_ids: list[int] = []
@@ -823,47 +831,30 @@ def check_for_indexing(self: Task, *, tenant_id: str) -> int | None:
db_session, active_cc_pairs_only=not include_paused
)
)
user_file_cc_pair_ids = (
fetch_indexable_user_file_connector_credential_pair_ids(
db_session, search_settings_id=secondary_search_settings.id
)
or []
)
secondary_cc_pair_ids = standard_cc_pair_ids
secondary_cc_pair_ids = standard_cc_pair_ids + user_file_cc_pair_ids
# Flag CC pairs in repeated error state for primary/current search settings
with get_session_with_current_tenant() as db_session:
for cc_pair_id in primary_cc_pair_ids:
lock_beat.reacquire()
cc_pair = get_connector_credential_pair_from_id(
db_session=db_session,
if is_in_repeated_error_state(
cc_pair_id=cc_pair_id,
)
# if already in repeated error state, don't do anything
# this is important so that we don't keep pausing the connector
# immediately upon a user un-pausing it to manually re-trigger and
# recover.
if (
cc_pair
and not cc_pair.in_repeated_error_state
and is_in_repeated_error_state(
cc_pair=cc_pair,
search_settings_id=current_search_settings.id,
db_session=db_session,
)
search_settings_id=current_search_settings.id,
db_session=db_session,
):
set_cc_pair_repeated_error_state(
db_session=db_session,
cc_pair_id=cc_pair_id,
in_repeated_error_state=True,
)
# When entering repeated error state, also pause the connector
# to prevent continued indexing retry attempts burning through embedding credits.
# NOTE: only for Cloud, since most self-hosted users use self-hosted embedding
# models. Also, they are more prone to repeated failures -> eventual success.
if AUTH_TYPE == AuthType.CLOUD:
update_connector_credential_pair_from_id(
db_session=db_session,
cc_pair_id=cc_pair.id,
status=ConnectorCredentialPairStatus.PAUSED,
)
# NOTE: At this point, we haven't done heavy checks on whether or not the CC pairs should actually be indexed
# Heavy check, should_index(), is called in _kickoff_indexing_tasks
@@ -1288,26 +1279,6 @@ def docprocessing_task(
INDEX_ATTEMPT_INFO_CONTEXTVAR.reset(token)
def _check_chunk_usage_limit(tenant_id: str) -> None:
"""Check if chunk indexing usage limit has been exceeded.
Raises UsageLimitExceededError if the limit is exceeded.
"""
if not USAGE_LIMITS_ENABLED:
return
from onyx.db.usage import UsageType
from onyx.server.usage_limits import check_usage_and_raise
with get_session_with_current_tenant() as db_session:
check_usage_and_raise(
db_session=db_session,
usage_type=UsageType.CHUNKS_INDEXED,
tenant_id=tenant_id,
pending_amount=0, # Just check current usage
)
def _docprocessing_task(
index_attempt_id: int,
cc_pair_id: int,
@@ -1319,25 +1290,6 @@ def _docprocessing_task(
if tenant_id:
CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
# Check if chunk indexing usage limit has been exceeded before processing
if USAGE_LIMITS_ENABLED:
try:
_check_chunk_usage_limit(tenant_id)
except HTTPException as e:
# Log the error and fail the indexing attempt
task_logger.error(
f"Chunk indexing usage limit exceeded for tenant {tenant_id}: {e}"
)
with get_session_with_current_tenant() as db_session:
from onyx.db.index_attempt import mark_attempt_failed
mark_attempt_failed(
index_attempt_id=index_attempt_id,
db_session=db_session,
failure_reason=str(e),
)
raise
task_logger.info(
f"Processing document batch: "
f"attempt={index_attempt_id} "
@@ -1436,7 +1388,7 @@ def _docprocessing_task(
callback=callback,
)
document_indices = get_all_document_indices(
document_index = get_default_document_index(
index_attempt.search_settings,
None,
httpx_client=HttpxPool.get("vespa"),
@@ -1473,7 +1425,7 @@ def _docprocessing_task(
# real work happens here!
index_pipeline_result = run_indexing_pipeline(
embedder=embedding_model,
document_indices=document_indices,
document_index=document_index,
ignore_time_skip=True, # Documents are already filtered during extraction
db_session=db_session,
tenant_id=tenant_id,
@@ -1482,23 +1434,6 @@ def _docprocessing_task(
adapter=adapter,
)
# Track chunk indexing usage for cloud usage limits
if USAGE_LIMITS_ENABLED and index_pipeline_result.total_chunks > 0:
try:
from onyx.db.usage import increment_usage
from onyx.db.usage import UsageType
with get_session_with_current_tenant() as usage_db_session:
increment_usage(
db_session=usage_db_session,
usage_type=UsageType.CHUNKS_INDEXED,
amount=index_pipeline_result.total_chunks,
)
usage_db_session.commit()
except Exception as e:
# Log but don't fail indexing if usage tracking fails
task_logger.warning(f"Failed to track chunk indexing usage: {e}")
# Update batch completion and document counts atomically using database coordination
with get_session_with_current_tenant() as db_session, cross_batch_db_lock:
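
Among the removals in this file is the chunk-usage accounting around the indexing pipeline: a quota pre-check before work and an increment afterwards. A rough, self-contained sketch of that general pattern, with stand-in helpers rather than the Onyx usage APIs:

class UsageLimitExceededError(Exception):
    pass

_USAGE: dict[str, int] = {}
_LIMITS: dict[str, int] = {"chunks_indexed": 1_000_000}

def check_usage_or_raise(tenant_id: str, usage_type: str) -> None:
    # Fail fast if the tenant has already consumed its quota.
    if _USAGE.get(f"{tenant_id}:{usage_type}", 0) >= _LIMITS[usage_type]:
        raise UsageLimitExceededError(f"{usage_type} limit exceeded for {tenant_id}")

def record_usage(tenant_id: str, usage_type: str, amount: int) -> None:
    key = f"{tenant_id}:{usage_type}"
    _USAGE[key] = _USAGE.get(key, 0) + amount

def index_batch(tenant_id: str, chunks: list[str]) -> None:
    check_usage_or_raise(tenant_id, "chunks_indexed")
    # ... run the real indexing work here ...
    record_usage(tenant_id, "chunks_indexed", len(chunks))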


@@ -10,6 +10,7 @@ from sqlalchemy.orm import Session
from onyx.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP
from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import DocumentSource
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
from onyx.db.engine.time_utils import get_db_current_time
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import IndexingStatus
@@ -125,9 +126,18 @@ class IndexingCallback(IndexingHeartbeatInterface):
def is_in_repeated_error_state(
cc_pair: ConnectorCredentialPair, search_settings_id: int, db_session: Session
cc_pair_id: int, search_settings_id: int, db_session: Session
) -> bool:
"""Checks if the cc pair / search setting combination is in a repeated error state."""
cc_pair = get_connector_credential_pair_from_id(
db_session=db_session,
cc_pair_id=cc_pair_id,
)
if not cc_pair:
raise RuntimeError(
f"is_in_repeated_error_state - could not find cc_pair with id={cc_pair_id}"
)
# if the connector doesn't have a refresh_freq, a single failed attempt is enough
number_of_failed_attempts_in_a_row_needed = (
NUM_REPEAT_ERRORS_BEFORE_REPEATED_ERROR_STATE
@@ -136,7 +146,7 @@ def is_in_repeated_error_state(
)
most_recent_index_attempts = get_recent_attempts_for_cc_pair(
cc_pair_id=cc_pair.id,
cc_pair_id=cc_pair_id,
search_settings_id=search_settings_id,
limit=number_of_failed_attempts_in_a_row_needed,
db_session=db_session,
@@ -170,7 +180,7 @@ def should_index(
db_session=db_session,
)
all_recent_errored = is_in_repeated_error_state(
cc_pair=cc_pair,
cc_pair_id=cc_pair.id,
search_settings_id=search_settings_instance.id,
db_session=db_session,
)
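
The signature change above (taking cc_pair_id and resolving the object inside the helper, raising if the lookup fails) is a small look-up-by-id refactor. A minimal sketch with stand-in types, not the actual SQLAlchemy models:

from dataclasses import dataclass

@dataclass
class Record:
    id: int
    recent_failures: int

_RECORDS: dict[int, Record] = {1: Record(id=1, recent_failures=3)}

def is_in_error_state(record_id: int, threshold: int = 3) -> bool:
    # Callers now pass only the id; the helper loads the record itself
    # and raises if it cannot be found, mirroring the change above.
    record = _RECORDS.get(record_id)
    if record is None:
        raise RuntimeError(f"is_in_error_state - could not find record with id={record_id}")
    return record.recent_failures >= threshold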


@@ -1,15 +1,9 @@
from datetime import datetime
from datetime import timezone
from typing import Any
from celery import shared_task
from celery import Task
from onyx.configs.app_configs import BRAINTRUST_API_KEY
from onyx.configs.app_configs import JOB_TIMEOUT
from onyx.configs.app_configs import SCHEDULED_EVAL_DATASET_NAMES
from onyx.configs.app_configs import SCHEDULED_EVAL_PERMISSIONS_EMAIL
from onyx.configs.app_configs import SCHEDULED_EVAL_PROJECT
from onyx.configs.constants import OnyxCeleryTask
from onyx.evals.eval import run_eval
from onyx.evals.models import EvalConfigurationOptions
@@ -39,109 +33,3 @@ def eval_run_task(
except Exception:
logger.error("Failed to run eval task")
raise
@shared_task(
name=OnyxCeleryTask.SCHEDULED_EVAL_TASK,
ignore_result=True,
soft_time_limit=JOB_TIMEOUT * 5, # Allow more time for multiple datasets
bind=True,
trail=False,
)
def scheduled_eval_task(self: Task, **kwargs: Any) -> None:
"""
Scheduled task to run evaluations on configured datasets.
Runs weekly on Sunday at midnight UTC.
Configure via environment variables (with defaults):
- SCHEDULED_EVAL_DATASET_NAMES: Comma-separated list of Braintrust dataset names
- SCHEDULED_EVAL_PERMISSIONS_EMAIL: Email for search permissions (default: roshan@onyx.app)
- SCHEDULED_EVAL_PROJECT: Braintrust project name
"""
if not BRAINTRUST_API_KEY:
logger.error("BRAINTRUST_API_KEY is not configured, cannot run scheduled evals")
return
if not SCHEDULED_EVAL_PROJECT:
logger.error(
"SCHEDULED_EVAL_PROJECT is not configured, cannot run scheduled evals"
)
return
if not SCHEDULED_EVAL_DATASET_NAMES:
logger.info("No scheduled eval datasets configured, skipping")
return
if not SCHEDULED_EVAL_PERMISSIONS_EMAIL:
logger.error("SCHEDULED_EVAL_PERMISSIONS_EMAIL not configured")
return
project_name = SCHEDULED_EVAL_PROJECT
dataset_names = SCHEDULED_EVAL_DATASET_NAMES
permissions_email = SCHEDULED_EVAL_PERMISSIONS_EMAIL
# Create a timestamp for the scheduled run
run_timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d")
logger.info(
f"Starting scheduled eval pipeline for project '{project_name}' "
f"with {len(dataset_names)} dataset(s): {dataset_names}"
)
pipeline_start = datetime.now(timezone.utc)
results: list[dict[str, Any]] = []
for dataset_name in dataset_names:
start_time = datetime.now(timezone.utc)
error_message: str | None = None
success = False
# Create informative experiment name for scheduled runs
experiment_name = f"{dataset_name} - {run_timestamp}"
try:
logger.info(
f"Running scheduled eval for dataset: {dataset_name} "
f"(project: {project_name})"
)
configuration = EvalConfigurationOptions(
search_permissions_email=permissions_email,
dataset_name=dataset_name,
no_send_logs=False,
braintrust_project=project_name,
experiment_name=experiment_name,
)
result = run_eval(
configuration=configuration,
remote_dataset_name=dataset_name,
)
success = result.success
logger.info(f"Completed eval for {dataset_name}: success={success}")
except Exception as e:
logger.exception(f"Failed to run scheduled eval for {dataset_name}")
error_message = str(e)
success = False
end_time = datetime.now(timezone.utc)
results.append(
{
"dataset_name": dataset_name,
"success": success,
"start_time": start_time,
"end_time": end_time,
"error_message": error_message,
}
)
pipeline_end = datetime.now(timezone.utc)
total_duration = (pipeline_end - pipeline_start).total_seconds()
passed_count = sum(1 for r in results if r["success"])
logger.info(
f"Scheduled eval pipeline completed: {passed_count}/{len(results)} passed "
f"in {total_duration:.1f}s"
)
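
The removed task's docstring states that it ran weekly on Sunday at midnight UTC; schedules like that are normally registered as a Celery beat crontab entry. A hypothetical sketch of such an entry (the app and task names are stand-ins, not the Onyx beat configuration):

from celery import Celery
from celery.schedules import crontab

demo_app = Celery("demo", broker="memory://")
demo_app.conf.timezone = "UTC"
demo_app.conf.beat_schedule = {
    # Fire every Sunday at 00:00 UTC.
    "weekly-scheduled-eval": {
        "task": "demo.scheduled_eval_task",
        "schedule": crontab(minute=0, hour=0, day_of_week="sunday"),
    }
}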


@@ -1,45 +1,135 @@
from typing import Any
import requests
from celery import shared_task
from celery import Task
from onyx.background.celery.apps.app_base import task_logger
from onyx.configs.app_configs import AUTO_LLM_CONFIG_URL
from onyx.configs.app_configs import JOB_TIMEOUT
from onyx.configs.app_configs import LLM_MODEL_UPDATE_API_URL
from onyx.configs.constants import OnyxCeleryTask
from onyx.db.engine.sql_engine import get_session_with_current_tenant
from onyx.llm.well_known_providers.auto_update_service import (
sync_llm_models_from_github,
)
from onyx.db.models import LLMProvider
from onyx.db.models import ModelConfiguration
def _process_model_list_response(model_list_json: Any) -> list[str]:
# Handle case where response is wrapped in a "data" field
if isinstance(model_list_json, dict):
if "data" in model_list_json:
model_list_json = model_list_json["data"]
elif "models" in model_list_json:
model_list_json = model_list_json["models"]
else:
raise ValueError(
"Invalid response from API - expected dict with 'data' or "
f"'models' field, got {type(model_list_json)}"
)
if not isinstance(model_list_json, list):
raise ValueError(
f"Invalid response from API - expected list, got {type(model_list_json)}"
)
# Handle both string list and object list cases
model_names: list[str] = []
for item in model_list_json:
if isinstance(item, str):
model_names.append(item)
elif isinstance(item, dict):
if "model_name" in item:
model_names.append(item["model_name"])
elif "id" in item:
model_names.append(item["id"])
else:
raise ValueError(
f"Invalid item in model list - expected dict with model_name or id, got {type(item)}"
)
else:
raise ValueError(
f"Invalid item in model list - expected string or dict, got {type(item)}"
)
return model_names
@shared_task(
name=OnyxCeleryTask.CHECK_FOR_AUTO_LLM_UPDATE,
name=OnyxCeleryTask.CHECK_FOR_LLM_MODEL_UPDATE,
ignore_result=True,
soft_time_limit=300, # 5 minute timeout
soft_time_limit=JOB_TIMEOUT,
trail=False,
bind=True,
)
def check_for_auto_llm_updates(self: Task, *, tenant_id: str) -> bool | None:
"""Periodic task to fetch LLM model updates from GitHub
and sync them to providers in Auto mode.
This task checks the GitHub-hosted config file and updates all
providers that have is_auto_mode=True.
"""
if not AUTO_LLM_CONFIG_URL:
task_logger.debug("AUTO_LLM_CONFIG_URL not configured, skipping")
return None
def check_for_llm_model_update(self: Task, *, tenant_id: str) -> bool | None:
if not LLM_MODEL_UPDATE_API_URL:
raise ValueError("LLM model update API URL not configured")
# First fetch the models from the API
try:
# Sync to database
with get_session_with_current_tenant() as db_session:
results = sync_llm_models_from_github(db_session)
if results:
task_logger.info(f"Auto mode sync results: {results}")
else:
task_logger.debug("No model updates applied")
response = requests.get(LLM_MODEL_UPDATE_API_URL)
response.raise_for_status()
available_models = _process_model_list_response(response.json())
task_logger.info(f"Found available models: {available_models}")
except Exception:
task_logger.exception("Error in auto LLM update task")
raise
task_logger.exception("Failed to fetch models from API.")
return None
# Then update the database with the fetched models
with get_session_with_current_tenant() as db_session:
# Get the default LLM provider
default_provider = (
db_session.query(LLMProvider)
.filter(LLMProvider.is_default_provider.is_(True))
.first()
)
if not default_provider:
task_logger.warning("No default LLM provider found")
return None
# log change if any
old_models = set(
model_configuration.name
for model_configuration in default_provider.model_configurations
)
new_models = set(available_models)
added_models = new_models - old_models
removed_models = old_models - new_models
if added_models:
task_logger.info(f"Adding models: {sorted(added_models)}")
if removed_models:
task_logger.info(f"Removing models: {sorted(removed_models)}")
# Update the provider's model list
# Remove models that are no longer available
db_session.query(ModelConfiguration).filter(
ModelConfiguration.llm_provider_id == default_provider.id,
ModelConfiguration.name.notin_(available_models),
).delete(synchronize_session=False)
# Add new models
for available_model_name in available_models:
db_session.merge(
ModelConfiguration(
llm_provider_id=default_provider.id,
name=available_model_name,
is_visible=False,
max_input_tokens=None,
)
)
# if the default model is no longer available, set it to the first model in the list
if default_provider.default_model_name not in available_models:
task_logger.info(
f"Default model {default_provider.default_model_name} not "
f"available, setting to first model in list: {available_models[0]}"
)
default_provider.default_model_name = available_models[0]
db_session.commit()
if added_models or removed_models:
task_logger.info("Updated model list for default provider.")
return True
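
The _process_model_list_response helper added above normalizes several response shapes into a flat list of model names. A short illustration of the inputs it accepts, assuming the function is importable as defined above and using made-up model names; anything that is neither a list nor a dict with a "data" or "models" key raises ValueError:

# All three shapes normalize to the same flat list of names.
assert _process_model_list_response(["gpt-4o", "gpt-4o-mini"]) == ["gpt-4o", "gpt-4o-mini"]
assert _process_model_list_response(
    {"data": [{"id": "gpt-4o"}, {"id": "gpt-4o-mini"}]}
) == ["gpt-4o", "gpt-4o-mini"]
assert _process_model_list_response(
    {"models": [{"model_name": "gpt-4o"}, {"model_name": "gpt-4o-mini"}]}
) == ["gpt-4o", "gpt-4o-mini"]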

Some files were not shown because too many files have changed in this diff.