fix(citations): enable citation sidebar w/ web_search-only assistants (#7888)

fix(llm): Hide private models from Agent Creation (#7873)
fix(layout): adjust footer margin and prevent page refresh on chatsession drop (#7759)
2026-02-17 07:45:47 +00:00 · 2026-01-27 13:26:29 -08:00 · 2026-01-27 12:21:06 -08:00 · 2026-01-27 11:57:18 -08:00 · 2026-01-27 11:33:03 -08:00 · 2026-01-27 11:26:52 -08:00
1342 changed files with 116175 additions and 46147 deletions
--- a/.git-blame-ignore-revs
+++ b/.git-blame-ignore-revs
@@ -0,0 +1,8 @@
+# Exclude these commits from git blame (e.g. mass reformatting).
+# These are ignored by GitHub automatically.
+# To enable this locally, run:
+#
+#    git config blame.ignoreRevsFile .git-blame-ignore-revs
+
+3134e5f840c12c8f32613ce520101a047c89dcc2  # refactor(whitespace): rm temporary react fragments (#7161)
+ed3f72bc75f3e3a9ae9e4d8cd38278f9c97e78b4  # refactor(whitespace): rm react fragment #7190
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -1,3 +1,10 @@
 * @onyx-dot-app/onyx-core-team
 # Helm charts Owners
 /helm/ @justin-tahara
+
+# Web standards updates
+/web/STANDARDS.md @raunakab @Weves
+
+# Agent context files
+/CLAUDE.md.template @Weves
+/AGENTS.md.template @Weves
--- a/.github/actions/setup-python-and-install-dependencies/action.yml
+++ b/.github/actions/setup-python-and-install-dependencies/action.yml
@@ -7,12 +7,6 @@ inputs:
 runs:
  using: "composite"
  steps:
-    - name: Setup uv
-      uses: astral-sh/setup-uv@caf0cab7a618c569241d31dcd442f54681755d39 # ratchet:astral-sh/setup-uv@v3
-      # TODO: Enable caching once there is a uv.lock file checked in.
-      # with:
-      #   enable-cache: true
-
    - name: Compute requirements hash
      id: req-hash
      shell: bash
@@ -28,6 +22,8 @@ runs:
        done <<< "$REQUIREMENTS"
        echo "hash=$(echo "$hash" | sha256sum | cut -d' ' -f1)" >> "$GITHUB_OUTPUT"

+    # NOTE: This comes before Setup uv because post-step clean-ups run in reverse step order,
+    # so Setup uv's prune-cache gets to prune the cache before this step uploads it.
    - name: Cache uv cache directory
      uses: runs-on/cache@50350ad4242587b6c8c2baa2e740b1bc11285ff4 # ratchet:runs-on/cache@v4
      with:
@@ -36,6 +32,14 @@ runs:
        restore-keys: |
          ${{ runner.os }}-uv-

+    - name: Setup uv
+      uses: astral-sh/setup-uv@ed21f2f24f8dd64503750218de024bcf64c7250a # ratchet:astral-sh/setup-uv@v7
+      with:
+        version: "0.9.9"
+      # TODO: Enable caching once there is a uv.lock file checked in,
+      # by adding the following to the existing `with:` block above:
+      #   enable-cache: true
+
    - name: Setup Python
      uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # ratchet:actions/setup-python@v5
      with:
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -1,10 +1,10 @@
 ## Description

-[Provide a brief description of the changes in this PR]
+<!--- Provide a brief description of the changes in this PR --->

 ## How Has This Been Tested?

-[Describe the tests you ran to verify your changes]
+<!--- Describe the tests you ran to verify your changes --->

 ## Additional Options

--- a/.github/workflows/deployment.yml
+++ b/.github/workflows/deployment.yml
--- a/.github/workflows/merge-group.yml
+++ b/.github/workflows/merge-group.yml
@@ -0,0 +1,31 @@
+name: Merge Group-Specific
+
+on:
+  merge_group:
+
+permissions:
+  contents: read
+
+jobs:
+  # This job immediately succeeds to satisfy branch protection rules on merge_group events.
+  # There is a similarly named "required" job in pr-integration-tests.yml which runs the actual
+  # integration tests. That job runs on both pull_request and merge_group events, and this job
+  # exists solely to provide a fast-passing check with the same name for branch protection.
+  # The actual tests remain enforced on presubmit (pull_request events).
+  required:
+    runs-on: ubuntu-latest
+    timeout-minutes: 45
+    steps:
+      - name: Success
+        run: echo "Success"
+  # This job immediately succeeds to satisfy branch protection rules on merge_group events.
+  # There is a similarly named "playwright-required" job in pr-playwright-tests.yml which runs
+  # the actual playwright tests. That job runs on both pull_request and merge_group events, and
+  # this job exists solely to provide a fast-passing check with the same name for branch protection.
+  # The actual tests remain enforced on presubmit (pull_request events).
+  playwright-required:
+    runs-on: ubuntu-latest
+    timeout-minutes: 45
+    steps:
+      - name: Success
+        run: echo "Success"
--- a/.github/workflows/nightly-close-stale-issues.yml
+++ b/.github/workflows/nightly-close-stale-issues.yml
@@ -13,7 +13,7 @@ jobs:
    runs-on: ubuntu-latest
    timeout-minutes: 45
    steps:
-      - uses: actions/stale@5f858e3efba33a5ca4407a664cc011ad407f2008 # ratchet:actions/stale@v10
+      - uses: actions/stale@997185467fa4f803885201cee163a9f38240193d # ratchet:actions/stale@v10
        with:
          stale-issue-message: 'This issue is stale because it has been open 75 days with no activity. Remove stale label or comment or this will be closed in 15 days.'
          stale-pr-message: 'This PR is stale because it has been open 75 days with no activity. Remove stale label or comment or this will be closed in 15 days.'
--- a/.github/workflows/pr-database-tests.yml
+++ b/.github/workflows/pr-database-tests.yml
@@ -0,0 +1,62 @@
+name: Database Tests
+concurrency:
+  group: Database-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
+  cancel-in-progress: true
+
+on:
+  merge_group:
+  pull_request:
+    branches:
+      - main
+      - "release/**"
+  push:
+    tags:
+      - "v*.*.*"
+
+permissions:
+  contents: read
+
+jobs:
+  database-tests:
+    runs-on:
+      - runs-on
+      - runner=2cpu-linux-arm64
+      - "run-id=${{ github.run_id }}-database-tests"
+    timeout-minutes: 45
+    steps:
+      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
+
+      - name: Checkout code
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
+        with:
+          persist-credentials: false
+
+      - name: Setup Python and Install Dependencies
+        uses: ./.github/actions/setup-python-and-install-dependencies
+        with:
+          requirements: |
+            backend/requirements/default.txt
+            backend/requirements/dev.txt
+
+      - name: Generate OpenAPI schema and Python client
+        shell: bash
+        run: |
+          ods openapi all
+
+      # Needed for pulling external images; otherwise, we hit the "Unauthenticated users" limit.
+      # https://docs.docker.com/docker-hub/usage/
+      - name: Login to Docker Hub
+        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      - name: Start Docker containers
+        working-directory: ./deployment/docker_compose
+        run: |
+          docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d \
+            relational_db
+
+      - name: Run Database Tests
+        working-directory: ./backend
+        run: pytest -m alembic tests/integration/tests/migrations/
--- a/.github/workflows/pr-external-dependency-unit-tests.yml
+++ b/.github/workflows/pr-external-dependency-unit-tests.yml
@@ -38,6 +38,8 @@ env:
  # LLMs
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+  VERTEX_CREDENTIALS: ${{ secrets.VERTEX_CREDENTIALS }}
+  VERTEX_LOCATION: ${{ vars.VERTEX_LOCATION }}

  # Code Interpreter
  # TODO: debug why this is failing and enable
@@ -170,7 +172,7 @@ jobs:

      - name: Upload Docker logs
        if: failure()
-        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v5
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
        with:
          name: docker-logs-${{ matrix.test-dir }}
          path: docker-logs/
--- a/.github/workflows/pr-helm-chart-testing.yml
+++ b/.github/workflows/pr-helm-chart-testing.yml
@@ -6,11 +6,11 @@ concurrency:
 on:
  merge_group:
  pull_request:
-    branches: [ main ]
+    branches: [main]
  push:
    tags:
      - "v*.*.*"
-  workflow_dispatch:  # Allows manual triggering
+  workflow_dispatch: # Allows manual triggering

 permissions:
  contents: read
@@ -18,225 +18,233 @@ permissions:
 jobs:
  helm-chart-check:
    # See https://runs-on.com/runners/linux/
-    runs-on: [runs-on,runner=8cpu-linux-x64,hdd=256,"run-id=${{ github.run_id }}-helm-chart-check"]
+    runs-on:
+      [
+        runs-on,
+        runner=8cpu-linux-x64,
+        hdd=256,
+        "run-id=${{ github.run_id }}-helm-chart-check",
+      ]
    timeout-minutes: 45

    # fetch-depth 0 is required for helm/chart-testing-action
    steps:
-    - name: Checkout code
-      uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
-      with:
-        fetch-depth: 0
-        persist-credentials: false
+      - name: Checkout code
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
+        with:
+          fetch-depth: 0
+          persist-credentials: false

-    - name: Set up Helm
-      uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 # ratchet:azure/setup-helm@v4.3.1
-      with:
-        version: v3.19.0
+      - name: Set up Helm
+        uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 # ratchet:azure/setup-helm@v4.3.1
+        with:
+          version: v3.19.0

-    - name: Set up chart-testing
-      uses: helm/chart-testing-action@6ec842c01de15ebb84c8627d2744a0c2f2755c9f # ratchet:helm/chart-testing-action@v2.8.0
+      - name: Set up chart-testing
+        # NOTE: This is Jamison's patch from https://github.com/helm/chart-testing-action/pull/194
+        uses: helm/chart-testing-action@8958a6ac472cbd8ee9a8fbb6f1acbc1b0e966e44 # zizmor: ignore[impostor-commit]
+        with:
+          uv_version: "0.9.9"

-    # even though we specify chart-dirs in ct.yaml, it isn't used by ct for the list-changed command...
-    - name: Run chart-testing (list-changed)
-      id: list-changed
-      env:
-        DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
-      run: |
-        echo "default_branch: ${DEFAULT_BRANCH}"
-        changed=$(ct list-changed --remote origin --target-branch ${DEFAULT_BRANCH} --chart-dirs deployment/helm/charts)
-        echo "list-changed output: $changed"
-        if [[ -n "$changed" ]]; then
-          echo "changed=true" >> "$GITHUB_OUTPUT"
-        fi
-
-    # uncomment to force run chart-testing
-#     - name: Force run chart-testing (list-changed)
-#       id: list-changed
-#       run: echo "changed=true" >> $GITHUB_OUTPUT
-
-    # lint all charts if any changes were detected
-    - name: Run chart-testing (lint)
-      if: steps.list-changed.outputs.changed == 'true'
-      run: ct lint --config ct.yaml --all
-      # the following would lint only changed charts, but linting isn't expensive
-      # run: ct lint --config ct.yaml --target-branch ${{ github.event.repository.default_branch }}
-
-    - name: Create kind cluster
-      if: steps.list-changed.outputs.changed == 'true'
-      uses: helm/kind-action@92086f6be054225fa813e0a4b13787fc9088faab # ratchet:helm/kind-action@v1.13.0
-
-    - name: Pre-install cluster status check
-      if: steps.list-changed.outputs.changed == 'true'
-      run: |
-        echo "=== Pre-install Cluster Status ==="
-        kubectl get nodes -o wide
-        kubectl get pods --all-namespaces
-        kubectl get storageclass
-
-    - name: Add Helm repositories and update
-      if: steps.list-changed.outputs.changed == 'true'
-      run: |
-        echo "=== Adding Helm repositories ==="
-        helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
-        helm repo add vespa https://onyx-dot-app.github.io/vespa-helm-charts
-        helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
-        helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
-        helm repo add minio https://charts.min.io/
-        helm repo add code-interpreter https://onyx-dot-app.github.io/code-interpreter/
-        helm repo update
-
-    - name: Install Redis operator
-      if: steps.list-changed.outputs.changed == 'true'
-      shell: bash
-      run: |
-        echo "=== Installing redis-operator CRDs ==="
-        helm upgrade --install redis-operator ot-container-kit/redis-operator \
-          --namespace redis-operator --create-namespace --wait --timeout 300s
-
-    - name: Pre-pull required images
-      if: steps.list-changed.outputs.changed == 'true'
-      run: |
-        echo "=== Pre-pulling required images to avoid timeout ==="
-        KIND_CLUSTER=$(kubectl config current-context | sed 's/kind-//')
-        echo "Kind cluster: $KIND_CLUSTER"
-
-        IMAGES=(
-          "ghcr.io/cloudnative-pg/cloudnative-pg:1.27.0"
-          "quay.io/opstree/redis:v7.0.15"
-          "docker.io/onyxdotapp/onyx-web-server:latest"
-        )
-
-        for image in "${IMAGES[@]}"; do
-          echo "Pre-pulling $image"
-          if docker pull "$image"; then
-            kind load docker-image "$image" --name "$KIND_CLUSTER" || echo "Failed to load $image into kind"
-          else
-            echo "Failed to pull $image"
+      # even though we specify chart-dirs in ct.yaml, it isn't used by ct for the list-changed command...
+      - name: Run chart-testing (list-changed)
+        id: list-changed
+        env:
+          DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
+        run: |
+          echo "default_branch: ${DEFAULT_BRANCH}"
+          changed=$(ct list-changed --remote origin --target-branch ${DEFAULT_BRANCH} --chart-dirs deployment/helm/charts)
+          echo "list-changed output: $changed"
+          if [[ -n "$changed" ]]; then
+            echo "changed=true" >> "$GITHUB_OUTPUT"
          fi
-        done

-        echo "=== Images loaded into Kind cluster ==="
-        docker exec "$KIND_CLUSTER"-control-plane crictl images | grep -E "(cloudnative-pg|redis|onyx)" || echo "Some images may still be loading..."
+      # uncomment to force run chart-testing
+      #     - name: Force run chart-testing (list-changed)
+      #       id: list-changed
+      #       run: echo "changed=true" >> $GITHUB_OUTPUT
+      # lint all charts if any changes were detected
+      - name: Run chart-testing (lint)
+        if: steps.list-changed.outputs.changed == 'true'
+        run: ct lint --config ct.yaml --all
+        # the following would lint only changed charts, but linting isn't expensive
+        # run: ct lint --config ct.yaml --target-branch ${{ github.event.repository.default_branch }}

-    - name: Validate chart dependencies
-      if: steps.list-changed.outputs.changed == 'true'
-      run: |
-        echo "=== Validating chart dependencies ==="
-        cd deployment/helm/charts/onyx
-        helm dependency update
-        helm lint .
+      - name: Create kind cluster
+        if: steps.list-changed.outputs.changed == 'true'
+        uses: helm/kind-action@92086f6be054225fa813e0a4b13787fc9088faab # ratchet:helm/kind-action@v1.13.0

-    - name: Run chart-testing (install) with enhanced monitoring
-      timeout-minutes: 25
-      if: steps.list-changed.outputs.changed == 'true'
-      run: |
-        echo "=== Starting chart installation with monitoring ==="
+      - name: Pre-install cluster status check
+        if: steps.list-changed.outputs.changed == 'true'
+        run: |
+          echo "=== Pre-install Cluster Status ==="
+          kubectl get nodes -o wide
+          kubectl get pods --all-namespaces
+          kubectl get storageclass

-        # Function to monitor cluster state
-        monitor_cluster() {
-          while true; do
-            echo "=== Cluster Status Check at $(date) ==="
-            # Only show non-running pods to reduce noise
-            NON_RUNNING_PODS=$(kubectl get pods --all-namespaces --field-selector=status.phase!=Running,status.phase!=Succeeded --no-headers 2>/dev/null | wc -l)
-            if [ "$NON_RUNNING_PODS" -gt 0 ]; then
-              echo "Non-running pods:"
-              kubectl get pods --all-namespaces --field-selector=status.phase!=Running,status.phase!=Succeeded
+      - name: Add Helm repositories and update
+        if: steps.list-changed.outputs.changed == 'true'
+        run: |
+          echo "=== Adding Helm repositories ==="
+          helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
+          helm repo add vespa https://onyx-dot-app.github.io/vespa-helm-charts
+          helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
+          helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
+          helm repo add minio https://charts.min.io/
+          helm repo add code-interpreter https://onyx-dot-app.github.io/code-interpreter/
+          helm repo update
+
+      - name: Install Redis operator
+        if: steps.list-changed.outputs.changed == 'true'
+        shell: bash
+        run: |
+          echo "=== Installing redis-operator CRDs ==="
+          helm upgrade --install redis-operator ot-container-kit/redis-operator \
+            --namespace redis-operator --create-namespace --wait --timeout 300s
+
+      - name: Pre-pull required images
+        if: steps.list-changed.outputs.changed == 'true'
+        run: |
+          echo "=== Pre-pulling required images to avoid timeout ==="
+          KIND_CLUSTER=$(kubectl config current-context | sed 's/kind-//')
+          echo "Kind cluster: $KIND_CLUSTER"
+
+          IMAGES=(
+            "ghcr.io/cloudnative-pg/cloudnative-pg:1.27.0"
+            "quay.io/opstree/redis:v7.0.15"
+            "docker.io/onyxdotapp/onyx-web-server:latest"
+          )
+
+          for image in "${IMAGES[@]}"; do
+            echo "Pre-pulling $image"
+            if docker pull "$image"; then
+              kind load docker-image "$image" --name "$KIND_CLUSTER" || echo "Failed to load $image into kind"
            else
-              echo "All pods running successfully"
+              echo "Failed to pull $image"
            fi
-            # Only show recent events if there are issues
-            RECENT_EVENTS=$(kubectl get events --sort-by=.lastTimestamp --all-namespaces --field-selector=type!=Normal 2>/dev/null | tail -5)
-            if [ -n "$RECENT_EVENTS" ]; then
-              echo "Recent warnings/errors:"
-              echo "$RECENT_EVENTS"
-            fi
-            sleep 60
          done
-        }

-        # Start monitoring in background
-        monitor_cluster &
-        MONITOR_PID=$!
+          echo "=== Images loaded into Kind cluster ==="
+          docker exec "$KIND_CLUSTER"-control-plane crictl images | grep -E "(cloudnative-pg|redis|onyx)" || echo "Some images may still be loading..."

-        # Set up cleanup
-        cleanup() {
-          echo "=== Cleaning up monitoring process ==="
-          kill $MONITOR_PID 2>/dev/null || true
+      - name: Validate chart dependencies
+        if: steps.list-changed.outputs.changed == 'true'
+        run: |
+          echo "=== Validating chart dependencies ==="
+          cd deployment/helm/charts/onyx
+          helm dependency update
+          helm lint .
+
+      - name: Run chart-testing (install) with enhanced monitoring
+        timeout-minutes: 25
+        if: steps.list-changed.outputs.changed == 'true'
+        run: |
+          echo "=== Starting chart installation with monitoring ==="
+
+          # Function to monitor cluster state
+          monitor_cluster() {
+            while true; do
+              echo "=== Cluster Status Check at $(date) ==="
+              # Only show non-running pods to reduce noise
+              NON_RUNNING_PODS=$(kubectl get pods --all-namespaces --field-selector=status.phase!=Running,status.phase!=Succeeded --no-headers 2>/dev/null | wc -l)
+              if [ "$NON_RUNNING_PODS" -gt 0 ]; then
+                echo "Non-running pods:"
+                kubectl get pods --all-namespaces --field-selector=status.phase!=Running,status.phase!=Succeeded
+              else
+                echo "All pods running successfully"
+              fi
+              # Only show recent events if there are issues
+              RECENT_EVENTS=$(kubectl get events --sort-by=.lastTimestamp --all-namespaces --field-selector=type!=Normal 2>/dev/null | tail -5)
+              if [ -n "$RECENT_EVENTS" ]; then
+                echo "Recent warnings/errors:"
+                echo "$RECENT_EVENTS"
+              fi
+              sleep 60
+            done
+          }
+
+          # Start monitoring in background
+          monitor_cluster &
+          MONITOR_PID=$!
+
+          # Set up cleanup
+          cleanup() {
+            echo "=== Cleaning up monitoring process ==="
+            kill $MONITOR_PID 2>/dev/null || true
+            echo "=== Final cluster state ==="
+            kubectl get pods --all-namespaces
+            kubectl get events --all-namespaces --sort-by=.lastTimestamp | tail -20
+          }
+
+          # Trap cleanup on exit
+          trap cleanup EXIT
+
+          # Run the actual installation with detailed logging
+          echo "=== Starting ct install ==="
+          set +e
+          ct install --all \
+            --helm-extra-set-args="\
+              --set=nginx.enabled=false \
+              --set=minio.enabled=false \
+              --set=vespa.enabled=false \
+              --set=slackbot.enabled=false \
+              --set=postgresql.enabled=true \
+              --set=postgresql.nameOverride=cloudnative-pg \
+              --set=postgresql.cluster.storage.storageClass=standard \
+              --set=redis.enabled=true \
+              --set=redis.storageSpec.volumeClaimTemplate.spec.storageClassName=standard \
+              --set=webserver.replicaCount=1 \
+              --set=api.replicaCount=0 \
+              --set=inferenceCapability.replicaCount=0 \
+              --set=indexCapability.replicaCount=0 \
+              --set=celery_beat.replicaCount=0 \
+              --set=celery_worker_heavy.replicaCount=0 \
+              --set=celery_worker_docfetching.replicaCount=0 \
+              --set=celery_worker_docprocessing.replicaCount=0 \
+              --set=celery_worker_light.replicaCount=0 \
+              --set=celery_worker_monitoring.replicaCount=0 \
+              --set=celery_worker_primary.replicaCount=0 \
+              --set=celery_worker_user_file_processing.replicaCount=0 \
+              --set=celery_worker_user_files_indexing.replicaCount=0" \
+            --helm-extra-args="--timeout 900s --debug" \
+            --debug --config ct.yaml
+          CT_EXIT=$?
+          set -e
+
+          if [[ $CT_EXIT -ne 0 ]]; then
+            echo "ct install failed with exit code $CT_EXIT"
+            exit $CT_EXIT
+          else
+            echo "=== Installation completed successfully ==="
+          fi
+
+          kubectl get pods --all-namespaces
+
+      - name: Post-install verification
+        if: steps.list-changed.outputs.changed == 'true'
+        run: |
+          echo "=== Post-install verification ==="
+          kubectl get pods --all-namespaces
+          kubectl get services --all-namespaces
+          # Only show issues if they exist
+          kubectl describe pods --all-namespaces | grep -A 5 -B 2 "Failed\|Error\|Warning" || echo "No pod issues found"
+
+      - name: Cleanup on failure
+        if: failure() && steps.list-changed.outputs.changed == 'true'
+        run: |
+          echo "=== Cleanup on failure ==="
          echo "=== Final cluster state ==="
          kubectl get pods --all-namespaces
-          kubectl get events --all-namespaces --sort-by=.lastTimestamp | tail -20
-        }
+          kubectl get events --all-namespaces --sort-by=.lastTimestamp | tail -10

-        # Trap cleanup on exit
-        trap cleanup EXIT
+          echo "=== Pod descriptions for debugging ==="
+          kubectl describe pods --all-namespaces | grep -A 10 -B 3 "Failed\|Error\|Warning\|Pending" || echo "No problematic pods found"

-        # Run the actual installation with detailed logging
-        echo "=== Starting ct install ==="
-        set +e
-        ct install --all \
-          --helm-extra-set-args="\
-            --set=nginx.enabled=false \
-            --set=minio.enabled=false \
-            --set=vespa.enabled=false \
-            --set=slackbot.enabled=false \
-            --set=postgresql.enabled=true \
-            --set=postgresql.nameOverride=cloudnative-pg \
-            --set=postgresql.cluster.storage.storageClass=standard \
-            --set=redis.enabled=true \
-            --set=redis.storageSpec.volumeClaimTemplate.spec.storageClassName=standard \
-            --set=webserver.replicaCount=1 \
-            --set=api.replicaCount=0 \
-            --set=inferenceCapability.replicaCount=0 \
-            --set=indexCapability.replicaCount=0 \
-            --set=celery_beat.replicaCount=0 \
-            --set=celery_worker_heavy.replicaCount=0 \
-            --set=celery_worker_docfetching.replicaCount=0 \
-            --set=celery_worker_docprocessing.replicaCount=0 \
-            --set=celery_worker_light.replicaCount=0 \
-            --set=celery_worker_monitoring.replicaCount=0 \
-            --set=celery_worker_primary.replicaCount=0 \
-            --set=celery_worker_user_file_processing.replicaCount=0 \
-            --set=celery_worker_user_files_indexing.replicaCount=0" \
-          --helm-extra-args="--timeout 900s --debug" \
-          --debug --config ct.yaml
-        CT_EXIT=$?
-        set -e
+          echo "=== Recent logs for debugging ==="
+          kubectl logs --all-namespaces --tail=50 | grep -i "error\|timeout\|failed\|pull" || echo "No error logs found"

-        if [[ $CT_EXIT -ne 0 ]]; then
-          echo "ct install failed with exit code $CT_EXIT"
-          exit $CT_EXIT
-        else
-          echo "=== Installation completed successfully ==="
-        fi
-
-        kubectl get pods --all-namespaces
-
-    - name: Post-install verification
-      if: steps.list-changed.outputs.changed == 'true'
-      run: |
-        echo "=== Post-install verification ==="
-        kubectl get pods --all-namespaces
-        kubectl get services --all-namespaces
-        # Only show issues if they exist
-        kubectl describe pods --all-namespaces | grep -A 5 -B 2 "Failed\|Error\|Warning" || echo "No pod issues found"
-
-    - name: Cleanup on failure
-      if: failure() && steps.list-changed.outputs.changed == 'true'
-      run: |
-        echo "=== Cleanup on failure ==="
-        echo "=== Final cluster state ==="
-        kubectl get pods --all-namespaces
-        kubectl get events --all-namespaces --sort-by=.lastTimestamp | tail -10
-
-        echo "=== Pod descriptions for debugging ==="
-        kubectl describe pods --all-namespaces | grep -A 10 -B 3 "Failed\|Error\|Warning\|Pending" || echo "No problematic pods found"
-
-        echo "=== Recent logs for debugging ==="
-        kubectl logs --all-namespaces --tail=50 | grep -i "error\|timeout\|failed\|pull" || echo "No error logs found"
-
-        echo "=== Helm releases ==="
-        helm list --all-namespaces
-      # the following would install only changed charts, but we only have one chart so
-      # don't worry about that for now
-      # run: ct install --target-branch ${{ github.event.repository.default_branch }}
+          echo "=== Helm releases ==="
+          helm list --all-namespaces
+        # the following would install only changed charts, but we only have one chart so
+        # don't worry about that for now
+        # run: ct install --target-branch ${{ github.event.repository.default_branch }}
--- a/.github/workflows/pr-integration-tests.yml
+++ b/.github/workflows/pr-integration-tests.yml
@@ -33,6 +33,11 @@ env:
  PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD: ${{ secrets.PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD }}
  PERM_SYNC_SHAREPOINT_DIRECTORY_ID: ${{ secrets.PERM_SYNC_SHAREPOINT_DIRECTORY_ID }}
  EXA_API_KEY: ${{ secrets.EXA_API_KEY }}
+  GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN: ${{ secrets.ONYX_GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN }}
+  GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN_CLASSIC: ${{ secrets.ONYX_GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN_CLASSIC }}
+  GITHUB_ADMIN_EMAIL: ${{ secrets.ONYX_GITHUB_ADMIN_EMAIL }}
+  GITHUB_TEST_USER_1_EMAIL: ${{ secrets.ONYX_GITHUB_TEST_USER_1_EMAIL }}
+  GITHUB_TEST_USER_2_EMAIL: ${{ secrets.ONYX_GITHUB_TEST_USER_2_EMAIL }}

 jobs:
  discover-test-dirs:
@@ -51,7 +56,7 @@ jobs:
        id: set-matrix
        run: |
          # Find all leaf-level directories in both test directories
-          tests_dirs=$(find backend/tests/integration/tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" -exec basename {} \; | sort)
+          tests_dirs=$(find backend/tests/integration/tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" ! -name "mcp" -exec basename {} \; | sort)
          connector_dirs=$(find backend/tests/integration/connector_job_tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" -exec basename {} \; | sort)

          # Create JSON array with directory info
@@ -67,9 +72,14 @@ jobs:
          all_dirs="[${all_dirs%,}]"
          echo "test-dirs=$all_dirs" >> $GITHUB_OUTPUT

-
  build-backend-image:
-    runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-backend-image", "extras=ecr-cache"]
+    runs-on:
+      [
+        runs-on,
+        runner=1cpu-linux-arm64,
+        "run-id=${{ github.run_id }}-build-backend-image",
+        "extras=ecr-cache",
+      ]
    timeout-minutes: 45
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
@@ -122,9 +132,14 @@ jobs:
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:backend-cache,mode=max
          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}

-
  build-model-server-image:
-    runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-model-server-image", "extras=ecr-cache"]
+    runs-on:
+      [
+        runs-on,
+        runner=1cpu-linux-arm64,
+        "run-id=${{ github.run_id }}-build-model-server-image",
+        "extras=ecr-cache",
+      ]
    timeout-minutes: 45
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
@@ -176,9 +191,14 @@ jobs:
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache,mode=max

-
  build-integration-image:
-    runs-on: [runs-on, runner=2cpu-linux-arm64, "run-id=${{ github.run_id }}-build-integration-image", "extras=ecr-cache"]
+    runs-on:
+      [
+        runs-on,
+        runner=2cpu-linux-arm64,
+        "run-id=${{ github.run_id }}-build-integration-image",
+        "extras=ecr-cache",
+      ]
    timeout-minutes: 45
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
@@ -220,7 +240,7 @@ jobs:
          CACHE_SUFFIX: ${{ steps.format-branch.outputs.cache-suffix }}
          HEAD_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
        run: |
-          cd backend && docker buildx bake --push \
+          docker buildx bake --push \
            --set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${HEAD_SHA} \
            --set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${CACHE_SUFFIX} \
            --set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache \
@@ -290,7 +310,9 @@ jobs:
          ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID}
          INTEGRATION_TESTS_MODE=true
          CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS=0.001
+          AUTO_LLM_UPDATE_INTERVAL_SECONDS=10
          MCP_SERVER_ENABLED=true
+          USE_LIGHTWEIGHT_BACKGROUND_WORKER=false
          EOF

      - name: Start Docker containers
@@ -304,7 +326,6 @@ jobs:
            api_server \
            inference_model_server \
            indexing_model_server \
-            mcp_server \
            background \
            -d
        id: start_docker
@@ -347,12 +368,6 @@ jobs:
          }

          wait_for_service "http://localhost:8080/health" "API server"
-          test_dir="${{ matrix.test-dir.path }}"
-          if [ "$test_dir" = "tests/mcp" ]; then
-            wait_for_service "http://localhost:8090/health" "MCP server"
-          else
-            echo "Skipping MCP server wait for non-MCP suite: $test_dir"
-          fi
          echo "Finished waiting for services."

      - name: Start Mock Services
@@ -382,8 +397,6 @@ jobs:
              -e VESPA_HOST=index \
              -e REDIS_HOST=cache \
              -e API_SERVER_HOST=api_server \
-              -e MCP_SERVER_HOST=mcp_server \
-              -e MCP_SERVER_PORT=8090 \
              -e OPENAI_API_KEY=${OPENAI_API_KEY} \
              -e EXA_API_KEY=${EXA_API_KEY} \
              -e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
@@ -399,6 +412,11 @@ jobs:
              -e PERM_SYNC_SHAREPOINT_PRIVATE_KEY="${PERM_SYNC_SHAREPOINT_PRIVATE_KEY}" \
              -e PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD=${PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD} \
              -e PERM_SYNC_SHAREPOINT_DIRECTORY_ID=${PERM_SYNC_SHAREPOINT_DIRECTORY_ID} \
+              -e GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN=${GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN} \
+              -e GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN_CLASSIC=${GITHUB_PERMISSION_SYNC_TEST_ACCESS_TOKEN_CLASSIC} \
+              -e GITHUB_ADMIN_EMAIL=${GITHUB_ADMIN_EMAIL} \
+              -e GITHUB_TEST_USER_1_EMAIL=${GITHUB_TEST_USER_1_EMAIL} \
+              -e GITHUB_TEST_USER_2_EMAIL=${GITHUB_TEST_USER_2_EMAIL} \
              -e TEST_WEB_HOSTNAME=test-runner \
              -e MOCK_CONNECTOR_SERVER_HOST=mock_connector_server \
              -e MOCK_CONNECTOR_SERVER_PORT=8001 \
@@ -421,21 +439,22 @@ jobs:

      - name: Upload logs
        if: always()
-        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
        with:
          name: docker-all-logs-${{ matrix.test-dir.name }}
          path: ${{ github.workspace }}/docker-compose.log
      # ------------------------------------------------------------

-
  multitenant-tests:
    needs:
+      [build-backend-image, build-model-server-image, build-integration-image]
+    runs-on:
      [
-        build-backend-image,
-        build-model-server-image,
-        build-integration-image,
+        runs-on,
+        runner=8cpu-linux-arm64,
+        "run-id=${{ github.run_id }}-multitenant-tests",
+        "extras=ecr-cache",
      ]
-    runs-on: [runs-on, runner=8cpu-linux-arm64, "run-id=${{ github.run_id }}-multitenant-tests", "extras=ecr-cache"]
    timeout-minutes: 45

    steps:
@@ -462,10 +481,10 @@ jobs:
          AUTH_TYPE=cloud \
          REQUIRE_EMAIL_VERIFICATION=false \
          DISABLE_TELEMETRY=true \
+          OPENAI_DEFAULT_API_KEY=${OPENAI_API_KEY} \
          ONYX_BACKEND_IMAGE=${ECR_CACHE}:integration-test-backend-test-${RUN_ID} \
          ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID} \
          DEV_MODE=true \
-          MCP_SERVER_ENABLED=true \
          docker compose -f docker-compose.multitenant-dev.yml up \
            relational_db \
            index \
@@ -474,7 +493,6 @@ jobs:
            api_server \
            inference_model_server \
            indexing_model_server \
-            mcp_server \
            background \
            -d
        id: start_docker_multi_tenant
@@ -523,8 +541,6 @@ jobs:
            -e VESPA_HOST=index \
            -e REDIS_HOST=cache \
            -e API_SERVER_HOST=api_server \
-            -e MCP_SERVER_HOST=mcp_server \
-            -e MCP_SERVER_PORT=8090 \
            -e OPENAI_API_KEY=${OPENAI_API_KEY} \
            -e EXA_API_KEY=${EXA_API_KEY} \
            -e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
@@ -552,7 +568,7 @@ jobs:

      - name: Upload logs (multi-tenant)
        if: always()
-        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
        with:
          name: docker-all-logs-multitenant
          path: ${{ github.workspace }}/docker-compose-multitenant.log
--- a/.github/workflows/pr-jest-tests.yml
+++ b/.github/workflows/pr-jest-tests.yml
@@ -4,7 +4,14 @@ concurrency:
  cancel-in-progress: true

 on:
+  merge_group:
+  pull_request:
+    branches:
+      - main
+      - "release/**"
  push:
+    tags:
+      - "v*.*.*"

 permissions:
  contents: read
@@ -37,7 +44,7 @@ jobs:

      - name: Upload coverage reports
        if: always()
-        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
        with:
          name: jest-coverage-${{ github.run_id }}
          path: ./web/coverage
--- a/.github/workflows/pr-mit-integration-tests.yml
+++ b/.github/workflows/pr-mit-integration-tests.yml
@@ -48,7 +48,7 @@ jobs:
        id: set-matrix
        run: |
          # Find all leaf-level directories in both test directories
-          tests_dirs=$(find backend/tests/integration/tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" -exec basename {} \; | sort)
+          tests_dirs=$(find backend/tests/integration/tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" ! -name "mcp" -exec basename {} \; | sort)
          connector_dirs=$(find backend/tests/integration/connector_job_tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" -exec basename {} \; | sort)

          # Create JSON array with directory info
@@ -65,7 +65,13 @@ jobs:
          echo "test-dirs=$all_dirs" >> $GITHUB_OUTPUT

  build-backend-image:
-    runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-backend-image", "extras=ecr-cache"]
+    runs-on:
+      [
+        runs-on,
+        runner=1cpu-linux-arm64,
+        "run-id=${{ github.run_id }}-build-backend-image",
+        "extras=ecr-cache",
+      ]
    timeout-minutes: 45
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
@@ -119,7 +125,13 @@ jobs:
          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}

  build-model-server-image:
-    runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-model-server-image", "extras=ecr-cache"]
+    runs-on:
+      [
+        runs-on,
+        runner=1cpu-linux-arm64,
+        "run-id=${{ github.run_id }}-build-model-server-image",
+        "extras=ecr-cache",
+      ]
    timeout-minutes: 45
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
@@ -172,7 +184,13 @@ jobs:
            type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache,mode=max

  build-integration-image:
-    runs-on: [runs-on, runner=2cpu-linux-arm64, "run-id=${{ github.run_id }}-build-integration-image", "extras=ecr-cache"]
+    runs-on:
+      [
+        runs-on,
+        runner=2cpu-linux-arm64,
+        "run-id=${{ github.run_id }}-build-integration-image",
+        "extras=ecr-cache",
+      ]
    timeout-minutes: 45
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
@@ -214,7 +232,7 @@ jobs:
          CACHE_SUFFIX: ${{ steps.format-branch.outputs.cache-suffix }}
          HEAD_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
        run: |
-          cd backend && docker buildx bake --push \
+          docker buildx bake --push \
            --set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${HEAD_SHA} \
            --set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache-${CACHE_SUFFIX} \
            --set backend.cache-from=type=registry,ref=${RUNS_ON_ECR_CACHE}:backend-cache \
@@ -283,6 +301,7 @@ jobs:
          ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID}
          INTEGRATION_TESTS_MODE=true
          MCP_SERVER_ENABLED=true
+          AUTO_LLM_UPDATE_INTERVAL_SECONDS=10
          EOF

      - name: Start Docker containers
@@ -296,7 +315,6 @@ jobs:
            api_server \
            inference_model_server \
            indexing_model_server \
-            mcp_server \
            background \
            -d
        id: start_docker
@@ -339,12 +357,6 @@ jobs:
          }

          wait_for_service "http://localhost:8080/health" "API server"
-          test_dir="${{ matrix.test-dir.path }}"
-          if [ "$test_dir" = "tests/mcp" ]; then
-            wait_for_service "http://localhost:8090/health" "MCP server"
-          else
-            echo "Skipping MCP server wait for non-MCP suite: $test_dir"
-          fi
          echo "Finished waiting for services."

      - name: Start Mock Services
@@ -375,8 +387,6 @@ jobs:
              -e VESPA_HOST=index \
              -e REDIS_HOST=cache \
              -e API_SERVER_HOST=api_server \
-              -e MCP_SERVER_HOST=mcp_server \
-              -e MCP_SERVER_PORT=8090 \
              -e OPENAI_API_KEY=${OPENAI_API_KEY} \
              -e EXA_API_KEY=${EXA_API_KEY} \
              -e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
@@ -414,13 +424,12 @@ jobs:

      - name: Upload logs
        if: always()
-        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
        with:
          name: docker-all-logs-${{ matrix.test-dir.name }}
          path: ${{ github.workspace }}/docker-compose.log
      # ------------------------------------------------------------

-
  required:
    # NOTE: Github-hosted runners have about 20s faster queue times and are preferred here.
    runs-on: ubuntu-slim
--- a/.github/workflows/pr-playwright-tests.yml
+++ b/.github/workflows/pr-playwright-tests.yml
@@ -4,7 +4,14 @@ concurrency:
  cancel-in-progress: true

 on:
+  merge_group:
+  pull_request:
+    branches:
+      - main
+      - "release/**"
  push:
+    tags:
+      - "v*.*.*"

 permissions:
  contents: read
@@ -47,7 +54,13 @@ env:

 jobs:
  build-web-image:
-    runs-on: [runs-on, runner=4cpu-linux-arm64, "run-id=${{ github.run_id }}-build-web-image", "extras=ecr-cache"]
+    runs-on:
+      [
+        runs-on,
+        runner=4cpu-linux-arm64,
+        "run-id=${{ github.run_id }}-build-web-image",
+        "extras=ecr-cache",
+      ]
    timeout-minutes: 45
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
@@ -102,7 +115,13 @@ jobs:
          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}

  build-backend-image:
-    runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-backend-image", "extras=ecr-cache"]
+    runs-on:
+      [
+        runs-on,
+        runner=1cpu-linux-arm64,
+        "run-id=${{ github.run_id }}-build-backend-image",
+        "extras=ecr-cache",
+      ]
    timeout-minutes: 45
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
@@ -157,7 +176,13 @@ jobs:
          no-cache: ${{ vars.DOCKER_NO_CACHE == 'true' }}

  build-model-server-image:
-    runs-on: [runs-on, runner=1cpu-linux-arm64, "run-id=${{ github.run_id }}-build-model-server-image", "extras=ecr-cache"]
+    runs-on:
+      [
+        runs-on,
+        runner=1cpu-linux-arm64,
+        "run-id=${{ github.run_id }}-build-model-server-image",
+        "extras=ecr-cache",
+      ]
    timeout-minutes: 45
    steps:
      - uses: runs-on/action@cd2b598b0515d39d78c38a02d529db87d2196d1e # ratchet:runs-on/action@v2
@@ -231,14 +256,13 @@ jobs:
      - name: Checkout code
        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
        with:
-          fetch-depth: 0
          persist-credentials: false

      - name: Setup node
        uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # ratchet:actions/setup-node@v4
        with:
          node-version: 22
-          cache: 'npm'
+          cache: "npm"
          cache-dependency-path: ./web/package-lock.json

      - name: Install node dependencies
@@ -411,7 +435,7 @@ jobs:
          fi
          npx playwright test --project ${PROJECT}

-      - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
+      - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
        if: always()
        with:
          # Includes test results and trace.zip files
@@ -431,7 +455,7 @@ jobs:

      - name: Upload logs
        if: success() || failure()
-        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
        with:
          name: docker-logs-${{ matrix.project }}-${{ github.run_id }}
          path: ${{ github.workspace }}/docker-compose.log
@@ -447,7 +471,6 @@ jobs:
        if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') || contains(needs.*.result, 'skipped') }}
        run: exit 1

-
 # NOTE: Chromatic UI diff testing is currently disabled.
 # We are using Playwright for local and CI testing without visual regression checks.
 # Chromatic may be reintroduced in the future for UI diff testing if needed.
--- a/.github/workflows/pr-python-model-tests.yml
+++ b/.github/workflows/pr-python-model-tests.yml
@@ -5,11 +5,6 @@ on:
    # This cron expression runs the job daily at 16:00 UTC (9am PT)
    - cron: "0 16 * * *"
  workflow_dispatch:
-    inputs:
-      branch:
-        description: 'Branch to run the workflow on'
-        required: false
-        default: 'main'

 permissions:
  contents: read
@@ -31,7 +26,11 @@ env:
 jobs:
  model-check:
    # See https://runs-on.com/runners/linux/
-    runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}-model-check"]
+    runs-on:
+      - runs-on
+      - runner=4cpu-linux-arm64
+      - "run-id=${{ github.run_id }}-model-check"
+      - "extras=ecr-cache"
    timeout-minutes: 45

    env:
@@ -43,108 +42,87 @@ jobs:
        with:
          persist-credentials: false

+      - name: Setup Python and Install Dependencies
+        uses: ./.github/actions/setup-python-and-install-dependencies
+        with:
+          requirements: |
+            backend/requirements/default.txt
+            backend/requirements/dev.txt
+
+      - name: Format branch name for cache
+        id: format-branch
+        env:
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          REF_NAME: ${{ github.ref_name }}
+        run: |
+          if [ -n "${PR_NUMBER}" ]; then
+            CACHE_SUFFIX="${PR_NUMBER}"
+          else
+            # shellcheck disable=SC2001
+            CACHE_SUFFIX=$(echo "${REF_NAME}" | sed 's/[^A-Za-z0-9._-]/-/g')
+          fi
+          echo "cache-suffix=${CACHE_SUFFIX}" >> $GITHUB_OUTPUT
+
      - name: Login to Docker Hub
-        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # ratchet:docker/login-action@v3
+        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

-      # tag every docker image with "test" so that we can spin up the correct set
-      # of images during testing
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435

-      # We don't need to build the Web Docker image since it's not yet used
-      # in the integration tests. We have a separate action to verify that it builds
-      # successfully.
-      - name: Pull Model Server Docker image
-        run: |
-          docker pull onyxdotapp/onyx-model-server:latest
-          docker tag onyxdotapp/onyx-model-server:latest onyxdotapp/onyx-model-server:test
-
-      - name: Set up Python
-        uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # ratchet:actions/setup-python@v6
+      - name: Build and load
+        uses: docker/bake-action@5be5f02ff8819ecd3092ea6b2e6261c31774f2b4 # ratchet:docker/bake-action@v6
+        env:
+          TAG: model-server-${{ github.run_id }}
        with:
-          python-version: "3.11"
-          cache: "pip"
-          cache-dependency-path: |
-            backend/requirements/default.txt
-            backend/requirements/dev.txt
-
-      - name: Install Dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
-          pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
+          load: true
+          targets: model-server
+          set: |
+            model-server.cache-from=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }}
+            model-server.cache-from=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }}
+            model-server.cache-from=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache
+            model-server.cache-from=type=registry,ref=onyxdotapp/onyx-model-server:latest
+            model-server.cache-to=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ github.event.pull_request.head.sha || github.sha }},mode=max
+            model-server.cache-to=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache-${{ steps.format-branch.outputs.cache-suffix }},mode=max
+            model-server.cache-to=type=registry,ref=${{ env.RUNS_ON_ECR_CACHE }}:model-server-cache,mode=max

      - name: Start Docker containers
+        id: start_docker
+        env:
+          IMAGE_TAG: model-server-${{ github.run_id }}
        run: |
          cd deployment/docker_compose
-          ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
-          AUTH_TYPE=basic \
-          REQUIRE_EMAIL_VERIFICATION=false \
-          DISABLE_TELEMETRY=true \
-          IMAGE_TAG=test \
-          docker compose -f docker-compose.model-server-test.yml up -d indexing_model_server
-        id: start_docker
-
-      - name: Wait for service to be ready
-        run: |
-          echo "Starting wait-for-service script..."
-
-          start_time=$(date +%s)
-          timeout=300  # 5 minutes in seconds
-
-          while true; do
-            current_time=$(date +%s)
-            elapsed_time=$((current_time - start_time))
-
-            if [ $elapsed_time -ge $timeout ]; then
-              echo "Timeout reached. Service did not become ready in 5 minutes."
-              exit 1
-            fi
-
-            # Use curl with error handling to ignore specific exit code 56
-            response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:9000/api/health || echo "curl_error")
-
-            if [ "$response" = "200" ]; then
-              echo "Service is ready!"
-              break
-            elif [ "$response" = "curl_error" ]; then
-              echo "Curl encountered an error, possibly exit code 56. Continuing to retry..."
-            else
-              echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
-            fi
-
-            sleep 5
-          done
-          echo "Finished waiting for service."
+          docker compose \
+            -f docker-compose.yml \
+            -f docker-compose.dev.yml \
+            up -d --wait \
+            inference_model_server

      - name: Run Tests
-        shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
        run: |
          py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/llm
          py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/embedding

      - name: Alert on Failure
        if: failure() && github.event_name == 'schedule'
-        env:
-          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
-          REPO: ${{ github.repository }}
-          RUN_ID: ${{ github.run_id }}
-        run: |
-          curl -X POST \
-            -H 'Content-type: application/json' \
-            --data "{\"text\":\"Scheduled Model Tests failed! Check the run at: https://github.com/${REPO}/actions/runs/${RUN_ID}\"}" \
-            $SLACK_WEBHOOK
+        uses: ./.github/actions/slack-notify
+        with:
+          webhook-url: ${{ secrets.SLACK_WEBHOOK }}
+          failed-jobs: model-check
+          title: "🚨 Scheduled Model Tests failed!"
+          ref-name: ${{ github.ref_name }}

      - name: Dump all-container logs (optional)
        if: always()
        run: |
          cd deployment/docker_compose
-          docker compose -f docker-compose.model-server-test.yml logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
+          docker compose logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true

      - name: Upload logs
        if: always()
-        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
        with:
          name: docker-all-logs
          path: ${{ github.workspace }}/docker-compose.log
--- a/.github/workflows/release-devtools.yml
+++ b/.github/workflows/release-devtools.yml
@@ -16,21 +16,22 @@ jobs:
    strategy:
      matrix:
        os-arch:
-          - {goos: "linux", goarch: "amd64"}
-          - {goos: "linux", goarch: "arm64"}
-          - {goos: "windows", goarch: "amd64"}
-          - {goos: "windows", goarch: "arm64"}
-          - {goos: "darwin", goarch: "amd64"}
-          - {goos: "darwin", goarch: "arm64"}
-          - {goos: "", goarch: ""}
+          - { goos: "linux", goarch: "amd64" }
+          - { goos: "linux", goarch: "arm64" }
+          - { goos: "windows", goarch: "amd64" }
+          - { goos: "windows", goarch: "arm64" }
+          - { goos: "darwin", goarch: "amd64" }
+          - { goos: "darwin", goarch: "arm64" }
+          - { goos: "", goarch: "" }
    steps:
      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # ratchet:actions/checkout@v6
        with:
          persist-credentials: false
          fetch-depth: 0
-      - uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # ratchet:astral-sh/setup-uv@v7
+      - uses: astral-sh/setup-uv@ed21f2f24f8dd64503750218de024bcf64c7250a # ratchet:astral-sh/setup-uv@v7
        with:
          enable-cache: false
+          version: "0.9.9"
      - run: |
          GOOS="${{ matrix.os-arch.goos }}" \
          GOARCH="${{ matrix.os-arch.goarch }}" \
--- a/.github/workflows/zizmor.yml
+++ b/.github/workflows/zizmor.yml
@@ -21,17 +21,29 @@ jobs:
        with:
          persist-credentials: false

+      - name: Detect changes
+        id: filter
+        uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # ratchet:dorny/paths-filter@v3
+        with:
+          filters: |
+            zizmor:
+              - '.github/**'
+
      - name: Install the latest version of uv
-        uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # ratchet:astral-sh/setup-uv@v7.1.4
+        if: steps.filter.outputs.zizmor == 'true' || github.ref_name == 'main'
+        uses: astral-sh/setup-uv@ed21f2f24f8dd64503750218de024bcf64c7250a # ratchet:astral-sh/setup-uv@v7
        with:
          enable-cache: false
+          version: "0.9.9"

      - name: Run zizmor
+        if: steps.filter.outputs.zizmor == 'true' || github.ref_name == 'main'
        run: uv run --no-sync --with zizmor zizmor --format=sarif . > results.sarif
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: Upload SARIF file
+        if: steps.filter.outputs.zizmor == 'true' || github.ref_name == 'main'
        uses: github/codeql-action/upload-sarif@ba454b8ab46733eb6145342877cd148270bb77ab # ratchet:github/codeql-action/upload-sarif@codeql-bundle-v2.23.5
        with:
          sarif_file: results.sarif
--- a/.gitignore
+++ b/.gitignore
@@ -21,6 +21,7 @@ backend/tests/regression/search_quality/*.json
 backend/onyx/evals/data/
 backend/onyx/evals/one_off/*.json
 *.log
+*.csv

 # secret files
 .env
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -8,30 +8,65 @@ repos:
    # From: https://github.com/astral-sh/uv-pre-commit/pull/53/commits/d30b4298e4fb63ce8609e29acdbcf4c9018a483c
    rev: d30b4298e4fb63ce8609e29acdbcf4c9018a483c
    hooks:
-      - id: uv-run
-        name: Check lazy imports
-        args: ["--with=onyx-devtools", "ods", "check-lazy-imports"]
-        files: ^backend/(?!\.venv/).*\.py$
      - id: uv-sync
        args: ["--locked", "--all-extras"]
      - id: uv-lock
-        files: ^pyproject\.toml$
      - id: uv-export
        name: uv-export default.txt
-        args: ["--no-emit-project", "--no-default-groups", "--no-hashes", "--extra", "backend", "-o", "backend/requirements/default.txt"]
+        args:
+          [
+            "--no-emit-project",
+            "--no-default-groups",
+            "--no-hashes",
+            "--extra",
+            "backend",
+            "-o",
+            "backend/requirements/default.txt",
+          ]
        files: ^(pyproject\.toml|uv\.lock|backend/requirements/.*\.txt)$
      - id: uv-export
        name: uv-export dev.txt
-        args: ["--no-emit-project", "--no-default-groups", "--no-hashes", "--extra", "dev", "-o", "backend/requirements/dev.txt"]
+        args:
+          [
+            "--no-emit-project",
+            "--no-default-groups",
+            "--no-hashes",
+            "--extra",
+            "dev",
+            "-o",
+            "backend/requirements/dev.txt",
+          ]
        files: ^(pyproject\.toml|uv\.lock|backend/requirements/.*\.txt)$
      - id: uv-export
        name: uv-export ee.txt
-        args: ["--no-emit-project", "--no-default-groups", "--no-hashes", "--extra", "ee", "-o", "backend/requirements/ee.txt"]
+        args:
+          [
+            "--no-emit-project",
+            "--no-default-groups",
+            "--no-hashes",
+            "--extra",
+            "ee",
+            "-o",
+            "backend/requirements/ee.txt",
+          ]
        files: ^(pyproject\.toml|uv\.lock|backend/requirements/.*\.txt)$
      - id: uv-export
        name: uv-export model_server.txt
-        args: ["--no-emit-project", "--no-default-groups", "--no-hashes", "--extra", "model_server", "-o", "backend/requirements/model_server.txt"]
+        args:
+          [
+            "--no-emit-project",
+            "--no-default-groups",
+            "--no-hashes",
+            "--extra",
+            "model_server",
+            "-o",
+            "backend/requirements/model_server.txt",
+          ]
        files: ^(pyproject\.toml|uv\.lock|backend/requirements/.*\.txt)$
+      - id: uv-run
+        name: Check lazy imports
+        args: ["--active", "--with=onyx-devtools", "ods", "check-lazy-imports"]
+        files: ^backend/(?!\.venv/).*\.py$
      # NOTE: This takes ~6s on a single, large module which is prohibitively slow.
      # - id: uv-run
      #   name: mypy
@@ -39,69 +74,68 @@ repos:
      #   pass_filenames: true
      #   files: ^backend/.*\.py$

-  - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: 3e8a8703264a2f4a69428a0aa4dcb512790b2c8c  # frozen: v6.0.0
-    hooks:
-      - id: check-yaml
-        files: ^.github/
-
  - repo: https://github.com/rhysd/actionlint
-    rev: a443f344ff32813837fa49f7aa6cbc478d770e62  # frozen: v1.7.9
+    rev: a443f344ff32813837fa49f7aa6cbc478d770e62 # frozen: v1.7.9
    hooks:
      - id: actionlint

  - repo: https://github.com/psf/black
    rev: 8a737e727ac5ab2f1d4cf5876720ed276dc8dc4b # frozen: 25.1.0
    hooks:
-    - id: black
-      language_version: python3.11
+      - id: black
+        language_version: python3.11

  # this is a fork which keeps compatibility with black
  - repo: https://github.com/wimglenn/reorder-python-imports-black
-    rev: f55cd27f90f0cf0ee775002c2383ce1c7820013d  # frozen: v3.14.0
+    rev: f55cd27f90f0cf0ee775002c2383ce1c7820013d # frozen: v3.14.0
    hooks:
-    - id: reorder-python-imports
-      args: ['--py311-plus', '--application-directories=backend/']
-      # need to ignore alembic files, since reorder-python-imports gets confused
-      # and thinks that alembic is a local package since there is a folder
-      # in the backend directory called `alembic`
-      exclude: ^backend/alembic/
+      - id: reorder-python-imports
+        args: ["--py311-plus", "--application-directories=backend/"]
+        # need to ignore alembic files, since reorder-python-imports gets confused
+        # and thinks that alembic is a local package since there is a folder
+        # in the backend directory called `alembic`
+        exclude: ^backend/alembic/

  # These settings will remove unused imports with side effects
  # Note: The repo currently does not and should not have imports with side effects
  - repo: https://github.com/PyCQA/autoflake
-    rev: 0544741e2b4a22b472d9d93e37d4ea9153820bb1  # frozen: v2.3.1
+    rev: 0544741e2b4a22b472d9d93e37d4ea9153820bb1 # frozen: v2.3.1
    hooks:
      - id: autoflake
-        args: [ '--remove-all-unused-imports', '--remove-unused-variables', '--in-place' , '--recursive']
+        args:
+          [
+            "--remove-all-unused-imports",
+            "--remove-unused-variables",
+            "--in-place",
+            "--recursive",
+          ]

  - repo: https://github.com/golangci/golangci-lint
-    rev: 9f61b0f53f80672872fced07b6874397c3ed197b  # frozen: v2.7.2
+    rev: 9f61b0f53f80672872fced07b6874397c3ed197b # frozen: v2.7.2
    hooks:
      - id: golangci-lint
        entry: bash -c "find tools/ -name go.mod -print0 | xargs -0 -I{} bash -c 'cd \"$(dirname {})\" && golangci-lint run ./...'"

  - repo: https://github.com/astral-sh/ruff-pre-commit
    # Ruff version.
-    rev: 971923581912ef60a6b70dbf0c3e9a39563c9d47  # frozen: v0.11.4
+    rev: 971923581912ef60a6b70dbf0c3e9a39563c9d47 # frozen: v0.11.4
    hooks:
      - id: ruff

  - repo: https://github.com/pre-commit/mirrors-prettier
-    rev: ffb6a759a979008c0e6dff86e39f4745a2d9eac4  # frozen: v3.1.0
+    rev: ffb6a759a979008c0e6dff86e39f4745a2d9eac4 # frozen: v3.1.0
    hooks:
-    - id: prettier
-      types_or: [html, css, javascript, ts, tsx]
-      language_version: system
+      - id: prettier
+        types_or: [html, css, javascript, ts, tsx]
+        language_version: system

  - repo: https://github.com/sirwart/ripsecrets
-    rev: 7d94620933e79b8acaa0cd9e60e9864b07673d86  # frozen: v0.1.11
+    rev: 7d94620933e79b8acaa0cd9e60e9864b07673d86 # frozen: v0.1.11
    hooks:
      - id: ripsecrets
        args:
-        - --additional-pattern
-        - ^sk-[A-Za-z0-9_\-]{20,}$
-
+          - --additional-pattern
+          - ^sk-[A-Za-z0-9_\-]{20,}$

  - repo: local
    hooks:
@@ -112,9 +146,29 @@ repos:
        pass_filenames: false
        files: \.tf$

+      - id: npm-install
+        name: npm install
+        description: "Automatically run 'npm install' after a checkout, pull or rebase"
+        language: system
+        entry: bash -c 'cd web && npm install --no-save'
+        pass_filenames: false
+        files: ^web/package(-lock)?\.json$
+        stages: [post-checkout, post-merge, post-rewrite]
+      - id: npm-install-check
+        name: npm install --package-lock-only
+        description: "Check that 'web/package-lock.json' is up to date"
+        language: system
+        entry: bash -c 'cd web && npm install --package-lock-only'
+        pass_filenames: false
+        files: ^web/package(-lock)?\.json$
+
+      # Uses tsgo (TypeScript's native Go compiler) for ~10x faster type checking.
+      # This is a preview package - if it breaks:
+      #   1. Try updating: cd web && npm update @typescript/native-preview
+      #   2. Or fall back to tsc: replace 'tsgo' with 'tsc' below
      - id: typescript-check
        name: TypeScript type check
-        entry: bash -c 'cd web && npm run types:check'
+        entry: bash -c 'cd web && npx tsgo --noEmit --project tsconfig.types.json'
        language: system
        pass_filenames: false
        files: ^web/.*\.(ts|tsx)$
--- a/.vscode/env_template.txt
+++ b/.vscode/env_template.txt
@@ -1,36 +1,39 @@
-# Copy this file to .env in the .vscode folder
-# Fill in the <REPLACE THIS> values as needed, it is recommended to set the GEN_AI_API_KEY value to avoid having to set up an LLM in the UI
-# Also check out onyx/backend/scripts/restart_containers.sh for a script to restart the containers which Onyx relies on outside of VSCode/Cursor processes
+# Copy this file to .env in the .vscode folder.
+# Fill in the <REPLACE THIS> values as needed; it is recommended to set the
+# GEN_AI_API_KEY value to avoid having to set up an LLM in the UI.
+# Also check out onyx/backend/scripts/restart_containers.sh for a script to
+# restart the containers which Onyx relies on outside of VSCode/Cursor
+# processes.

-# For local dev, often user Authentication is not needed
+
+# For local dev, often user Authentication is not needed.
 AUTH_TYPE=disabled

-# Always keep these on for Dev
-# Logs model prompts, reasoning, and answer to stdout
+
+# Always keep these on for Dev.
+# Logs model prompts, reasoning, and answer to stdout.
 LOG_ONYX_MODEL_INTERACTIONS=True
 # More verbose logging
 LOG_LEVEL=debug


-# This passes top N results to LLM an additional time for reranking prior to answer generation
-# This step is quite heavy on token usage so we disable it for dev generally
-DISABLE_LLM_DOC_RELEVANCE=False
-
-
-# Useful if you want to toggle auth on/off (google_oauth/OIDC specifically)
+# Useful if you want to toggle auth on/off (google_oauth/OIDC specifically).
 OAUTH_CLIENT_ID=<REPLACE THIS>
 OAUTH_CLIENT_SECRET=<REPLACE THIS>
 OPENID_CONFIG_URL=<REPLACE THIS>
 SAML_CONF_DIR=/<ABSOLUTE PATH TO ONYX>/onyx/backend/ee/onyx/configs/saml_config

-# Generally not useful for dev, we don't generally want to set up an SMTP server for dev
+
+# Generally not useful for dev; we don't usually want to set up an SMTP server
+# for dev.
 REQUIRE_EMAIL_VERIFICATION=False


-# Set these so if you wipe the DB, you don't end up having to go through the UI every time
+# Set these so if you wipe the DB, you don't end up having to go through the UI
+# every time.
 GEN_AI_API_KEY=<REPLACE THIS>
 OPENAI_API_KEY=<REPLACE THIS>
-# If answer quality isn't important for dev, use gpt-4o-mini since it's cheaper
+# If answer quality isn't important for dev, use gpt-4o-mini since it's cheaper.
 GEN_AI_MODEL_VERSION=gpt-4o
 FAST_GEN_AI_MODEL_VERSION=gpt-4o

@@ -40,26 +43,36 @@ PYTHONPATH=../backend
 PYTHONUNBUFFERED=1


-# Enable the full set of Danswer Enterprise Edition features
-# NOTE: DO NOT ENABLE THIS UNLESS YOU HAVE A PAID ENTERPRISE LICENSE (or if you are using this for local testing/development)
+# Enable the full set of Onyx Enterprise Edition features.
+# NOTE: DO NOT ENABLE THIS UNLESS YOU HAVE A PAID ENTERPRISE LICENSE (or if you
+# are using this for local testing/development).
 ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=False

+
 # S3 File Store Configuration (MinIO for local development)
 S3_ENDPOINT_URL=http://localhost:9004
 S3_FILE_STORE_BUCKET_NAME=onyx-file-store-bucket
 S3_AWS_ACCESS_KEY_ID=minioadmin
 S3_AWS_SECRET_ACCESS_KEY=minioadmin

-# Show extra/uncommon connectors
+
+# Show extra/uncommon connectors.
 SHOW_EXTRA_CONNECTORS=True

+
 # Local langsmith tracing
 LANGSMITH_TRACING="true"
 LANGSMITH_ENDPOINT="https://api.smith.langchain.com"
 LANGSMITH_API_KEY=<REPLACE_THIS>
 LANGSMITH_PROJECT=<REPLACE_THIS>

+
 # Local Confluence OAuth testing
 # OAUTH_CONFLUENCE_CLOUD_CLIENT_ID=<REPLACE_THIS>
 # OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET=<REPLACE_THIS>
-# NEXT_PUBLIC_TEST_ENV=True
+# NEXT_PUBLIC_TEST_ENV=True
+
+
+# OpenSearch
+# Arbitrary password is fine for local development.
+OPENSEARCH_INITIAL_ADMIN_PASSWORD=<REPLACE THIS>
--- a/.vscode/launch.template.jsonc
+++ b/.vscode/launch.template.jsonc
@@ -512,6 +512,21 @@
        "group": "3"
      }
    },
+    {
+      "name": "Clear and Restart OpenSearch Container",
+      // Generic debugger type, required arg but has no bearing on bash.
+      "type": "node",
+      "request": "launch",
+      "runtimeExecutable": "bash",
+      "runtimeArgs": [
+        "${workspaceFolder}/backend/scripts/restart_opensearch_container.sh"
+      ],
+      "cwd": "${workspaceFolder}",
+      "console": "integratedTerminal",
+      "presentation": {
+        "group": "3"
+      }
+    },
    {
      "name": "Eval CLI",
      "type": "debugpy",
--- a/AGENTS.md.template
+++ b/AGENTS.md.template
@@ -1,13 +1,13 @@
 # AGENTS.md

-This file provides guidance to Codex when working with code in this repository.
+This file provides guidance to AI agents when working with code in this repository.

 ## KEY NOTES

- If you run into any missing python dependency errors, try running your command with `source backend/.venv/bin/activate` \
+- If you run into any missing python dependency errors, try running your command with `source .venv/bin/activate` \
 to assume the python venv.
 - To make tests work, check the `.env` file at the root of the project to find an OpenAI key.
- If using `playwright` to explore the frontend, you can usually log in with username `a@test.com` and password
+- If using `playwright` to explore the frontend, you can usually log in with username `a@example.com` and password
 `a`. The app can be accessed at `http://localhost:3000`.
 - You should assume that all Onyx services are running. To verify, you can check the `backend/log` directory to
 make sure we see logs coming out from the relevant service.
@@ -181,6 +181,286 @@ web/
 └── src/lib/                     # Utilities & business logic
 ```

+## Frontend Standards
+
+### 1. Import Standards
+
+**Always use absolute imports with the `@` prefix.**
+
+**Reason:** Moving files around becomes easier since you don't also have to update those import statements. This makes modifications to the codebase much nicer.
+
+```typescript
+// ✅ Good
+import { Button } from "@/components/ui/button";
+import { useAuth } from "@/hooks/useAuth";
+import { Text } from "@/refresh-components/texts/Text";
+
+// ❌ Bad
+import { Button } from "../../../components/ui/button";
+import { useAuth } from "./hooks/useAuth";
+```
+
+### 2. React Component Functions
+
+**Prefer regular functions over arrow functions for React components.**
+
+**Reason:** Functions just become easier to read.
+
+```typescript
+// ✅ Good
+function UserProfile({ userId }: UserProfileProps) {
+  return <div>User Profile</div>
+}
+
+// ❌ Bad
+const UserProfile = ({ userId }: UserProfileProps) => {
+  return <div>User Profile</div>
+}
+```
+
+### 3. Props Interface Extraction
+
+**Extract prop types into their own interface definitions.**
+
+**Reason:** A named props interface keeps the function signature short, so the function becomes easier to read.
+
+```typescript
+// ✅ Good
+interface UserCardProps {
+  user: User
+  showActions?: boolean
+  onEdit?: (userId: string) => void
+}
+
+function UserCard({ user, showActions = false, onEdit }: UserCardProps) {
+  return <div>User Card</div>
+}
+
+// ❌ Bad
+function UserCard({
+  user,
+  showActions = false,
+  onEdit
+}: {
+  user: User
+  showActions?: boolean
+  onEdit?: (userId: string) => void
+}) {
+  return <div>User Card</div>
+}
+```
+
+### 4. Spacing Guidelines
+
+**Prefer padding over margins for spacing.**
+
+**Reason:** We want to consolidate usage to paddings instead of margins.
+
+```typescript
+// ✅ Good
+<div className="p-4 space-y-2">
+  <div className="p-2">Content</div>
+</div>
+
+// ❌ Bad
+<div className="m-4 space-y-2">
+  <div className="m-2">Content</div>
+</div>
+```
+
+### 5. Tailwind Dark Mode
+
+**Strictly forbid using the `dark:` modifier in Tailwind classes, except for logo icon handling.**
+
+**Reason:** The `colors.css` file already, VERY CAREFULLY, defines what the exact opposite colour of each light-mode colour is. Overriding this behaviour is VERY bad and will lead to horrible UI breakages.
+
+**Exception:** The `createLogoIcon` helper in `web/src/components/icons/icons.tsx` uses `dark:` modifiers (`dark:invert`, `dark:hidden`, `dark:block`) to handle third-party logo icons that cannot automatically adapt through `colors.css`. This is the ONLY acceptable use of dark mode modifiers.
+
+```typescript
+// ✅ Good - Standard components use `web/tailwind-themes/tailwind.config.js` / `web/src/app/css/colors.css`
+<div className="bg-background-neutral-03 text-text-02">
+  Content
+</div>
+
+// ✅ Good - Logo icons with dark mode handling via createLogoIcon
+export const GithubIcon = createLogoIcon(githubLightIcon, {
+  monochromatic: true,  // Will apply dark:invert internally
+});
+
+export const GitbookIcon = createLogoIcon(gitbookLightIcon, {
+  darkSrc: gitbookDarkIcon,  // Will use dark:hidden/dark:block internally
+});
+
+// ❌ Bad - Manual dark mode overrides
+<div className="bg-white dark:bg-black text-black dark:text-white">
+  Content
+</div>
+```
+
+### 6. Class Name Utilities
+
+**Use the `cn` utility instead of raw string formatting for classNames.**
+
+**Reason:** `cn` calls are easier to read. They also allow more complex types (e.g., string arrays) to be formatted properly (each element of the array is flattened). As a result, conditionals (e.g., `myCondition && "some-tailwind-class"`, which evaluates to `false` when `myCondition` is `false`) get filtered out.
+
+```typescript
+import { cn } from '@/lib/utils'
+
+// ✅ Good
+<div className={cn(
+  'base-class',
+  isActive && 'active-class',
+  className
+)}>
+  Content
+</div>
+
+// ❌ Bad
+<div className={`base-class ${isActive ? 'active-class' : ''} ${className}`}>
+  Content
+</div>
+```
+
+### 7. Custom Hooks Organization
+
+**Follow a "hook-per-file" layout. Each hook should live in its own file within `web/src/hooks`.**
+
+**Reason:** This is just a layout preference. Keeps code clean.
+
+```typescript
+// web/src/hooks/useUserData.ts
+export function useUserData(userId: string) {
+  // hook implementation
+}
+
+// web/src/hooks/useLocalStorage.ts
+export function useLocalStorage<T>(key: string, initialValue: T) {
+  // hook implementation
+}
+```
+
+### 8. Icon Usage
+
+**ONLY use icons from the `web/src/icons` directory. Do NOT use icons from `react-icons`, `lucide`, or other external libraries.**
+
+**Reason:** We have a very carefully curated selection of icons that match our Onyx guidelines. We do NOT want to muddy those up with different aesthetic stylings.
+
+```typescript
+// ✅ Good
+import SvgX from "@/icons/x";
+import SvgMoreHorizontal from "@/icons/more-horizontal";
+
+// ❌ Bad
+import { User } from "lucide-react";
+import { FiSearch } from "react-icons/fi";
+```
+
+**Missing Icons**: If an icon is needed but doesn't exist in the `web/src/icons` directory, import it from Figma using the Figma MCP tool and add it to the icons directory.
+If you need help with this step, reach out to `raunak@onyx.app`.
+
+### 9. Text Rendering
+
+**Prefer using the `refresh-components/texts/Text` component for all text rendering. Avoid "naked" text nodes.**
+
+**Reason:** The `Text` component is fully compliant with the stylings provided in Figma. It provides easy utilities to specify the text-colour and font-size in the form of flags. Super duper easy.
+
+```typescript
+// ✅ Good
+import { Text } from '@/refresh-components/texts/Text'
+
+function UserCard({ name }: { name: string }) {
+  return (
+    <Text
+      // The `text03` flag colours the rendered text with the 3rd-scale grey.
+      text03
+      // The `mainAction` flag applies the "main-action" font, line-height, and weight, as described in the Figma.
+      mainAction
+    >
+      {name}
+    </Text>
+  )
+}
+
+// ❌ Bad
+function UserCard({ name }: { name: string }) {
+  return (
+    <div>
+      <h2>{name}</h2>
+      <p>User details</p>
+    </div>
+  )
+}
+```
+
+### 10. Component Usage
+
+**Heavily avoid raw HTML input components. Always use components from the `web/src/refresh-components` or `web/lib/opal/src` directory.**
+
+**Reason:** We've put in a lot of effort to unify the components that are rendered in the Onyx app. Using raw components breaks the entire UI of the application, and leaves it in a muddier state than before.
+
+```typescript
+// ✅ Good
+import Button from '@/refresh-components/buttons/Button'
+import InputTypeIn from '@/refresh-components/inputs/InputTypeIn'
+import SvgPlusCircle from '@/icons/plus-circle'
+
+function ContactForm() {
+  return (
+    <form>
+      <InputTypeIn placeholder="Search..." />
+      <Button type="submit" leftIcon={SvgPlusCircle}>Submit</Button>
+    </form>
+  )
+}
+
+// ❌ Bad
+function ContactForm() {
+  return (
+    <form>
+      <input placeholder="Name" />
+      <textarea placeholder="Message" />
+      <button type="submit">Submit</button>
+    </form>
+  )
+}
+```
+
+### 11. Colors
+
+**Always use custom overrides for colors and borders rather than built in Tailwind CSS colors. These overrides live in `web/tailwind-themes/tailwind.config.js`.**
+
+**Reason:** Our custom color system uses CSS variables that automatically handle dark mode and maintain design consistency across the app. Standard Tailwind colors bypass this system.
+
+**Available color categories:**
+- **Text:** `text-01` through `text-05`, `text-inverted-XX`
+- **Backgrounds:** `background-neutral-XX`, `background-tint-XX` (and inverted variants)
+- **Borders:** `border-01` through `border-05`, `border-inverted-XX`
+- **Actions:** `action-link-XX`, `action-danger-XX`
+- **Status:** `status-info-XX`, `status-success-XX`, `status-warning-XX`, `status-error-XX`
+- **Theme:** `theme-primary-XX`, `theme-red-XX`, `theme-blue-XX`, etc.
+
+```typescript
+// ✅ Good - Use custom Onyx color classes
+<div className="bg-background-neutral-01 border border-border-02" />
+<div className="bg-background-tint-02 border border-border-01" />
+<div className="bg-status-success-01" />
+<div className="bg-action-link-01" />
+<div className="bg-theme-primary-05" />
+
+// ❌ Bad - Do NOT use standard Tailwind colors
+<div className="bg-gray-100 border border-gray-300 text-gray-600" />
+<div className="bg-white border border-slate-200" />
+<div className="bg-green-100 text-green-700" />
+<div className="bg-blue-100 text-blue-600" />
+<div className="bg-indigo-500" />
+```
+
+### 12. Data Fetching
+
+**Prefer using `useSWR` for data fetching. Data should generally be fetched on the client side. Components that need data should display a loader / placeholder while waiting for that data. Prefer loading data within the component that needs it rather than at the top level and passing it down.**
+
+**Reason:** Client side fetching allows us to load the skeleton of the page without waiting for data to load, leading to a snappier UX. Loading data where needed reduces dependencies between a component and its parent component(s).
+
 ## Database & Migrations

 ### Running Migrations
@@ -295,14 +575,6 @@ will be tailing their logs to this file.
 - Token management and rate limiting
 - Custom prompts and agent actions

-## UI/UX Patterns
-
- Tailwind CSS with design system in `web/src/components/ui/`
- Radix UI and Headless UI for accessible components
- SWR for data fetching and caching
- Form validation with react-hook-form
- Error handling with popup notifications
-
 ## Creating a Plan
 When creating a plan in the `plans` directory, make sure to include at least these elements:

--- a/CLAUDE.md.template
+++ b/CLAUDE.md.template
@@ -7,7 +7,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
 - If you run into any missing python dependency errors, try running your command with `source .venv/bin/activate` \
 to assume the python venv.
 - To make tests work, check the `.env` file at the root of the project to find an OpenAI key.
- If using `playwright` to explore the frontend, you can usually log in with username `a@test.com` and password
+- If using `playwright` to explore the frontend, you can usually log in with username `a@example.com` and password
 `a`. The app can be accessed at `http://localhost:3000`.
 - You should assume that all Onyx services are running. To verify, you can check the `backend/log` directory to
 make sure we see logs coming out from the relevant service.
@@ -184,6 +184,286 @@ web/
 └── src/lib/                     # Utilities & business logic
 ```

+## Frontend Standards
+
+### 1. Import Standards
+
+**Always use absolute imports with the `@` prefix.**
+
+**Reason:** Moving files around becomes easier since you don't also have to update those import statements. This makes modifications to the codebase much nicer.
+
+```typescript
+// ✅ Good
+import { Button } from "@/components/ui/button";
+import { useAuth } from "@/hooks/useAuth";
+import { Text } from "@/refresh-components/texts/Text";
+
+// ❌ Bad
+import { Button } from "../../../components/ui/button";
+import { useAuth } from "./hooks/useAuth";
+```
+
+### 2. React Component Functions
+
+**Prefer regular functions over arrow functions for React components.**
+
+**Reason:** Functions just become easier to read.
+
+```typescript
+// ✅ Good
+function UserProfile({ userId }: UserProfileProps) {
+  return <div>User Profile</div>
+}
+
+// ❌ Bad
+const UserProfile = ({ userId }: UserProfileProps) => {
+  return <div>User Profile</div>
+}
+```
+
+### 3. Props Interface Extraction
+
+**Extract prop types into their own interface definitions.**
+
+**Reason:** A named props interface keeps the function signature short, so the function becomes easier to read.
+
+```typescript
+// ✅ Good
+interface UserCardProps {
+  user: User
+  showActions?: boolean
+  onEdit?: (userId: string) => void
+}
+
+function UserCard({ user, showActions = false, onEdit }: UserCardProps) {
+  return <div>User Card</div>
+}
+
+// ❌ Bad
+function UserCard({
+  user,
+  showActions = false,
+  onEdit
+}: {
+  user: User
+  showActions?: boolean
+  onEdit?: (userId: string) => void
+}) {
+  return <div>User Card</div>
+}
+```
+
+### 4. Spacing Guidelines
+
+**Prefer padding over margins for spacing.**
+
+**Reason:** We want to consolidate usage to paddings instead of margins.
+
+```typescript
+// ✅ Good
+<div className="p-4 space-y-2">
+  <div className="p-2">Content</div>
+</div>
+
+// ❌ Bad
+<div className="m-4 space-y-2">
+  <div className="m-2">Content</div>
+</div>
+```
+
+### 5. Tailwind Dark Mode
+
+**Strictly forbid using the `dark:` modifier in Tailwind classes, except for logo icon handling.**
+
+**Reason:** The `colors.css` file already, VERY CAREFULLY, defines what the exact opposite colour of each light-mode colour is. Overriding this behaviour is VERY bad and will lead to horrible UI breakages.
+
+**Exception:** The `createLogoIcon` helper in `web/src/components/icons/icons.tsx` uses `dark:` modifiers (`dark:invert`, `dark:hidden`, `dark:block`) to handle third-party logo icons that cannot automatically adapt through `colors.css`. This is the ONLY acceptable use of dark mode modifiers.
+
+```typescript
+// ✅ Good - Standard components use `web/tailwind-themes/tailwind.config.js` / `web/src/app/css/colors.css`
+<div className="bg-background-neutral-03 text-text-02">
+  Content
+</div>
+
+// ✅ Good - Logo icons with dark mode handling via createLogoIcon
+export const GithubIcon = createLogoIcon(githubLightIcon, {
+  monochromatic: true,  // Will apply dark:invert internally
+});
+
+export const GitbookIcon = createLogoIcon(gitbookLightIcon, {
+  darkSrc: gitbookDarkIcon,  // Will use dark:hidden/dark:block internally
+});
+
+// ❌ Bad - Manual dark mode overrides
+<div className="bg-white dark:bg-black text-black dark:text-white">
+  Content
+</div>
+```
+
+### 6. Class Name Utilities
+
+**Use the `cn` utility instead of raw string formatting for classNames.**
+
+**Reason:** `cn` calls are easier to read. They also allow more complex types (e.g., string arrays) to be formatted properly (each element of the array is flattened). As a result, conditionals (e.g., `myCondition && "some-tailwind-class"`, which evaluates to `false` when `myCondition` is `false`) get filtered out.
+
+```typescript
+import { cn } from '@/lib/utils'
+
+// ✅ Good
+<div className={cn(
+  'base-class',
+  isActive && 'active-class',
+  className
+)}>
+  Content
+</div>
+
+// ❌ Bad
+<div className={`base-class ${isActive ? 'active-class' : ''} ${className}`}>
+  Content
+</div>
+```
+
+### 7. Custom Hooks Organization
+
+**Follow a "hook-per-file" layout. Each hook should live in its own file within `web/src/hooks`.**
+
+**Reason:** This is just a layout preference. Keeps code clean.
+
+```typescript
+// web/src/hooks/useUserData.ts
+export function useUserData(userId: string) {
+  // hook implementation
+}
+
+// web/src/hooks/useLocalStorage.ts
+export function useLocalStorage<T>(key: string, initialValue: T) {
+  // hook implementation
+}
+```
+
+### 8. Icon Usage
+
+**ONLY use icons from the `web/src/icons` directory. Do NOT use icons from `react-icons`, `lucide`, or other external libraries.**
+
+**Reason:** We have a very carefully curated selection of icons that match our Onyx guidelines. We do NOT want to muddy those up with different aesthetic stylings.
+
+```typescript
+// ✅ Good
+import SvgX from "@/icons/x";
+import SvgMoreHorizontal from "@/icons/more-horizontal";
+
+// ❌ Bad
+import { User } from "lucide-react";
+import { FiSearch } from "react-icons/fi";
+```
+
+**Missing Icons**: If an icon is needed but doesn't exist in the `web/src/icons` directory, import it from Figma using the Figma MCP tool and add it to the icons directory.
+If you need help with this step, reach out to `raunak@onyx.app`.
+
+### 9. Text Rendering
+
+**Prefer using the `refresh-components/texts/Text` component for all text rendering. Avoid "naked" text nodes.**
+
+**Reason:** The `Text` component is fully compliant with the stylings provided in Figma. It provides easy utilities to specify the text-colour and font-size in the form of flags. Super duper easy.
+
+```typescript
+// ✅ Good
+import { Text } from '@/refresh-components/texts/Text'
+
+function UserCard({ name }: { name: string }) {
+  return (
+    <Text
+      // The `text03` flag colours the rendered text with the 3rd-scale grey.
+      text03
+      // The `mainAction` flag applies the "main-action" font, line-height, and weight, as described in the Figma.
+      mainAction
+    >
+      {name}
+    </Text>
+  )
+}
+
+// ❌ Bad
+function UserCard({ name }: { name: string }) {
+  return (
+    <div>
+      <h2>{name}</h2>
+      <p>User details</p>
+    </div>
+  )
+}
+```
+
+### 10. Component Usage
+
+**Heavily avoid raw HTML input components. Always use components from the `web/src/refresh-components` or `web/lib/opal/src` directory.**
+
+**Reason:** We've put in a lot of effort to unify the components that are rendered in the Onyx app. Using raw components breaks the entire UI of the application, and leaves it in a muddier state than before.
+
+```typescript
+// ✅ Good
+import Button from '@/refresh-components/buttons/Button'
+import InputTypeIn from '@/refresh-components/inputs/InputTypeIn'
+import SvgPlusCircle from '@/icons/plus-circle'
+
+function ContactForm() {
+  return (
+    <form>
+      <InputTypeIn placeholder="Search..." />
+      <Button type="submit" leftIcon={SvgPlusCircle}>Submit</Button>
+    </form>
+  )
+}
+
+// ❌ Bad
+function ContactForm() {
+  return (
+    <form>
+      <input placeholder="Name" />
+      <textarea placeholder="Message" />
+      <button type="submit">Submit</button>
+    </form>
+  )
+}
+```
+
+### 11. Colors
+
+**Always use custom overrides for colors and borders rather than built in Tailwind CSS colors. These overrides live in `web/tailwind-themes/tailwind.config.js`.**
+
+**Reason:** Our custom color system uses CSS variables that automatically handle dark mode and maintain design consistency across the app. Standard Tailwind colors bypass this system.
+
+**Available color categories:**
+- **Text:** `text-01` through `text-05`, `text-inverted-XX`
+- **Backgrounds:** `background-neutral-XX`, `background-tint-XX` (and inverted variants)
+- **Borders:** `border-01` through `border-05`, `border-inverted-XX`
+- **Actions:** `action-link-XX`, `action-danger-XX`
+- **Status:** `status-info-XX`, `status-success-XX`, `status-warning-XX`, `status-error-XX`
+- **Theme:** `theme-primary-XX`, `theme-red-XX`, `theme-blue-XX`, etc.
+
+```typescript
+// ✅ Good - Use custom Onyx color classes
+<div className="bg-background-neutral-01 border border-border-02" />
+<div className="bg-background-tint-02 border border-border-01" />
+<div className="bg-status-success-01" />
+<div className="bg-action-link-01" />
+<div className="bg-theme-primary-05" />
+
+// ❌ Bad - Do NOT use standard Tailwind colors
+<div className="bg-gray-100 border border-gray-300 text-gray-600" />
+<div className="bg-white border border-slate-200" />
+<div className="bg-green-100 text-green-700" />
+<div className="bg-blue-100 text-blue-600" />
+<div className="bg-indigo-500" />
+```
+
+### 12. Data Fetching
+
+**Prefer using `useSWR` for data fetching. Data should generally be fetched on the client side. Components that need data should display a loader / placeholder while waiting for that data. Prefer loading data within the component that needs it rather than at the top level and passing it down.**
+
+**Reason:** Client side fetching allows us to load the skeleton of the page without waiting for data to load, leading to a snappier UX. Loading data where needed reduces dependencies between a component and its parent component(s).
+
 ## Database & Migrations

 ### Running Migrations
@@ -300,14 +580,6 @@ will be tailing their logs to this file.
 - Token management and rate limiting
 - Custom prompts and agent actions

-## UI/UX Patterns
-
- Tailwind CSS with design system in `web/src/components/ui/`
- Radix UI and Headless UI for accessible components
- SWR for data fetching and caching
- Form validation with react-hook-form
- Error handling with popup notifications
-
 ## Creating a Plan
 When creating a plan in the `plans` directory, make sure to include at least these elements:

--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,262 +1,31 @@
-<!-- ONYX_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/CONTRIBUTING.md"} -->
-
 # Contributing to Onyx
-
 Hey there! We are so excited that you're interested in Onyx.

-As an open source project in a rapidly changing space, we welcome all contributions.

-## 💃 Guidelines
+## Contribution Opportunities
+The [GitHub Issues](https://github.com/onyx-dot-app/onyx/issues) page is a great place to look for and share contribution ideas.

-### Contribution Opportunities
+If you have a feature of your own that you would like to build, please create an issue so that community members can provide
+feedback and give it a thumbs-up if they share the need.

-The [GitHub Issues](https://github.com/onyx-dot-app/onyx/issues) page is a great place to start for contribution ideas.

-To ensure that your contribution is aligned with the project's direction, please reach out to any maintainer on the Onyx team
-via [Discord](https://discord.gg/4NA5SbzrWb) or [email](mailto:hello@onyx.app).
+## Contributing Code
+Please refer to the documents in the `contributing_guides` folder to ensure that the code base is kept to a high standard.
+1. dev_setup.md (start here): gives you a guide to setting up a local development environment.
+2. contribution_process.md: how to ensure you are building valuable features that will get reviewed and merged.
+3. best_practices.md: before asking for reviews, ensure your changes meet the repo code quality standards.

-Issues that have been explicitly approved by the maintainers (aligned with the direction of the project)
-will be marked with the `approved by maintainers` label.
-Issues marked `good first issue` are an especially great place to start.
-
-**Connectors** to other tools are another great place to contribute. For details on how, refer to this
-[README.md](https://github.com/onyx-dot-app/onyx/blob/main/backend/onyx/connectors/README.md).
-
-If you have a new/different contribution in mind, we'd love to hear about it!
-Your input is vital to making sure that Onyx moves in the right direction.
-Before starting on implementation, please raise a GitHub issue.
-
-Also, always feel free to message the founders (Chris Weaver / Yuhong Sun) on
-[Discord](https://discord.gg/4NA5SbzrWb) directly about anything at all.
-
-### Contributing Code
-
-To contribute to this project, please follow the
+To contribute, please follow the
 ["fork and pull request"](https://docs.github.com/en/get-started/quickstart/contributing-to-projects) workflow.
-When opening a pull request, mention related issues and feel free to tag relevant maintainers.

-Before creating a pull request please make sure that the new changes conform to the formatting and linting requirements.
-See the [Formatting and Linting](#formatting-and-linting) section for how to run these checks locally.

-### Getting Help 🙋
+## Getting Help 🙋
+We have support channels and generally interesting discussions on our [Discord](https://discord.gg/4NA5SbzrWb).

-Our goal is to make contributing as easy as possible. If you run into any issues please don't hesitate to reach out.
-That way we can help future contributors and users can avoid the same issue.
+See you there!

-We also have support channels and generally interesting discussions on our
-[Discord](https://discord.gg/4NA5SbzrWb).
-
-We would love to see you there!
-
-## Get Started 🚀
-
-Onyx being a fully functional app, relies on some external software, specifically:
-
- [Postgres](https://www.postgresql.org/) (Relational DB)
- [Vespa](https://vespa.ai/) (Vector DB/Search Engine)
- [Redis](https://redis.io/) (Cache)
- [MinIO](https://min.io/) (File Store)
- [Nginx](https://nginx.org/) (Not needed for development flows generally)
-
-> **Note:**
-> This guide provides instructions to build and run Onyx locally from source with Docker containers providing the above external software. We believe this combination is easier for
-> development purposes. If you prefer to use pre-built container images, we provide instructions on running the full Onyx stack within Docker below.
-
-### Local Set Up
-
-Be sure to use Python version 3.11. For instructions on installing Python 3.11 on macOS, refer to the [CONTRIBUTING_MACOS.md](./CONTRIBUTING_MACOS.md) readme.
-
-If using a lower version, modifications will have to be made to the code.
-If using a higher version, sometimes some libraries will not be available (i.e. we had problems with Tensorflow in the past with higher versions of python).
-
-#### Backend: Python requirements
-
-Currently, we use [uv](https://docs.astral.sh/uv/) and recommend creating a [virtual environment](https://docs.astral.sh/uv/pip/environments/#using-a-virtual-environment).
-
-For convenience here's a command for it:
-
-```bash
-uv venv .venv --python 3.11
-source .venv/bin/activate
-```
-
-_For Windows, activate the virtual environment using Command Prompt:_
-
-```bash
-.venv\Scripts\activate
-```
-
-If using PowerShell, the command slightly differs:
-
-```powershell
-.venv\Scripts\Activate.ps1
-```
-
-Install the required python dependencies:
-
-```bash
-uv sync --all-extras
-```
-
-Install Playwright for Python (headless browser required by the Web Connector):
-
-```bash
-uv run playwright install
-```
-
-#### Frontend: Node dependencies
-
-Onyx uses Node v22.20.0. We highly recommend you use [Node Version Manager (nvm)](https://github.com/nvm-sh/nvm)
-to manage your Node installations. Once installed, you can run
-
-```bash
-nvm install 22 && nvm use 22
-node -v # verify your active version
-```
-
-Navigate to `onyx/web` and run:
-
-```bash
-npm i
-```
-
-## Formatting and Linting
-
-### Backend
-
-For the backend, you'll need to setup pre-commit hooks (black / reorder-python-imports).
-
-Then run:
-
-```bash
-uv run pre-commit install
-```
-
-Additionally, we use `mypy` for static type checking.
-Onyx is fully type-annotated, and we want to keep it that way!
-To run the mypy checks manually, run `uv run mypy .` from the `onyx/backend` directory.
-
-### Web
-
-We use `prettier` for formatting. The desired version will be installed via a `npm i` from the `onyx/web` directory.
-To run the formatter, use `npx prettier --write .` from the `onyx/web` directory.
-
-Pre-commit will also run prettier automatically on files you've recently touched. If re-formatted, your commit will fail.
-Re-stage your changes and commit again.
-
-# Running the application for development
-
-## Developing using VSCode Debugger (recommended)
-
-**We highly recommend using VSCode debugger for development.**
-See [CONTRIBUTING_VSCODE.md](./CONTRIBUTING_VSCODE.md) for more details.
-
-Otherwise, you can follow the instructions below to run the application for development.
-
-## Manually running the application for development
-### Docker containers for external software
-
-You will need Docker installed to run these containers.
-
-First navigate to `onyx/deployment/docker_compose`, then start up Postgres/Vespa/Redis/MinIO with:
-
-```bash
-docker compose up -d index relational_db cache minio
-```
-
-(index refers to Vespa, relational_db refers to Postgres, and cache refers to Redis)
-
-### Running Onyx locally
-
-To start the frontend, navigate to `onyx/web` and run:
-
-```bash
-npm run dev
-```
-
-Next, start the model server which runs the local NLP models.
-Navigate to `onyx/backend` and run:
-
-```bash
-uvicorn model_server.main:app --reload --port 9000
-```
-
-_For Windows (for compatibility with both PowerShell and Command Prompt):_
-
-```bash
-powershell -Command "uvicorn model_server.main:app --reload --port 9000"
-```
-
-The first time running Onyx, you will need to run the DB migrations for Postgres.
-After the first time, this is no longer required unless the DB models change.
-
-Navigate to `onyx/backend` and with the venv active, run:
-
-```bash
-alembic upgrade head
-```
-
-Next, start the task queue which orchestrates the background jobs.
-Jobs that take more time are run async from the API server.
-
-Still in `onyx/backend`, run:
-
-```bash
-python ./scripts/dev_run_background_jobs.py
-```
-
-To run the backend API server, navigate back to `onyx/backend` and run:
-
-```bash
-AUTH_TYPE=disabled uvicorn onyx.main:app --reload --port 8080
-```
-
-_For Windows (for compatibility with both PowerShell and Command Prompt):_
-
-```bash
-powershell -Command "
-    $env:AUTH_TYPE='disabled'
-    uvicorn onyx.main:app --reload --port 8080
-"
-```
-
-> **Note:**
-> If you need finer logging, add the additional environment variable `LOG_LEVEL=DEBUG` to the relevant services.
-
-#### Wrapping up
-
-You should now have 4 servers running:
-
- Web server
- Backend API
- Model server
- Background jobs
-
-Now, visit `http://localhost:3000` in your browser. You should see the Onyx onboarding wizard where you can connect your external LLM provider to Onyx.
-
-You've successfully set up a local Onyx instance! 🏁
-
-#### Running the Onyx application in a container
-
-You can run the full Onyx application stack from pre-built images including all external software dependencies.
-
-Navigate to `onyx/deployment/docker_compose` and run:
-
-```bash
-docker compose up -d
-```
-
-After Docker pulls and starts these containers, navigate to `http://localhost:3000` to use Onyx.
-
-If you want to make changes to Onyx and run those changes in Docker, you can also build a local version of the Onyx container images that incorporates your changes like so:
-
-```bash
-docker compose up -d --build
-```
-
-
-### Release Process

+## Release Process
 Onyx loosely follows the SemVer versioning standard.
 Major changes are released with a "minor" version bump. Currently we use patch release versions to indicate small feature changes.
 A set of Docker containers will be pushed automatically to DockerHub with every tag.
--- a/backend/.dockerignore
+++ b/backend/.dockerignore
@@ -15,3 +15,4 @@ build/
 dist/
 .coverage
 htmlcov/
+model_server/legacy/
--- a/backend/Dockerfile.model_server
+++ b/backend/Dockerfile.model_server
@@ -13,23 +13,10 @@ RUN uv pip install --system --no-cache-dir --upgrade \
        -r /tmp/requirements.txt && \
    rm -rf ~/.cache/uv /tmp/*.txt

-# Stage for downloading tokenizers
-FROM base AS tokenizers
-RUN python -c "from transformers import AutoTokenizer; \
-AutoTokenizer.from_pretrained('distilbert-base-uncased'); \
-AutoTokenizer.from_pretrained('mixedbread-ai/mxbai-rerank-xsmall-v1');"
-
-# Stage for downloading Onyx models
-FROM base AS onyx-models
-RUN python -c "from huggingface_hub import snapshot_download; \
-snapshot_download(repo_id='onyx-dot-app/hybrid-intent-token-classifier'); \
-snapshot_download(repo_id='onyx-dot-app/information-content-model');"
-
-# Stage for downloading embedding and reranking models
+# Stage for downloading embedding models
 FROM base AS embedding-models
 RUN python -c "from huggingface_hub import snapshot_download; \
-snapshot_download('nomic-ai/nomic-embed-text-v1'); \
-snapshot_download('mixedbread-ai/mxbai-rerank-xsmall-v1');"
+snapshot_download('nomic-ai/nomic-embed-text-v1');"

 # Initialize SentenceTransformer to cache the custom architecture
 RUN python -c "from sentence_transformers import SentenceTransformer; \
@@ -54,8 +41,6 @@ RUN groupadd -g 1001 onyx && \
 # In case the user has volumes mounted to /app/.cache/huggingface that they've downloaded while
 # running Onyx, move the current contents of the cache folder to a temporary location to ensure
 # it's preserved in order to combine with the user's cache contents
-COPY --chown=onyx:onyx --from=tokenizers /app/.cache/huggingface /app/.cache/temp_huggingface
-COPY --chown=onyx:onyx --from=onyx-models /app/.cache/huggingface /app/.cache/temp_huggingface
 COPY --chown=onyx:onyx --from=embedding-models /app/.cache/huggingface /app/.cache/temp_huggingface

 WORKDIR /app
--- a/backend/alembic/env.py
+++ b/backend/alembic/env.py
@@ -39,7 +39,9 @@ config = context.config
 if config.config_file_name is not None and config.attributes.get(
    "configure_logger", True
 ):
-    fileConfig(config.config_file_name)
+    # disable_existing_loggers=False prevents breaking pytest's caplog fixture
+    # See: https://pytest-alembic.readthedocs.io/en/latest/setup.html#caplog-issues
+    fileConfig(config.config_file_name, disable_existing_loggers=False)

 target_metadata = [Base.metadata, ResultModelBase.metadata]

@@ -223,7 +225,6 @@ def do_run_migrations(
 ) -> None:
    if create_schema:
        connection.execute(text(f'CREATE SCHEMA IF NOT EXISTS "{schema_name}"'))
-        connection.execute(text("COMMIT"))

    connection.execute(text(f'SET search_path TO "{schema_name}"'))

@@ -307,6 +308,7 @@ async def run_async_migrations() -> None:
                        schema_name=schema,
                        create_schema=create_schema,
                    )
+                    await connection.commit()
            except Exception as e:
                logger.error(f"Error migrating schema {schema}: {e}")
                if not continue_on_error:
@@ -344,6 +346,7 @@ async def run_async_migrations() -> None:
                        schema_name=schema,
                        create_schema=create_schema,
                    )
+                    await connection.commit()
            except Exception as e:
                logger.error(f"Error migrating schema {schema}: {e}")
                if not continue_on_error:
@@ -460,8 +463,49 @@ def run_migrations_offline() -> None:


 def run_migrations_online() -> None:
-    logger.info("run_migrations_online starting.")
-    asyncio.run(run_async_migrations())
+    """Run migrations in 'online' mode.
+
+    Supports pytest-alembic by checking for a pre-configured connection
+    in context.config.attributes["connection"]. If present, uses that
+    connection/engine directly instead of creating a new async engine.
+    """
+    # Check if pytest-alembic is providing a connection/engine
+    connectable = context.config.attributes.get("connection", None)
+
+    if connectable is not None:
+        # pytest-alembic is providing an engine - use it directly
+        logger.info("run_migrations_online starting (pytest-alembic mode).")
+
+        # For pytest-alembic, fall back to POSTGRES_DEFAULT_SCHEMA unless a "schema_name" attribute is supplied
+        schema_name = context.config.attributes.get(
+            "schema_name", POSTGRES_DEFAULT_SCHEMA
+        )
+
+        # pytest-alembic passes an Engine, so we need to get a connection from it
+        with connectable.connect() as connection:
+            # Set search path for the schema
+            connection.execute(text(f'SET search_path TO "{schema_name}"'))
+
+            context.configure(
+                connection=connection,
+                target_metadata=target_metadata,  # type: ignore
+                include_object=include_object,
+                version_table_schema=schema_name,
+                include_schemas=True,
+                compare_type=True,
+                compare_server_default=True,
+                script_location=config.get_main_option("script_location"),
+            )
+
+            with context.begin_transaction():
+                context.run_migrations()
+
+            # Commit the transaction to ensure changes are visible to the next migration
+            connection.commit()
+    else:
+        # Normal operation - use async migrations
+        logger.info("run_migrations_online starting.")
+        asyncio.run(run_async_migrations())


 if context.is_offline_mode():
--- a/backend/alembic/versions/23957775e5f5_remove_feedback_foreignkey_constraint.py
+++ b/backend/alembic/versions/23957775e5f5_remove_feedback_foreignkey_constraint.py
@@ -12,8 +12,8 @@ import sqlalchemy as sa
 # revision identifiers, used by Alembic.
 revision = "23957775e5f5"
 down_revision = "bc9771dccadf"
-branch_labels = None  # type: ignore
-depends_on = None  # type: ignore
+branch_labels = None
+depends_on = None


 def upgrade() -> None:
--- a/backend/alembic/versions/2a391f840e85_add_last_refreshed_at_mcp_server.py
+++ b/backend/alembic/versions/2a391f840e85_add_last_refreshed_at_mcp_server.py
@@ -0,0 +1,27 @@
+"""add last refreshed at mcp server
+
+Revision ID: 2a391f840e85
+Revises: 4cebcbc9b2ae
+Create Date: 2025-12-06 15:19:59.766066
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+# revision identifiers, used by Alembic.
+revision = "2a391f840e85"
+down_revision = "4cebcbc9b2ae"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    op.add_column(
+        "mcp_server",
+        sa.Column("last_refreshed_at", sa.DateTime(timezone=True), nullable=True),
+    )
+
+
+def downgrade() -> None:
+    op.drop_column("mcp_server", "last_refreshed_at")
--- a/backend/alembic/versions/2b90f3af54b8_usage_limits.py
+++ b/backend/alembic/versions/2b90f3af54b8_usage_limits.py
@@ -0,0 +1,46 @@
+"""usage_limits
+
+Revision ID: 2b90f3af54b8
+Revises: 9a0296d7421e
+Create Date: 2026-01-03 16:55:30.449692
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "2b90f3af54b8"
+down_revision = "9a0296d7421e"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    op.create_table(
+        "tenant_usage",
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column(
+            "window_start", sa.DateTime(timezone=True), nullable=False, index=True
+        ),
+        sa.Column("llm_cost_cents", sa.Float(), nullable=False, server_default="0.0"),
+        sa.Column("chunks_indexed", sa.Integer(), nullable=False, server_default="0"),
+        sa.Column("api_calls", sa.Integer(), nullable=False, server_default="0"),
+        sa.Column(
+            "non_streaming_api_calls", sa.Integer(), nullable=False, server_default="0"
+        ),
+        sa.Column(
+            "updated_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.func.now(),
+            nullable=True,
+        ),
+        sa.PrimaryKeyConstraint("id"),
+        sa.UniqueConstraint("window_start", name="uq_tenant_usage_window"),
+    )
+
+
+def downgrade() -> None:
+    op.drop_index("ix_tenant_usage_window_start", table_name="tenant_usage")
+    op.drop_table("tenant_usage")
--- a/backend/alembic/versions/47a07e1a38f1_fix_invalid_model_configurations_state.py
+++ b/backend/alembic/versions/47a07e1a38f1_fix_invalid_model_configurations_state.py
@@ -11,7 +11,7 @@ from pydantic import BaseModel, ConfigDict
 import sqlalchemy as sa
 from sqlalchemy.dialects import postgresql

-from onyx.llm.llm_provider_options import (
+from onyx.llm.well_known_providers.llm_provider_options import (
    fetch_model_names_for_provider_as_set,
    fetch_visible_model_names_for_provider_as_set,
 )
--- a/backend/alembic/versions/4cebcbc9b2ae_add_tab_index_to_tool_call.py
+++ b/backend/alembic/versions/4cebcbc9b2ae_add_tab_index_to_tool_call.py
@@ -0,0 +1,27 @@
+"""add tab_index to tool_call
+
+Revision ID: 4cebcbc9b2ae
+Revises: a1b2c3d4e5f6
+Create Date: 2025-12-16
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+# revision identifiers, used by Alembic.
+revision = "4cebcbc9b2ae"
+down_revision = "a1b2c3d4e5f6"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+    op.add_column(
+        "tool_call",
+        sa.Column("tab_index", sa.Integer(), nullable=False, server_default="0"),
+    )
+
+
+def downgrade() -> None:
+    op.drop_column("tool_call", "tab_index")
--- a/backend/alembic/versions/4ea2c93919c1_add_type_to_credentials.py
+++ b/backend/alembic/versions/4ea2c93919c1_add_type_to_credentials.py
@@ -62,6 +62,11 @@ def upgrade() -> None:
    )
    """
    )
+
+    # Drop the temporary table to avoid conflicts if migration runs again
+    # (e.g., during upgrade -> downgrade -> upgrade cycles in tests)
+    op.execute("DROP TABLE IF EXISTS temp_connector_credential")
+
    # If no exception was raised, alter the column
    op.alter_column("credential", "source", nullable=True)  # TODO modify
    # # ### end Alembic commands ###
--- a/backend/alembic/versions/505c488f6662_merge_default_assistants_into_unified.py
+++ b/backend/alembic/versions/505c488f6662_merge_default_assistants_into_unified.py
@@ -85,103 +85,122 @@ class UserRow(NamedTuple):
 def upgrade() -> None:
    conn = op.get_bind()

-    # Start transaction
-    conn.execute(sa.text("BEGIN"))
+    # Step 1: Create or update the unified assistant (ID 0)
+    search_assistant = conn.execute(
+        sa.text("SELECT * FROM persona WHERE id = 0")
+    ).fetchone()

-    try:
-        # Step 1: Create or update the unified assistant (ID 0)
-        search_assistant = conn.execute(
-            sa.text("SELECT * FROM persona WHERE id = 0")
-        ).fetchone()
-
-        if search_assistant:
-            # Update existing Search assistant to be the unified assistant
-            conn.execute(
-                sa.text(
-                    """
-                    UPDATE persona
-                    SET name = :name,
-                        description = :description,
-                        system_prompt = :system_prompt,
-                        num_chunks = :num_chunks,
-                        is_default_persona = true,
-                        is_visible = true,
-                        deleted = false,
-                        display_priority = :display_priority,
-                        llm_filter_extraction = :llm_filter_extraction,
-                        llm_relevance_filter = :llm_relevance_filter,
-                        recency_bias = :recency_bias,
-                        chunks_above = :chunks_above,
-                        chunks_below = :chunks_below,
-                        datetime_aware = :datetime_aware,
-                        starter_messages = null
-                    WHERE id = 0
-                """
-                ),
-                INSERT_DICT,
-            )
-        else:
-            # Create new unified assistant with ID 0
-            conn.execute(
-                sa.text(
-                    """
-                    INSERT INTO persona (
-                        id, name, description, system_prompt, num_chunks,
-                        is_default_persona, is_visible, deleted, display_priority,
-                        llm_filter_extraction, llm_relevance_filter, recency_bias,
-                        chunks_above, chunks_below, datetime_aware, starter_messages,
-                        builtin_persona
-                    ) VALUES (
-                        0, :name, :description, :system_prompt, :num_chunks,
-                        true, true, false, :display_priority, :llm_filter_extraction,
-                        :llm_relevance_filter, :recency_bias, :chunks_above, :chunks_below,
-                        :datetime_aware, null, true
-                    )
-                """
-                ),
-                INSERT_DICT,
-            )
-
-        # Step 2: Mark ALL builtin assistants as deleted (except the unified assistant ID 0)
+    if search_assistant:
+        # Update existing Search assistant to be the unified assistant
        conn.execute(
            sa.text(
                """
                UPDATE persona
-                SET deleted = true, is_visible = false, is_default_persona = false
-                WHERE builtin_persona = true AND id != 0
+                SET name = :name,
+                    description = :description,
+                    system_prompt = :system_prompt,
+                    num_chunks = :num_chunks,
+                    is_default_persona = true,
+                    is_visible = true,
+                    deleted = false,
+                    display_priority = :display_priority,
+                    llm_filter_extraction = :llm_filter_extraction,
+                    llm_relevance_filter = :llm_relevance_filter,
+                    recency_bias = :recency_bias,
+                    chunks_above = :chunks_above,
+                    chunks_below = :chunks_below,
+                    datetime_aware = :datetime_aware,
+                    starter_messages = null
+                WHERE id = 0
            """
-            )
+            ),
+            INSERT_DICT,
+        )
+    else:
+        # Create new unified assistant with ID 0
+        conn.execute(
+            sa.text(
+                """
+                INSERT INTO persona (
+                    id, name, description, system_prompt, num_chunks,
+                    is_default_persona, is_visible, deleted, display_priority,
+                    llm_filter_extraction, llm_relevance_filter, recency_bias,
+                    chunks_above, chunks_below, datetime_aware, starter_messages,
+                    builtin_persona
+                ) VALUES (
+                    0, :name, :description, :system_prompt, :num_chunks,
+                    true, true, false, :display_priority, :llm_filter_extraction,
+                    :llm_relevance_filter, :recency_bias, :chunks_above, :chunks_below,
+                    :datetime_aware, null, true
+                )
+            """
+            ),
+            INSERT_DICT,
        )

-        # Step 3: Add all built-in tools to the unified assistant
-        # First, get the tool IDs for SearchTool, ImageGenerationTool, and WebSearchTool
-        search_tool = conn.execute(
-            sa.text("SELECT id FROM tool WHERE in_code_tool_id = 'SearchTool'")
-        ).fetchone()
+    # Step 2: Mark ALL builtin assistants as deleted (except the unified assistant ID 0)
+    conn.execute(
+        sa.text(
+            """
+            UPDATE persona
+            SET deleted = true, is_visible = false, is_default_persona = false
+            WHERE builtin_persona = true AND id != 0
+        """
+        )
+    )

-        if not search_tool:
-            raise ValueError(
-                "SearchTool not found in database. Ensure tools migration has run first."
-            )
+    # Step 3: Add all built-in tools to the unified assistant
+    # First, get the tool IDs for SearchTool, ImageGenerationTool, and WebSearchTool
+    search_tool = conn.execute(
+        sa.text("SELECT id FROM tool WHERE in_code_tool_id = 'SearchTool'")
+    ).fetchone()

-        image_gen_tool = conn.execute(
-            sa.text("SELECT id FROM tool WHERE in_code_tool_id = 'ImageGenerationTool'")
-        ).fetchone()
+    if not search_tool:
+        raise ValueError(
+            "SearchTool not found in database. Ensure tools migration has run first."
+        )

-        if not image_gen_tool:
-            raise ValueError(
-                "ImageGenerationTool not found in database. Ensure tools migration has run first."
-            )
+    image_gen_tool = conn.execute(
+        sa.text("SELECT id FROM tool WHERE in_code_tool_id = 'ImageGenerationTool'")
+    ).fetchone()

-        # WebSearchTool is optional - may not be configured
-        web_search_tool = conn.execute(
-            sa.text("SELECT id FROM tool WHERE in_code_tool_id = 'WebSearchTool'")
-        ).fetchone()
+    if not image_gen_tool:
+        raise ValueError(
+            "ImageGenerationTool not found in database. Ensure tools migration has run first."
+        )

-        # Clear existing tool associations for persona 0
-        conn.execute(sa.text("DELETE FROM persona__tool WHERE persona_id = 0"))
+    # WebSearchTool is optional - may not be configured
+    web_search_tool = conn.execute(
+        sa.text("SELECT id FROM tool WHERE in_code_tool_id = 'WebSearchTool'")
+    ).fetchone()

-        # Add tools to the unified assistant
+    # Clear existing tool associations for persona 0
+    conn.execute(sa.text("DELETE FROM persona__tool WHERE persona_id = 0"))
+
+    # Add tools to the unified assistant
+    conn.execute(
+        sa.text(
+            """
+            INSERT INTO persona__tool (persona_id, tool_id)
+            VALUES (0, :tool_id)
+            ON CONFLICT DO NOTHING
+        """
+        ),
+        {"tool_id": search_tool[0]},
+    )
+
+    conn.execute(
+        sa.text(
+            """
+            INSERT INTO persona__tool (persona_id, tool_id)
+            VALUES (0, :tool_id)
+            ON CONFLICT DO NOTHING
+        """
+        ),
+        {"tool_id": image_gen_tool[0]},
+    )
+
+    if web_search_tool:
        conn.execute(
            sa.text(
                """
@@ -190,191 +209,148 @@ def upgrade() -> None:
                ON CONFLICT DO NOTHING
            """
            ),
-            {"tool_id": search_tool[0]},
+            {"tool_id": web_search_tool[0]},
        )

-        conn.execute(
-            sa.text(
-                """
-                INSERT INTO persona__tool (persona_id, tool_id)
-                VALUES (0, :tool_id)
-                ON CONFLICT DO NOTHING
+    # Step 4: Migrate existing chat sessions from all builtin assistants to unified assistant
+    conn.execute(
+        sa.text(
            """
-            ),
-            {"tool_id": image_gen_tool[0]},
+            UPDATE chat_session
+            SET persona_id = 0
+            WHERE persona_id IN (
+                SELECT id FROM persona WHERE builtin_persona = true AND id != 0
+            )
+        """
        )
+    )

-        if web_search_tool:
+    # Step 5: Migrate user preferences - remove references to all builtin assistants
+    # First, get all builtin assistant IDs (except 0)
+    builtin_assistants_result = conn.execute(
+        sa.text(
+            """
+            SELECT id FROM persona
+            WHERE builtin_persona = true AND id != 0
+        """
+        )
+    ).fetchall()
+    builtin_assistant_ids = [row[0] for row in builtin_assistants_result]
+
+    # Get all users with preferences
+    users_result = conn.execute(
+        sa.text(
+            """
+            SELECT id, chosen_assistants, visible_assistants,
+                   hidden_assistants, pinned_assistants
+            FROM "user"
+        """
+        )
+    ).fetchall()
+
+    for user_row in users_result:
+        user = UserRow(*user_row)
+        user_id: UUID = user.id
+        updates: dict[str, Any] = {}
+
+        # Remove all builtin assistants from chosen_assistants
+        if user.chosen_assistants:
+            new_chosen: list[int] = [
+                assistant_id
+                for assistant_id in user.chosen_assistants
+                if assistant_id not in builtin_assistant_ids
+            ]
+            if new_chosen != user.chosen_assistants:
+                updates["chosen_assistants"] = json.dumps(new_chosen)
+
+        # Remove all builtin assistants from visible_assistants
+        if user.visible_assistants:
+            new_visible: list[int] = [
+                assistant_id
+                for assistant_id in user.visible_assistants
+                if assistant_id not in builtin_assistant_ids
+            ]
+            if new_visible != user.visible_assistants:
+                updates["visible_assistants"] = json.dumps(new_visible)
+
+        # Add all builtin assistants to hidden_assistants
+        if user.hidden_assistants:
+            new_hidden: list[int] = list(user.hidden_assistants)
+            for old_id in builtin_assistant_ids:
+                if old_id not in new_hidden:
+                    new_hidden.append(old_id)
+            if new_hidden != user.hidden_assistants:
+                updates["hidden_assistants"] = json.dumps(new_hidden)
+        else:
+            updates["hidden_assistants"] = json.dumps(builtin_assistant_ids)
+
+        # Remove all builtin assistants from pinned_assistants
+        if user.pinned_assistants:
+            new_pinned: list[int] = [
+                assistant_id
+                for assistant_id in user.pinned_assistants
+                if assistant_id not in builtin_assistant_ids
+            ]
+            if new_pinned != user.pinned_assistants:
+                updates["pinned_assistants"] = json.dumps(new_pinned)
+
+        # Apply updates if any
+        if updates:
+            set_clause = ", ".join([f"{k} = :{k}" for k in updates.keys()])
+            updates["user_id"] = str(user_id)  # Convert UUID to string for SQL
            conn.execute(
-                sa.text(
-                    """
-                    INSERT INTO persona__tool (persona_id, tool_id)
-                    VALUES (0, :tool_id)
-                    ON CONFLICT DO NOTHING
-                """
-                ),
-                {"tool_id": web_search_tool[0]},
+                sa.text(f'UPDATE "user" SET {set_clause} WHERE id = :user_id'),
+                updates,
            )

-        # Step 4: Migrate existing chat sessions from all builtin assistants to unified assistant
-        conn.execute(
-            sa.text(
-                """
-                UPDATE chat_session
-                SET persona_id = 0
-                WHERE persona_id IN (
-                    SELECT id FROM persona WHERE builtin_persona = true AND id != 0
-                )
-            """
-            )
-        )
-
-        # Step 5: Migrate user preferences - remove references to all builtin assistants
-        # First, get all builtin assistant IDs (except 0)
-        builtin_assistants_result = conn.execute(
-            sa.text(
-                """
-                SELECT id FROM persona
-                WHERE builtin_persona = true AND id != 0
-            """
-            )
-        ).fetchall()
-        builtin_assistant_ids = [row[0] for row in builtin_assistants_result]
-
-        # Get all users with preferences
-        users_result = conn.execute(
-            sa.text(
-                """
-                SELECT id, chosen_assistants, visible_assistants,
-                       hidden_assistants, pinned_assistants
-                FROM "user"
-            """
-            )
-        ).fetchall()
-
-        for user_row in users_result:
-            user = UserRow(*user_row)
-            user_id: UUID = user.id
-            updates: dict[str, Any] = {}
-
-            # Remove all builtin assistants from chosen_assistants
-            if user.chosen_assistants:
-                new_chosen: list[int] = [
-                    assistant_id
-                    for assistant_id in user.chosen_assistants
-                    if assistant_id not in builtin_assistant_ids
-                ]
-                if new_chosen != user.chosen_assistants:
-                    updates["chosen_assistants"] = json.dumps(new_chosen)
-
-            # Remove all builtin assistants from visible_assistants
-            if user.visible_assistants:
-                new_visible: list[int] = [
-                    assistant_id
-                    for assistant_id in user.visible_assistants
-                    if assistant_id not in builtin_assistant_ids
-                ]
-                if new_visible != user.visible_assistants:
-                    updates["visible_assistants"] = json.dumps(new_visible)
-
-            # Add all builtin assistants to hidden_assistants
-            if user.hidden_assistants:
-                new_hidden: list[int] = list(user.hidden_assistants)
-                for old_id in builtin_assistant_ids:
-                    if old_id not in new_hidden:
-                        new_hidden.append(old_id)
-                if new_hidden != user.hidden_assistants:
-                    updates["hidden_assistants"] = json.dumps(new_hidden)
-            else:
-                updates["hidden_assistants"] = json.dumps(builtin_assistant_ids)
-
-            # Remove all builtin assistants from pinned_assistants
-            if user.pinned_assistants:
-                new_pinned: list[int] = [
-                    assistant_id
-                    for assistant_id in user.pinned_assistants
-                    if assistant_id not in builtin_assistant_ids
-                ]
-                if new_pinned != user.pinned_assistants:
-                    updates["pinned_assistants"] = json.dumps(new_pinned)
-
-            # Apply updates if any
-            if updates:
-                set_clause = ", ".join([f"{k} = :{k}" for k in updates.keys()])
-                updates["user_id"] = str(user_id)  # Convert UUID to string for SQL
-                conn.execute(
-                    sa.text(f'UPDATE "user" SET {set_clause} WHERE id = :user_id'),
-                    updates,
-                )
-
-        # Commit transaction
-        conn.execute(sa.text("COMMIT"))
-
-    except Exception as e:
-        # Rollback on error
-        conn.execute(sa.text("ROLLBACK"))
-        raise e
-

 def downgrade() -> None:
    conn = op.get_bind()

-    # Start transaction
-    conn.execute(sa.text("BEGIN"))
-
-    try:
-        # Only restore General (ID -1) and Art (ID -3) assistants
-        # Step 1: Keep Search assistant (ID 0) as default but restore original state
-        conn.execute(
-            sa.text(
-                """
-                UPDATE persona
-                SET is_default_persona = true,
-                    is_visible = true,
-                    deleted = false
-                WHERE id = 0
+    # Only restore General (ID -1) and Art (ID -3) assistants
+    # Step 1: Keep Search assistant (ID 0) as default but restore original state
+    conn.execute(
+        sa.text(
            """
-            )
+            UPDATE persona
+            SET is_default_persona = true,
+                is_visible = true,
+                deleted = false
+            WHERE id = 0
+        """
        )
+    )

-        # Step 2: Restore General assistant (ID -1)
-        conn.execute(
-            sa.text(
-                """
-                UPDATE persona
-                SET deleted = false,
-                    is_visible = true,
-                    is_default_persona = true
-                WHERE id = :general_assistant_id
+    # Step 2: Restore General assistant (ID -1)
+    conn.execute(
+        sa.text(
            """
-            ),
-            {"general_assistant_id": GENERAL_ASSISTANT_ID},
-        )
+            UPDATE persona
+            SET deleted = false,
+                is_visible = true,
+                is_default_persona = true
+            WHERE id = :general_assistant_id
+        """
+        ),
+        {"general_assistant_id": GENERAL_ASSISTANT_ID},
+    )

-        # Step 3: Restore Art assistant (ID -3)
-        conn.execute(
-            sa.text(
-                """
-                UPDATE persona
-                SET deleted = false,
-                    is_visible = true,
-                    is_default_persona = true
-                WHERE id = :art_assistant_id
+    # Step 3: Restore Art assistant (ID -3)
+    conn.execute(
+        sa.text(
            """
-            ),
-            {"art_assistant_id": ART_ASSISTANT_ID},
-        )
+            UPDATE persona
+            SET deleted = false,
+                is_visible = true,
+                is_default_persona = true
+            WHERE id = :art_assistant_id
+        """
+        ),
+        {"art_assistant_id": ART_ASSISTANT_ID},
+    )

-        # Note: We don't restore the original tool associations, names, or descriptions
-        # as those would require more complex logic to determine original state.
-        # We also cannot restore original chat session persona_ids as we don't
-        # have the original mappings.
-        # Other builtin assistants remain deleted as per the requirement.
-
-        # Commit transaction
-        conn.execute(sa.text("COMMIT"))
-
-    except Exception as e:
-        # Rollback on error
-        conn.execute(sa.text("ROLLBACK"))
-        raise e
+    # Note: We don't restore the original tool associations, names, or descriptions
+    # as those would require more complex logic to determine original state.
+    # We also cannot restore original chat session persona_ids as we don't
+    # have the original mappings.
+    # Other builtin assistants remain deleted as per the requirement.
--- a/backend/alembic/versions/5c3dca366b35_backend_driven_notification_details.py
+++ b/backend/alembic/versions/5c3dca366b35_backend_driven_notification_details.py
@@ -0,0 +1,40 @@
+"""backend driven notification details
+
+Revision ID: 5c3dca366b35
+Revises: 9087b548dd69
+Create Date: 2026-01-06 16:03:11.413724
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "5c3dca366b35"
+down_revision = "9087b548dd69"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Add the ``title`` and ``description`` columns to ``notification``."""
+    # ``title`` is NOT NULL; the server default supplies "New Notification"
+    # for rows that do not provide a title.
+    op.add_column(
+        "notification",
+        sa.Column(
+            "title", sa.String(), nullable=False, server_default="New Notification"
+        ),
+    )
+    # ``description`` is optional and defaults to the empty string.
+    op.add_column(
+        "notification",
+        sa.Column("description", sa.String(), nullable=True, server_default=""),
+    )
+
+
+def downgrade() -> None:
+    """Drop the ``title`` and ``description`` columns added by ``upgrade``."""
+    op.drop_column("notification", "title")
+    op.drop_column("notification", "description")
--- a/backend/alembic/versions/699221885109_nullify_default_task_prompt.py
+++ b/backend/alembic/versions/699221885109_nullify_default_task_prompt.py
@@ -0,0 +1,71 @@
+"""nullify_default_task_prompt
+
+Revision ID: 699221885109
+Revises: 7e490836d179
+Create Date: 2025-12-30 10:00:00.000000
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "699221885109"
+down_revision = "7e490836d179"
+branch_labels = None
+depends_on = None
+
+DEFAULT_PERSONA_ID = 0
+
+
+def upgrade() -> None:
+    """Make ``persona.task_prompt`` nullable and clear it for the default persona."""
+    # Make task_prompt column nullable
+    # Note: The model had nullable=True but the DB column was NOT NULL until this point
+    op.alter_column(
+        "persona",
+        "task_prompt",
+        nullable=True,
+    )
+
+    # Set task_prompt to NULL for the default persona
+    conn = op.get_bind()
+    conn.execute(
+        sa.text(
+            """
+            UPDATE persona
+            SET task_prompt = NULL
+            WHERE id = :persona_id
+            """
+        ),
+        {"persona_id": DEFAULT_PERSONA_ID},
+    )
+
+
+def downgrade() -> None:
+    """Replace NULL ``task_prompt`` values with ``''`` and restore NOT NULL.
+
+    The prompt the default persona had before ``upgrade`` is not recovered;
+    it comes back as an empty string like every other NULL row.
+    """
+    conn = op.get_bind()
+
+    # Backfill every NULL task_prompt, the default persona's included, in one
+    # statement: the NOT NULL constraint below cannot be added while NULLs remain.
+    conn.execute(
+        sa.text(
+            """
+            UPDATE persona
+            SET task_prompt = ''
+            WHERE task_prompt IS NULL
+            """
+        )
+    )
+
+    # Revert task_prompt column to not nullable
+    op.alter_column(
+        "persona",
+        "task_prompt",
+        nullable=False,
+    )
--- a/backend/alembic/versions/7206234e012a_add_image_generation_config_table.py
+++ b/backend/alembic/versions/7206234e012a_add_image_generation_config_table.py
@@ -0,0 +1,57 @@
+"""add image generation config table
+
+Revision ID: 7206234e012a
+Revises: 699221885109
+Create Date: 2025-12-21 00:00:00.000000
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "7206234e012a"
+down_revision = "699221885109"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Create ``image_generation_config`` and its two non-unique indexes."""
+    op.create_table(
+        "image_generation_config",
+        sa.Column("image_provider_id", sa.String(), primary_key=True),
+        sa.Column("model_configuration_id", sa.Integer(), nullable=False),
+        sa.Column("is_default", sa.Boolean(), nullable=False),
+        # Deleting the referenced model_configuration row deletes this row too.
+        sa.ForeignKeyConstraint(
+            ["model_configuration_id"],
+            ["model_configuration.id"],
+            ondelete="CASCADE",
+        ),
+    )
+    op.create_index(
+        "ix_image_generation_config_is_default",
+        "image_generation_config",
+        ["is_default"],
+        unique=False,
+    )
+    op.create_index(
+        "ix_image_generation_config_model_configuration_id",
+        "image_generation_config",
+        ["model_configuration_id"],
+        unique=False,
+    )
+
+
+def downgrade() -> None:
+    """Drop the indexes in reverse creation order, then the table itself."""
+    op.drop_index(
+        "ix_image_generation_config_model_configuration_id",
+        table_name="image_generation_config",
+    )
+    op.drop_index(
+        "ix_image_generation_config_is_default", table_name="image_generation_config"
+    )
+    op.drop_table("image_generation_config")
--- a/backend/alembic/versions/73e9983e5091_add_search_query_table.py
+++ b/backend/alembic/versions/73e9983e5091_add_search_query_table.py
@@ -0,0 +1,50 @@
+"""add_search_query_table
+
+Revision ID: 73e9983e5091
+Revises: d1b637d7050a
+Create Date: 2026-01-14 14:16:52.837489
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision = "73e9983e5091"
+down_revision = "d1b637d7050a"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Create the ``search_query`` table and index it by user and creation time."""
+    op.create_table(
+        "search_query",
+        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
+        sa.Column(
+            "user_id",
+            postgresql.UUID(as_uuid=True),
+            sa.ForeignKey("user.id"),
+            nullable=False,
+        ),
+        sa.Column("query", sa.String(), nullable=False),
+        sa.Column("query_expansions", postgresql.ARRAY(sa.String()), nullable=True),
+        # Filled in by the database (now()) when the insert omits a value.
+        sa.Column(
+            "created_at",
+            sa.DateTime(timezone=True),
+            nullable=False,
+            server_default=sa.func.now(),
+        ),
+    )
+
+    op.create_index("ix_search_query_user_id", "search_query", ["user_id"])
+    op.create_index("ix_search_query_created_at", "search_query", ["created_at"])
+
+
+def downgrade() -> None:
+    """Drop both indexes in reverse creation order, then the table."""
+    op.drop_index("ix_search_query_created_at", table_name="search_query")
+    op.drop_index("ix_search_query_user_id", table_name="search_query")
+    op.drop_table("search_query")
--- a/backend/alembic/versions/776b3bbe9092_remove_remaining_enums.py
+++ b/backend/alembic/versions/776b3bbe9092_remove_remaining_enums.py
@@ -10,8 +10,7 @@ from alembic import op
 import sqlalchemy as sa

 from onyx.db.models import IndexModelStatus
-from onyx.context.search.enums import RecencyBiasSetting
-from onyx.context.search.enums import SearchType
+from onyx.context.search.enums import RecencyBiasSetting, SearchType

 # revision identifiers, used by Alembic.
 revision = "776b3bbe9092"
--- a/backend/alembic/versions/7a70b7664e37_add_model_configuration_table.py
+++ b/backend/alembic/versions/7a70b7664e37_add_model_configuration_table.py
@@ -10,7 +10,7 @@ from alembic import op
 import sqlalchemy as sa
 from sqlalchemy.dialects import postgresql

-from onyx.llm.llm_provider_options import (
+from onyx.llm.well_known_providers.llm_provider_options import (
    fetch_model_names_for_provider_as_set,
    fetch_visible_model_names_for_provider_as_set,
 )
--- a/backend/alembic/versions/7e490836d179_nullify_default_system_prompt.py
+++ b/backend/alembic/versions/7e490836d179_nullify_default_system_prompt.py
@@ -0,0 +1,82 @@
+"""nullify_default_system_prompt
+
+Revision ID: 7e490836d179
+Revises: c1d2e3f4a5b6
+Create Date: 2025-12-29 16:54:36.635574
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "7e490836d179"
+down_revision = "c1d2e3f4a5b6"
+branch_labels = None
+depends_on = None
+
+
+# This is the default system prompt from the previous migration (87c52ec39f84)
+# ruff: noqa: E501, W605 start
+PREVIOUS_DEFAULT_SYSTEM_PROMPT = """
+You are a highly capable, thoughtful, and precise assistant. Your goal is to deeply understand the user's intent, ask clarifying questions when needed, think step-by-step through complex problems, provide clear and accurate answers, and proactively anticipate helpful follow-up information. Always prioritize being truthful, nuanced, insightful, and efficient.
+
+The current date is [[CURRENT_DATETIME]].[[CITATION_GUIDANCE]]
+
+# Response Style
+You use different text styles, bolding, emojis (sparingly), block quotes, and other formatting to make your responses more readable and engaging.
+You use proper Markdown and LaTeX to format your responses for math, scientific, and chemical formulas, symbols, etc.: '$$\\n[expression]\\n$$' for standalone cases and '\\( [expression] \\)' when inline.
+For code you prefer to use Markdown and specify the language.
+You can use horizontal rules (---) to separate sections of your responses.
+You can use Markdown tables to format your responses for data, lists, and other structured information.
+""".lstrip()
+# ruff: noqa: E501, W605 end
+
+
+def upgrade() -> None:
+    """Make ``persona.system_prompt`` nullable and NULL out the old default prompt."""
+    # Make system_prompt column nullable (model already has nullable=True but DB doesn't)
+    op.alter_column(
+        "persona",
+        "system_prompt",
+        nullable=True,
+    )
+
+    # Set system_prompt to NULL where it matches the previous default
+    conn = op.get_bind()
+    conn.execute(
+        sa.text(
+            """
+            UPDATE persona
+            SET system_prompt = NULL
+            WHERE system_prompt = :previous_default
+            """
+        ),
+        {"previous_default": PREVIOUS_DEFAULT_SYSTEM_PROMPT},
+    )
+
+
+def downgrade() -> None:
+    """Write the previous default prompt into NULL rows and restore NOT NULL."""
+    # Restore the default system prompt for personas that have NULL
+    # Note: This may restore the prompt to personas that originally had NULL
+    # before this migration, but there's no way to distinguish them
+    conn = op.get_bind()
+    conn.execute(
+        sa.text(
+            """
+            UPDATE persona
+            SET system_prompt = :previous_default
+            WHERE system_prompt IS NULL
+            """
+        ),
+        {"previous_default": PREVIOUS_DEFAULT_SYSTEM_PROMPT},
+    )
+
+    # Revert system_prompt column to not nullable
+    op.alter_column(
+        "persona",
+        "system_prompt",
+        nullable=False,
+    )
--- a/backend/alembic/versions/7ed603b64d5a_add_mcp_server_and_connection_config_.py
+++ b/backend/alembic/versions/7ed603b64d5a_add_mcp_server_and_connection_config_.py
@@ -42,13 +42,13 @@ def upgrade() -> None:
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
-            server_default=sa.text("now()"),  # type: ignore
+            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "updated_at",
            sa.DateTime(timezone=True),
-            server_default=sa.text("now()"),  # type: ignore
+            server_default=sa.text("now()"),
            nullable=False,
        ),
    )
@@ -63,13 +63,13 @@ def upgrade() -> None:
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
-            server_default=sa.text("now()"),  # type: ignore
+            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "updated_at",
            sa.DateTime(timezone=True),
-            server_default=sa.text("now()"),  # type: ignore
+            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
--- a/backend/alembic/versions/8405ca81cc83_notifications_constraint.py
+++ b/backend/alembic/versions/8405ca81cc83_notifications_constraint.py
@@ -0,0 +1,50 @@
+"""notifications constraint, sort index, and cleanup old notifications
+
+Revision ID: 8405ca81cc83
+Revises: a3c1a7904cd0
+Create Date: 2026-01-07 16:43:44.855156
+
+"""
+
+from alembic import op
+
+
+# revision identifiers, used by Alembic.
+revision = "8405ca81cc83"
+down_revision = "a3c1a7904cd0"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Purge legacy notifications, then create the dedup and sort indexes."""
+    # Clean up legacy notifications first
+    op.execute("DELETE FROM notification WHERE title = 'New Notification'")
+
+    # Create unique index for notification deduplication.
+    # This enables atomic ON CONFLICT DO NOTHING inserts in batch_create_notifications.
+    #
+    # Uses COALESCE to handle NULL additional_data (NULLs are normally distinct
+    # in unique constraints, but we want NULL == NULL for deduplication).
+    # The '{}' represents an empty JSONB object as the NULL replacement.
+    op.execute(
+        """
+        CREATE UNIQUE INDEX IF NOT EXISTS ix_notification_user_type_data
+        ON notification (user_id, notif_type, COALESCE(additional_data, '{}'::jsonb))
+        """
+    )
+
+    # Create index for efficient notification sorting by user
+    # Covers: WHERE user_id = ? ORDER BY dismissed, first_shown DESC
+    op.execute(
+        """
+        CREATE INDEX IF NOT EXISTS ix_notification_user_sort
+        ON notification (user_id, dismissed, first_shown DESC)
+        """
+    )
+
+
+def downgrade() -> None:
+    """Drop both indexes; rows deleted by ``upgrade`` are not restored."""
+    op.execute("DROP INDEX IF EXISTS ix_notification_user_type_data")
+    op.execute("DROP INDEX IF EXISTS ix_notification_user_sort")
--- a/backend/alembic/versions/8b5ce697290e_add_discord_bot_tables.py
+++ b/backend/alembic/versions/8b5ce697290e_add_discord_bot_tables.py
@@ -0,0 +1,118 @@
+"""Add Discord bot tables
+
+Revision ID: 8b5ce697290e
+Revises: a1b2c3d4e5f7
+Create Date: 2025-01-14
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+# revision identifiers, used by Alembic.
+revision = "8b5ce697290e"
+down_revision = "a1b2c3d4e5f7"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+    """Create the Discord bot, guild and channel configuration tables."""
+    # DiscordBotConfig (singleton table - one per tenant)
+    op.create_table(
+        "discord_bot_config",
+        sa.Column(
+            "id",
+            sa.String(),
+            primary_key=True,
+            server_default=sa.text("'SINGLETON'"),
+        ),
+        sa.Column("bot_token", sa.LargeBinary(), nullable=False),  # EncryptedString
+        sa.Column(
+            "created_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.func.now(),
+            nullable=False,
+        ),
+        sa.CheckConstraint("id = 'SINGLETON'", name="ck_discord_bot_config_singleton"),
+    )
+
+    # DiscordGuildConfig
+    op.create_table(
+        "discord_guild_config",
+        sa.Column("id", sa.Integer(), primary_key=True),
+        sa.Column("guild_id", sa.BigInteger(), nullable=True, unique=True),
+        sa.Column("guild_name", sa.String(), nullable=True),
+        sa.Column("registration_key", sa.String(), nullable=False, unique=True),
+        sa.Column("registered_at", sa.DateTime(timezone=True), nullable=True),
+        sa.Column(
+            "default_persona_id",
+            sa.Integer(),
+            sa.ForeignKey("persona.id", ondelete="SET NULL"),
+            nullable=True,
+        ),
+        sa.Column(
+            "enabled", sa.Boolean(), server_default=sa.text("true"), nullable=False
+        ),
+    )
+
+    # DiscordChannelConfig
+    op.create_table(
+        "discord_channel_config",
+        sa.Column("id", sa.Integer(), primary_key=True),
+        sa.Column(
+            "guild_config_id",
+            sa.Integer(),
+            sa.ForeignKey("discord_guild_config.id", ondelete="CASCADE"),
+            nullable=False,
+        ),
+        sa.Column("channel_id", sa.BigInteger(), nullable=False),
+        sa.Column("channel_name", sa.String(), nullable=False),
+        sa.Column(
+            "channel_type",
+            sa.String(20),
+            server_default=sa.text("'text'"),
+            nullable=False,
+        ),
+        sa.Column(
+            "is_private",
+            sa.Boolean(),
+            server_default=sa.text("false"),
+            nullable=False,
+        ),
+        sa.Column(
+            "thread_only_mode",
+            sa.Boolean(),
+            server_default=sa.text("false"),
+            nullable=False,
+        ),
+        sa.Column(
+            "require_bot_invocation",
+            sa.Boolean(),
+            server_default=sa.text("true"),
+            nullable=False,
+        ),
+        sa.Column(
+            "persona_override_id",
+            sa.Integer(),
+            sa.ForeignKey("persona.id", ondelete="SET NULL"),
+            nullable=True,
+        ),
+        sa.Column(
+            "enabled", sa.Boolean(), server_default=sa.text("false"), nullable=False
+        ),
+    )
+
+    # Unique constraint: one config per channel per guild
+    op.create_unique_constraint(
+        "uq_discord_channel_guild_channel",
+        "discord_channel_config",
+        ["guild_config_id", "channel_id"],
+    )
+
+
+def downgrade() -> None:
+    """Drop the three tables, channel config first since it references guild config."""
+    op.drop_table("discord_channel_config")
+    op.drop_table("discord_guild_config")
+    op.drop_table("discord_bot_config")
--- a/backend/alembic/versions/9087b548dd69_seed_default_image_gen_config.py
+++ b/backend/alembic/versions/9087b548dd69_seed_default_image_gen_config.py
@@ -0,0 +1,146 @@
+"""seed_default_image_gen_config
+
+Revision ID: 9087b548dd69
+Revises: 2b90f3af54b8
+Create Date: 2026-01-05 00:00:00.000000
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "9087b548dd69"
+down_revision = "2b90f3af54b8"
+branch_labels = None
+depends_on = None
+
+# Constants for default image generation config
+# Source: web/src/app/admin/configuration/image-generation/constants.ts
+IMAGE_PROVIDER_ID = "openai_gpt_image_1"
+MODEL_NAME = "gpt-image-1"
+PROVIDER_NAME = "openai"
+
+
+def upgrade() -> None:
+    """Seed a default image generation config from an existing OpenAI provider.
+
+    Does nothing when ``image_generation_config`` already has rows or when no
+    ``llm_provider`` row has ``provider = 'openai'``. Otherwise inserts a new
+    ``llm_provider`` (reusing only the source provider's API key), a
+    ``model_configuration`` for ``gpt-image-1`` and a default
+    ``image_generation_config`` row pointing at it.
+    """
+    conn = op.get_bind()
+
+    # Check if image_generation_config table already has records
+    existing_configs = (
+        conn.execute(sa.text("SELECT COUNT(*) FROM image_generation_config")).scalar()
+        or 0
+    )
+
+    if existing_configs > 0:
+        # Skip if configs already exist - user may have configured manually
+        return
+
+    # Find the first OpenAI LLM provider
+    openai_provider = conn.execute(
+        sa.text(
+            """
+            SELECT id, api_key
+            FROM llm_provider
+            WHERE provider = :provider
+            ORDER BY id
+            LIMIT 1
+            """
+        ),
+        {"provider": PROVIDER_NAME},
+    ).fetchone()
+
+    if not openai_provider:
+        # No OpenAI provider found - nothing to do
+        return
+
+    # Only the API key is carried over; the source provider's id is not used.
+    _, api_key = openai_provider
+
+    # Create new LLM provider for image generation (clone only api_key)
+    result = conn.execute(
+        sa.text(
+            """
+            INSERT INTO llm_provider (
+                name, provider, api_key, api_base, api_version,
+                deployment_name, default_model_name, is_public,
+                is_default_provider, is_default_vision_provider, is_auto_mode
+            )
+            VALUES (
+                :name, :provider, :api_key, NULL, NULL,
+                NULL, :default_model_name, :is_public,
+                NULL, NULL, :is_auto_mode
+            )
+            RETURNING id
+            """
+        ),
+        {
+            "name": f"Image Gen - {IMAGE_PROVIDER_ID}",
+            "provider": PROVIDER_NAME,
+            "api_key": api_key,
+            "default_model_name": MODEL_NAME,
+            "is_public": True,
+            "is_auto_mode": False,
+        },
+    )
+    new_provider_id = result.scalar()
+
+    # Create model configuration
+    result = conn.execute(
+        sa.text(
+            """
+            INSERT INTO model_configuration (
+                llm_provider_id, name, is_visible, max_input_tokens,
+                supports_image_input, display_name
+            )
+            VALUES (
+                :llm_provider_id, :name, :is_visible, :max_input_tokens,
+                :supports_image_input, :display_name
+            )
+            RETURNING id
+            """
+        ),
+        {
+            "llm_provider_id": new_provider_id,
+            "name": MODEL_NAME,
+            "is_visible": True,
+            "max_input_tokens": None,
+            "supports_image_input": False,
+            "display_name": None,
+        },
+    )
+    model_config_id = result.scalar()
+
+    # Create image generation config
+    conn.execute(
+        sa.text(
+            """
+            INSERT INTO image_generation_config (
+                image_provider_id, model_configuration_id, is_default
+            )
+            VALUES (
+                :image_provider_id, :model_configuration_id, :is_default
+            )
+            """
+        ),
+        {
+            "image_provider_id": IMAGE_PROVIDER_ID,
+            "model_configuration_id": model_config_id,
+            "is_default": True,
+        },
+    )
+
+
+def downgrade() -> None:
+    """Intentionally a no-op; the seeded rows are left in place."""
+    # We don't remove the config on downgrade since it's safe to keep around
+    # If we upgrade again, it will be a no-op due to the existing records check
+    pass
--- a/backend/alembic/versions/9a0296d7421e_add_is_auto_mode_to_llm_provider.py
+++ b/backend/alembic/versions/9a0296d7421e_add_is_auto_mode_to_llm_provider.py
@@ -0,0 +1,33 @@
+"""add_is_auto_mode_to_llm_provider
+
+Revision ID: 9a0296d7421e
+Revises: 7206234e012a
+Create Date: 2025-12-17 18:14:29.620981
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "9a0296d7421e"
+down_revision = "7206234e012a"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Add the non-nullable ``is_auto_mode`` flag to ``llm_provider``."""
+    # The server default lets existing rows satisfy NOT NULL (they get false).
+    is_auto_mode = sa.Column(
+        "is_auto_mode",
+        sa.Boolean(),
+        nullable=False,
+        server_default="false",
+    )
+    op.add_column("llm_provider", is_auto_mode)
+
+
+def downgrade() -> None:
+    op.drop_column("llm_provider", "is_auto_mode")  # stored flags are dropped
--- a/backend/alembic/versions/9b66d3156fc6_user_file_schema_additions.py
+++ b/backend/alembic/versions/9b66d3156fc6_user_file_schema_additions.py
@@ -234,6 +234,8 @@ def downgrade() -> None:
        if "instructions" in columns:
            op.drop_column("user_project", "instructions")
        op.execute("ALTER TABLE user_project RENAME TO user_folder")
+        # Update NULL descriptions to empty string before setting NOT NULL constraint
+        op.execute("UPDATE user_folder SET description = '' WHERE description IS NULL")
        op.alter_column("user_folder", "description", nullable=False)
        logger.info("Renamed user_project back to user_folder")

--- a/backend/alembic/versions/a01bf2971c5d_update_default_tool_descriptions.py
+++ b/backend/alembic/versions/a01bf2971c5d_update_default_tool_descriptions.py
@@ -42,20 +42,13 @@ TOOL_DESCRIPTIONS = {

 def upgrade() -> None:
    conn = op.get_bind()
-    conn.execute(sa.text("BEGIN"))
-
-    try:
-        for tool_id, description in TOOL_DESCRIPTIONS.items():
-            conn.execute(
-                sa.text(
-                    "UPDATE tool SET description = :description WHERE in_code_tool_id = :tool_id"
-                ),
-                {"description": description, "tool_id": tool_id},
-            )
-        conn.execute(sa.text("COMMIT"))
-    except Exception as e:
-        conn.execute(sa.text("ROLLBACK"))
-        raise e
+    for tool_id, description in TOOL_DESCRIPTIONS.items():
+        conn.execute(
+            sa.text(
+                "UPDATE tool SET description = :description WHERE in_code_tool_id = :tool_id"
+            ),
+            {"description": description, "tool_id": tool_id},
+        )


 def downgrade() -> None:
--- a/backend/alembic/versions/a1b2c3d4e5f6_add_license_table.py
+++ b/backend/alembic/versions/a1b2c3d4e5f6_add_license_table.py
@@ -0,0 +1,49 @@
+"""add license table
+
+Revision ID: a1b2c3d4e5f6
+Revises: a01bf2971c5d
+Create Date: 2025-12-04 10:00:00.000000
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+# revision identifiers, used by Alembic.
+revision = "a1b2c3d4e5f6"
+down_revision = "a01bf2971c5d"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Create the single-row ``license`` table and its singleton index."""
+    timestamp_columns = [
+        sa.Column(
+            column_name,
+            sa.DateTime(timezone=True),
+            server_default=sa.func.now(),
+            nullable=False,
+        )
+        for column_name in ("created_at", "updated_at")
+    ]
+    op.create_table(
+        "license",
+        sa.Column("id", sa.Integer(), primary_key=True),
+        sa.Column("license_data", sa.Text(), nullable=False),
+        *timestamp_columns,
+    )
+
+    # Singleton pattern: a unique index over the constant expression (true)
+    # gives every row the same key, so a second row is rejected.
+    op.create_index(
+        "idx_license_singleton",
+        "license",
+        [sa.text("(true)")],
+        unique=True,
+    )
+
+
+def downgrade() -> None:
+    op.drop_index("idx_license_singleton", table_name="license")
+    op.drop_table("license")  # any stored license row is discarded with it
--- a/backend/alembic/versions/a1b2c3d4e5f7_drop_agent_search_metrics_table.py
+++ b/backend/alembic/versions/a1b2c3d4e5f7_drop_agent_search_metrics_table.py
@@ -0,0 +1,47 @@
+"""drop agent_search_metrics table
+
+Revision ID: a1b2c3d4e5f7
+Revises: 73e9983e5091
+Create Date: 2026-01-17
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision = "a1b2c3d4e5f7"
+down_revision = "73e9983e5091"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    op.drop_table("agent__search_metrics")  # rows are lost; downgrade recreates it empty
+
+
+def downgrade() -> None:
+    """Recreate the ``agent__search_metrics`` table.
+
+    Only the schema comes back; rows removed by ``upgrade`` are not restored.
+    """
+    metric_columns = [
+        sa.Column(name, postgresql.JSONB(), nullable=True)
+        for name in ("base_metrics", "refined_metrics", "all_metrics")
+    ]
+    op.create_table(
+        "agent__search_metrics",
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column("user_id", sa.UUID(), nullable=True),
+        sa.Column("persona_id", sa.Integer(), nullable=True),
+        sa.Column("agent_type", sa.String(), nullable=False),
+        sa.Column("start_time", sa.DateTime(timezone=True), nullable=False),
+        sa.Column("base_duration_s", sa.Float(), nullable=False),
+        sa.Column("full_duration_s", sa.Float(), nullable=False),
+        *metric_columns,
+        # Metrics rows are removed together with their user.
+        sa.ForeignKeyConstraint(["user_id"], ["user.id"], ondelete="CASCADE"),
+        sa.ForeignKeyConstraint(["persona_id"], ["persona.id"]),
+        sa.PrimaryKeyConstraint("id"),
+    )
--- a/backend/alembic/versions/a2b3c4d5e6f7_remove_fast_default_model_name.py
+++ b/backend/alembic/versions/a2b3c4d5e6f7_remove_fast_default_model_name.py
@@ -0,0 +1,27 @@
+"""Remove fast_default_model_name from llm_provider
+
+Revision ID: a2b3c4d5e6f7
+Revises: 2a391f840e85
+Create Date: 2024-12-17
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+# revision identifiers, used by Alembic.
+revision = "a2b3c4d5e6f7"
+down_revision = "2a391f840e85"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+    op.drop_column("llm_provider", "fast_default_model_name")  # data is not restored
+
+
+def downgrade() -> None:
+    """Re-add ``fast_default_model_name`` to ``llm_provider`` as nullable."""
+    # Previous values are not restored; the column comes back empty (NULL).
+    column = sa.Column("fast_default_model_name", sa.String(), nullable=True)
+    op.add_column("llm_provider", column)
--- a/backend/alembic/versions/a3c1a7904cd0_remove_userfile_related_deprecated_.py
+++ b/backend/alembic/versions/a3c1a7904cd0_remove_userfile_related_deprecated_.py
@@ -0,0 +1,39 @@
+"""remove userfile related deprecated fields
+
+Revision ID: a3c1a7904cd0
+Revises: 5c3dca366b35
+Create Date: 2026-01-06 13:00:30.634396
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+# revision identifiers, used by Alembic.
+revision = "a3c1a7904cd0"
+down_revision = "5c3dca366b35"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    for column in ("document_id", "document_id_migrated"):
+        op.drop_column("user_file", column)
+    op.drop_column("connector_credential_pair", "is_user_file")
+
+
+def downgrade() -> None:
+    """Re-add the dropped columns; their previous contents are not restored."""
+    # Non-nullable flags carry a server default so existing rows stay valid.
+    is_user_file = sa.Column(
+        "is_user_file", sa.Boolean(), nullable=False, server_default="false"
+    )
+    document_id = sa.Column("document_id", sa.String(), nullable=True)
+    document_id_migrated = sa.Column(
+        "document_id_migrated", sa.Boolean(), nullable=False, server_default="true"
+    )
+
+    # Same order as before: connector_credential_pair first, then user_file.
+    op.add_column("connector_credential_pair", is_user_file)
+    op.add_column("user_file", document_id)
+    op.add_column("user_file", document_id_migrated)
--- a/backend/alembic/versions/a852cbe15577_new_chat_history.py
+++ b/backend/alembic/versions/a852cbe15577_new_chat_history.py
@@ -280,6 +280,14 @@ def downgrade() -> None:
    op.add_column(
        "chat_message", sa.Column("alternate_assistant_id", sa.Integer(), nullable=True)
    )
+    # Recreate the FK constraint that was implicitly dropped when the column was dropped
+    op.create_foreign_key(
+        "fk_chat_message_persona",
+        "chat_message",
+        "persona",
+        ["alternate_assistant_id"],
+        ["id"],
+    )
    op.add_column(
        "chat_message", sa.Column("rephrased_query", sa.Text(), nullable=True)
    )
--- a/backend/alembic/versions/b8c9d0e1f2a3_drop_milestone_table.py
+++ b/backend/alembic/versions/b8c9d0e1f2a3_drop_milestone_table.py
@@ -0,0 +1,46 @@
+"""Drop milestone table
+
+Revision ID: b8c9d0e1f2a3
+Revises: a2b3c4d5e6f7
+Create Date: 2025-12-18
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+import fastapi_users_db_sqlalchemy
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision = "b8c9d0e1f2a3"
+down_revision = "a2b3c4d5e6f7"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    op.drop_table("milestone")  # rows are lost; downgrade recreates it empty
+
+
+def downgrade() -> None:
+    """Recreate the ``milestone`` table (schema only; old rows are not restored)."""
+    user_id = sa.Column(
+        "user_id", fastapi_users_db_sqlalchemy.generics.GUID(), nullable=True
+    )
+    op.create_table(
+        "milestone",
+        sa.Column("id", sa.UUID(), nullable=False),
+        sa.Column("tenant_id", sa.String(), nullable=True),
+        user_id,
+        sa.Column("event_type", sa.String(), nullable=False),
+        sa.Column(
+            "time_created",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column("event_tracker", postgresql.JSONB(), nullable=True),
+        sa.ForeignKeyConstraint(["user_id"], ["user.id"], ondelete="CASCADE"),
+        sa.PrimaryKeyConstraint("id"),
+        sa.UniqueConstraint("event_type", name="uq_milestone_event_type"),
+    )
--- a/backend/alembic/versions/c1d2e3f4a5b6_add_deep_research_tool.py
+++ b/backend/alembic/versions/c1d2e3f4a5b6_add_deep_research_tool.py
@@ -0,0 +1,51 @@
+"""add_deep_research_tool
+
+Revision ID: c1d2e3f4a5b6
+Revises: b8c9d0e1f2a3
+Create Date: 2025-12-18 16:00:00.000000
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "c1d2e3f4a5b6"
+down_revision = "b8c9d0e1f2a3"
+branch_labels = None
+depends_on = None
+
+
+DEEP_RESEARCH_TOOL = {
+    "name": "ResearchAgent",
+    "display_name": "Research Agent",
+    "description": "The Research Agent is a sub-agent that conducts research on a specific topic.",
+    "in_code_tool_id": "ResearchAgent",
+}
+
+
+def upgrade() -> None:
+    """Insert the Research Agent tool row; it is seeded with enabled = false."""
+    op.get_bind().execute(
+        sa.text(
+            """
+            INSERT INTO tool (name, display_name, description, in_code_tool_id, enabled)
+            VALUES (:name, :display_name, :description, :in_code_tool_id, false)
+            """
+        ),
+        DEEP_RESEARCH_TOOL,
+    )
+
+
+def downgrade() -> None:
+    """Delete every ``tool`` row whose in_code_tool_id is the Research Agent's."""
+    op.get_bind().execute(
+        sa.text(
+            """
+            DELETE FROM tool
+            WHERE in_code_tool_id = :in_code_tool_id
+            """
+        ),
+        {"in_code_tool_id": DEEP_RESEARCH_TOOL["in_code_tool_id"]},
+    )
--- a/backend/alembic/versions/c9e2cd766c29_add_s3_file_store_table.py
+++ b/backend/alembic/versions/c9e2cd766c29_add_s3_file_store_table.py
@@ -257,8 +257,8 @@ def _migrate_files_to_external_storage() -> None:
            print(f"File {file_id} not found in PostgreSQL storage.")
            continue

-        lobj_id = cast(int, file_record.lobj_oid)  # type: ignore
-        file_metadata = cast(Any, file_record.file_metadata)  # type: ignore
+        lobj_id = cast(int, file_record.lobj_oid)
+        file_metadata = cast(Any, file_record.file_metadata)

        # Read file content from PostgreSQL
        try:
@@ -280,7 +280,7 @@ def _migrate_files_to_external_storage() -> None:
            else:
                # Convert other types to dict if possible, otherwise None
                try:
-                    file_metadata = dict(file_record.file_metadata)  # type: ignore
+                    file_metadata = dict(file_record.file_metadata)
                except (TypeError, ValueError):
                    file_metadata = None

--- a/backend/alembic/versions/d09fc20a3c66_seed_builtin_tools.py
+++ b/backend/alembic/versions/d09fc20a3c66_seed_builtin_tools.py
@@ -70,80 +70,66 @@ BUILT_IN_TOOLS = [
 def upgrade() -> None:
    conn = op.get_bind()

-    # Start transaction
-    conn.execute(sa.text("BEGIN"))
+    # Get existing tools to check what already exists
+    existing_tools = conn.execute(
+        sa.text("SELECT in_code_tool_id FROM tool WHERE in_code_tool_id IS NOT NULL")
+    ).fetchall()
+    existing_tool_ids = {row[0] for row in existing_tools}

-    try:
-        # Get existing tools to check what already exists
-        existing_tools = conn.execute(
-            sa.text(
-                "SELECT in_code_tool_id FROM tool WHERE in_code_tool_id IS NOT NULL"
+    # Insert or update built-in tools
+    for tool in BUILT_IN_TOOLS:
+        in_code_id = tool["in_code_tool_id"]
+
+        # Handle historical rename: InternetSearchTool -> WebSearchTool
+        if (
+            in_code_id == "WebSearchTool"
+            and "WebSearchTool" not in existing_tool_ids
+            and "InternetSearchTool" in existing_tool_ids
+        ):
+            # Rename the existing InternetSearchTool row in place and update fields
+            conn.execute(
+                sa.text(
+                    """
+                    UPDATE tool
+                    SET name = :name,
+                        display_name = :display_name,
+                        description = :description,
+                        in_code_tool_id = :in_code_tool_id
+                    WHERE in_code_tool_id = 'InternetSearchTool'
+                    """
+                ),
+                tool,
            )
-        ).fetchall()
-        existing_tool_ids = {row[0] for row in existing_tools}
+            # Keep the local view of existing ids in sync to avoid duplicate insert
+            existing_tool_ids.discard("InternetSearchTool")
+            existing_tool_ids.add("WebSearchTool")
+            continue

-        # Insert or update built-in tools
-        for tool in BUILT_IN_TOOLS:
-            in_code_id = tool["in_code_tool_id"]
-
-            # Handle historical rename: InternetSearchTool -> WebSearchTool
-            if (
-                in_code_id == "WebSearchTool"
-                and "WebSearchTool" not in existing_tool_ids
-                and "InternetSearchTool" in existing_tool_ids
-            ):
-                # Rename the existing InternetSearchTool row in place and update fields
-                conn.execute(
-                    sa.text(
-                        """
-                        UPDATE tool
-                        SET name = :name,
-                            display_name = :display_name,
-                            description = :description,
-                            in_code_tool_id = :in_code_tool_id
-                        WHERE in_code_tool_id = 'InternetSearchTool'
-                        """
-                    ),
-                    tool,
-                )
-                # Keep the local view of existing ids in sync to avoid duplicate insert
-                existing_tool_ids.discard("InternetSearchTool")
-                existing_tool_ids.add("WebSearchTool")
-                continue
-
-            if in_code_id in existing_tool_ids:
-                # Update existing tool
-                conn.execute(
-                    sa.text(
-                        """
-                        UPDATE tool
-                        SET name = :name,
-                            display_name = :display_name,
-                            description = :description
-                        WHERE in_code_tool_id = :in_code_tool_id
-                        """
-                    ),
-                    tool,
-                )
-            else:
-                # Insert new tool
-                conn.execute(
-                    sa.text(
-                        """
-                        INSERT INTO tool (name, display_name, description, in_code_tool_id)
-                        VALUES (:name, :display_name, :description, :in_code_tool_id)
-                        """
-                    ),
-                    tool,
-                )
-
-        # Commit transaction
-        conn.execute(sa.text("COMMIT"))
-
-    except Exception as e:
-        # Rollback on error
-        conn.execute(sa.text("ROLLBACK"))
-        raise e
+        if in_code_id in existing_tool_ids:
+            # Update existing tool
+            conn.execute(
+                sa.text(
+                    """
+                    UPDATE tool
+                    SET name = :name,
+                        display_name = :display_name,
+                        description = :description
+                    WHERE in_code_tool_id = :in_code_tool_id
+                    """
+                ),
+                tool,
+            )
+        else:
+            # Insert new tool
+            conn.execute(
+                sa.text(
+                    """
+                    INSERT INTO tool (name, display_name, description, in_code_tool_id)
+                    VALUES (:name, :display_name, :description, :in_code_tool_id)
+                    """
+                ),
+                tool,
+            )


 def downgrade() -> None:
--- a/backend/alembic/versions/d1b637d7050a_sync_exa_api_key_to_content_provider.py
+++ b/backend/alembic/versions/d1b637d7050a_sync_exa_api_key_to_content_provider.py
@@ -0,0 +1,64 @@
+"""sync_exa_api_key_to_content_provider
+
+Revision ID: d1b637d7050a
+Revises: d25168c2beee
+Create Date: 2026-01-09 15:54:15.646249
+
+"""
+
+from alembic import op
+from sqlalchemy import text
+
+
+# revision identifiers, used by Alembic.
+revision = "d1b637d7050a"
+down_revision = "d25168c2beee"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Seed an Exa content provider from an existing Exa search API key.
+
+    Exa uses a shared API key between search and content providers, so when a
+    keyed Exa search provider exists an inactive content provider is added.
+    """
+    bind = op.get_bind()
+
+    # Pick one Exa search provider that has an API key.
+    exa_search = bind.execute(
+        text(
+            """
+            SELECT api_key FROM internet_search_provider
+            WHERE provider_type = 'exa' AND api_key IS NOT NULL
+            LIMIT 1
+            """
+        )
+    ).fetchone()
+
+    if exa_search:
+        # ON CONFLICT leaves an already-existing 'Exa' content provider as is.
+        bind.execute(
+            text(
+                """
+                INSERT INTO internet_content_provider
+                (name, provider_type, api_key, is_active)
+                VALUES ('Exa', 'exa', :api_key, false)
+                ON CONFLICT (name) DO NOTHING
+                """
+            ),
+            {"api_key": exa_search[0]},
+        )
+
+
+def downgrade() -> None:
+    # Removes every content provider of type 'exa', not only a row that
+    # upgrade() may have inserted.
+    op.get_bind().execute(
+        text(
+            """
+            DELETE FROM internet_content_provider
+            WHERE provider_type = 'exa'
+            """
+        )
+    )
--- a/backend/alembic/versions/d25168c2beee_tool_name_consistency.py
+++ b/backend/alembic/versions/d25168c2beee_tool_name_consistency.py
@@ -0,0 +1,86 @@
+"""tool_name_consistency
+
+Revision ID: d25168c2beee
+Revises: 8405ca81cc83
+Create Date: 2026-01-11 17:54:40.135777
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "d25168c2beee"
+down_revision = "8405ca81cc83"
+branch_labels = None
+depends_on = None
+
+
+# Currently the seeded tools have the in_code_tool_id == name
+CURRENT_TOOL_NAME_MAPPING = [
+    "SearchTool",
+    "WebSearchTool",
+    "ImageGenerationTool",
+    "PythonTool",
+    "OpenURLTool",
+    "KnowledgeGraphTool",
+    "ResearchAgent",
+]
+
+# Mapping of in_code_tool_id -> name
+# These are the expected names that we want in the database
+EXPECTED_TOOL_NAME_MAPPING = {
+    "SearchTool": "internal_search",
+    "WebSearchTool": "web_search",
+    "ImageGenerationTool": "generate_image",
+    "PythonTool": "python",
+    "OpenURLTool": "open_url",
+    "KnowledgeGraphTool": "run_kg_search",
+    "ResearchAgent": "research_agent",
+}
+
+
+def upgrade() -> None:
+    """Set ``tool.name`` of each built-in tool to its expected name.
+
+    Rows are matched by in_code_tool_id; tools not in the mapping are untouched.
+    """
+    bind = op.get_bind()
+
+    # EXPECTED_TOOL_NAME_MAPPING maps in_code_tool_id -> desired name.
+    for tool_id, new_name in EXPECTED_TOOL_NAME_MAPPING.items():
+        bind.execute(
+            sa.text(
+                """
+                UPDATE tool
+                SET name = :expected_name
+                WHERE in_code_tool_id = :in_code_tool_id
+                """
+            ),
+            {
+                "expected_name": new_name,
+                "in_code_tool_id": tool_id,
+            },
+        )
+
+
+def downgrade() -> None:
+    """Restore the earlier naming, where name equals in_code_tool_id."""
+    bind = op.get_bind()
+
+    # Only the tools listed in CURRENT_TOOL_NAME_MAPPING are reverted.
+    for tool_id in CURRENT_TOOL_NAME_MAPPING:
+        bind.execute(
+            sa.text(
+                """
+                UPDATE tool
+                SET name = :current_name
+                WHERE in_code_tool_id = :in_code_tool_id
+                """
+            ),
+            {
+                "current_name": tool_id,
+                "in_code_tool_id": tool_id,
+            },
+        )
--- a/backend/alembic/versions/e209dc5a8156_added_prune_frequency.py
+++ b/backend/alembic/versions/e209dc5a8156_added_prune_frequency.py
@@ -11,8 +11,8 @@ import sqlalchemy as sa

 revision = "e209dc5a8156"
 down_revision = "48d14957fe80"
-branch_labels = None  # type: ignore
-depends_on = None  # type: ignore
+branch_labels = None
+depends_on = None


 def upgrade() -> None:
--- a/backend/alembic/versions/e8f0d2a38171_add_status_to_mcp_server_and_make_auth_.py
+++ b/backend/alembic/versions/e8f0d2a38171_add_status_to_mcp_server_and_make_auth_.py
@@ -8,7 +8,7 @@ Create Date: 2025-11-28 11:15:37.667340

 from alembic import op
 import sqlalchemy as sa
-from onyx.db.enums import (  # type: ignore[import-untyped]
+from onyx.db.enums import (
    MCPTransport,
    MCPAuthenticationType,
    MCPAuthenticationPerformer,
--- a/backend/alembic_tenants/env.py
+++ b/backend/alembic_tenants/env.py
@@ -20,7 +20,9 @@ config = context.config
 if config.config_file_name is not None and config.attributes.get(
    "configure_logger", True
 ):
-    fileConfig(config.config_file_name)
+    # disable_existing_loggers=False prevents breaking pytest's caplog fixture
+    # See: https://pytest-alembic.readthedocs.io/en/latest/setup.html#caplog-issues
+    fileConfig(config.config_file_name, disable_existing_loggers=False)

 # add your model's MetaData object here
 # for 'autogenerate' support
@@ -82,9 +84,9 @@ def run_migrations_offline() -> None:
 def do_run_migrations(connection: Connection) -> None:
    context.configure(
        connection=connection,
-        target_metadata=target_metadata,  # type: ignore
+        target_metadata=target_metadata,  # type: ignore[arg-type]
        include_object=include_object,
-    )  # type: ignore
+    )

    with context.begin_transaction():
        context.run_migrations()
@@ -108,9 +110,24 @@ async def run_async_migrations() -> None:


 def run_migrations_online() -> None:
-    """Run migrations in 'online' mode."""
+    """Run migrations in 'online' mode.

-    asyncio.run(run_async_migrations())
+    Supports pytest-alembic by checking for a pre-configured connection
+    in context.config.attributes["connection"]. If present, uses that
+    connection/engine directly instead of creating a new async engine.
+    """
+    # Check if pytest-alembic is providing a connection/engine
+    connectable = context.config.attributes.get("connection", None)
+
+    if connectable is not None:
+        # pytest-alembic is providing an engine - use it directly
+        with connectable.connect() as connection:
+            do_run_migrations(connection)
+            # Commit to ensure changes are visible to next migration
+            connection.commit()
+    else:
+        # Normal operation - use async migrations
+        asyncio.run(run_async_migrations())


 if context.is_offline_mode():
--- a/backend/ee/onyx/configs/app_configs.py
+++ b/backend/ee/onyx/configs/app_configs.py
@@ -109,11 +109,6 @@ CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS = float(


 STRIPE_SECRET_KEY = os.environ.get("STRIPE_SECRET_KEY")
-STRIPE_PRICE_ID = os.environ.get("STRIPE_PRICE")
-
-OPENAI_DEFAULT_API_KEY = os.environ.get("OPENAI_DEFAULT_API_KEY")
-ANTHROPIC_DEFAULT_API_KEY = os.environ.get("ANTHROPIC_DEFAULT_API_KEY")
-COHERE_DEFAULT_API_KEY = os.environ.get("COHERE_DEFAULT_API_KEY")

 # JWT Public Key URL
 JWT_PUBLIC_KEY_URL: str | None = os.getenv("JWT_PUBLIC_KEY_URL", None)
@@ -133,3 +128,8 @@ MARKETING_POSTHOG_API_KEY = os.environ.get("MARKETING_POSTHOG_API_KEY")
 HUBSPOT_TRACKING_URL = os.environ.get("HUBSPOT_TRACKING_URL")

 GATED_TENANTS_KEY = "gated_tenants"
+
+# License enforcement - when True, blocks API access for gated/expired licenses
+LICENSE_ENFORCEMENT_ENABLED = (
+    os.environ.get("LICENSE_ENFORCEMENT_ENABLED", "").lower() == "true"
+)
--- a/backend/ee/onyx/db/document_set.py
+++ b/backend/ee/onyx/db/document_set.py
@@ -118,6 +118,6 @@ def fetch_document_sets(
            .all()
        )

-        document_set_with_cc_pairs.append((document_set, cc_pairs))  # type: ignore
+        document_set_with_cc_pairs.append((document_set, cc_pairs))

    return document_set_with_cc_pairs
--- a/backend/ee/onyx/db/license.py
+++ b/backend/ee/onyx/db/license.py
@@ -0,0 +1,278 @@
+"""Database and cache operations for the license table."""
+
+from datetime import datetime
+
+from sqlalchemy import func
+from sqlalchemy import select
+from sqlalchemy.orm import Session
+
+from ee.onyx.server.license.models import LicenseMetadata
+from ee.onyx.server.license.models import LicensePayload
+from ee.onyx.server.license.models import LicenseSource
+from onyx.db.models import License
+from onyx.db.models import User
+from onyx.redis.redis_pool import get_redis_client
+from onyx.redis.redis_pool import get_redis_replica_client
+from onyx.utils.logger import setup_logger
+from shared_configs.configs import MULTI_TENANT
+from shared_configs.contextvars import get_current_tenant_id
+
+logger = setup_logger()
+
+LICENSE_METADATA_KEY = "license:metadata"
+LICENSE_CACHE_TTL_SECONDS = 86400  # 24 hours
+
+
+# -----------------------------------------------------------------------------
+# Database CRUD Operations
+# -----------------------------------------------------------------------------
+
+
+def get_license(db_session: Session) -> License | None:
+    """Fetch the stored license row (singleton pattern - only one row).
+
+    Args:
+        db_session: Database session
+
+    Returns:
+        The License row, or None when no license has been stored
+    """
+    rows = db_session.execute(select(License)).scalars()
+    return rows.first()
+
+
+def upsert_license(db_session: Session, license_data: str) -> License:
+    """
+    Store ``license_data`` in the one-row license table and commit.
+
+    Overwrites the existing row when there is one, otherwise inserts a new row.
+
+    Args:
+        db_session: Database session
+        license_data: Base64-encoded signed license blob
+
+    Returns:
+        The created or updated License object, refreshed after the commit
+    """
+    record = get_license(db_session)
+    created = not record
+
+    if record:
+        record.license_data = license_data
+    else:
+        record = License(license_data=license_data)
+        db_session.add(record)
+
+    db_session.commit()
+    db_session.refresh(record)
+    logger.info("License created" if created else "License updated")
+    return record
+
+
+def delete_license(db_session: Session) -> bool:
+    """
+    Remove the stored license row, if there is one, and commit.
+
+    Args:
+        db_session: Database session
+
+    Returns:
+        True if a row was deleted, False if no license existed
+    """
+    current = get_license(db_session)
+    if not current:
+        return False
+    db_session.delete(current)
+    db_session.commit()
+    logger.info("License deleted")
+    return True
+
+
+# -----------------------------------------------------------------------------
+# Seat Counting
+# -----------------------------------------------------------------------------
+
+
+def get_used_seats(tenant_id: str | None = None) -> int:
+    """
+    Return the number of seats currently in use.
+
+    Multi-tenant: delegates to get_tenant_count for the given tenant, falling
+    back to the current tenant when tenant_id is not provided.
+    Self-hosted: counts every active user (Onyx UI users plus Slack users
+    that were converted to Onyx users); tenant_id is not used.
+    """
+    if MULTI_TENANT:
+        from ee.onyx.server.tenants.user_mapping import get_tenant_count
+
+        return get_tenant_count(tenant_id or get_current_tenant_id())
+
+    # Self-hosted path; imported lazily, like the multi-tenant helper above.
+    from onyx.db.engine.sql_engine import get_session_with_current_tenant
+
+    active_users = select(func.count()).select_from(User).where(User.is_active)  # type: ignore
+    with get_session_with_current_tenant() as db_session:
+        active_count = db_session.execute(active_users).scalar()
+        return active_count or 0
+
+
+# -----------------------------------------------------------------------------
+# Redis Cache Operations
+# -----------------------------------------------------------------------------
+
+
+def get_cached_license_metadata(tenant_id: str | None = None) -> LicenseMetadata | None:
+    """
+    Read the license metadata entry from the Redis replica, if present.
+
+    Args:
+        tenant_id: Tenant ID (for multi-tenant deployments); defaults to the
+            current tenant
+
+    Returns:
+        The parsed LicenseMetadata, or None when the key is missing/empty or
+        its value cannot be decoded and validated (a warning is logged)
+    """
+    replica = get_redis_replica_client(
+        tenant_id=tenant_id or get_current_tenant_id()
+    )
+    raw = replica.get(LICENSE_METADATA_KEY)
+    if not raw:
+        return None
+
+    try:
+        # The client may hand back bytes or an already-decoded value.
+        cached_json = raw.decode("utf-8") if isinstance(raw, bytes) else str(raw)
+        return LicenseMetadata.model_validate_json(cached_json)
+    except Exception as e:
+        logger.warning(f"Failed to parse cached license metadata: {e}")
+        return None
+
+
+def invalidate_license_cache(tenant_id: str | None = None) -> None:
+    """
+    Drop the cached license metadata from Redis.
+
+    Only the LICENSE_METADATA_KEY entry is removed; the license row in the
+    database is untouched. Deleting a key that does not exist is a no-op,
+    so this can be called unconditionally.
+
+    Args:
+        tenant_id: Tenant ID (for multi-tenant deployments); defaults to the
+            current tenant
+    """
+    target_tenant = tenant_id or get_current_tenant_id()
+    get_redis_client(tenant_id=target_tenant).delete(LICENSE_METADATA_KEY)
+    logger.info("License cache invalidated")
+
+
+def update_license_cache(
+    payload: LicensePayload,
+    source: LicenseSource | None = None,
+    grace_period_end: datetime | None = None,
+    tenant_id: str | None = None,
+) -> LicenseMetadata:
+    """
+    Build LicenseMetadata from a verified payload and cache it in Redis.
+
+    Every status (ACTIVE, GRACE_PERIOD, GATED_ACCESS) is cached, because:
+    1. The frontend needs the status to show the appropriate UI/banners
+    2. Caching avoids repeated DB + crypto verification on every request
+    3. Status enforcement happens at the feature level, not here
+
+    Args:
+        payload: Verified license payload
+        source: How the license was obtained
+        grace_period_end: Optional grace period end time
+        tenant_id: Tenant ID (for multi-tenant deployments)
+
+    Returns:
+        The LicenseMetadata that was written to the cache
+    """
+    from ee.onyx.utils.license import get_license_status
+
+    resolved_tenant = tenant_id or get_current_tenant_id()
+    cache = get_redis_client(tenant_id=resolved_tenant)
+
+    # Seat usage and status are computed now and frozen into the cached entry.
+    seats_in_use = get_used_seats(resolved_tenant)
+    status = get_license_status(payload, grace_period_end)
+
+    metadata = LicenseMetadata(
+        tenant_id=payload.tenant_id,
+        organization_name=payload.organization_name,
+        seats=payload.seats,
+        used_seats=seats_in_use,
+        plan_type=payload.plan_type,
+        issued_at=payload.issued_at,
+        expires_at=payload.expires_at,
+        grace_period_end=grace_period_end,
+        status=status,
+        source=source,
+        stripe_subscription_id=payload.stripe_subscription_id,
+    )
+
+    # The entry expires after LICENSE_CACHE_TTL_SECONDS, so stale metadata
+    # ages out even when nothing invalidates it explicitly.
+    serialized = metadata.model_dump_json()
+    cache.setex(LICENSE_METADATA_KEY, LICENSE_CACHE_TTL_SECONDS, serialized)
+
+    logger.info(f"License cache updated: {metadata.seats} seats, status={status.value}")
+    return metadata
+
+
+def refresh_license_cache(
+    db_session: Session,
+    tenant_id: str | None = None,
+) -> LicenseMetadata | None:
+    """
+    Rebuild the cached license metadata from the stored license row.
+
+    Args:
+        db_session: Database session
+        tenant_id: Tenant ID (for multi-tenant deployments)
+
+    Returns:
+        Fresh LicenseMetadata, or None (with the cache cleared) when no license
+        is stored or a ValueError is raised while verifying/caching it
+    """
+    from ee.onyx.utils.license import verify_license_signature
+
+    stored = get_license(db_session)
+    if stored:
+        try:
+            verified = verify_license_signature(stored.license_data)
+            return update_license_cache(
+                verified,
+                source=LicenseSource.AUTO_FETCH,
+                tenant_id=tenant_id,
+            )
+        except ValueError as e:
+            logger.error(f"Failed to verify license during cache refresh: {e}")
+
+    # Reached when there is no license row or a ValueError was caught above.
+    invalidate_license_cache(tenant_id)
+    return None
+
+
+def get_license_metadata(
+    db_session: Session,
+    tenant_id: str | None = None,
+) -> LicenseMetadata | None:
+    """
+    Return license metadata, preferring the Redis cache over the database.
+
+    Args:
+        db_session: Database session, used only on a cache miss
+        tenant_id: Tenant ID (for multi-tenant deployments)
+
+    Returns:
+        LicenseMetadata from the cache or a refresh, None if neither yields one
+    """
+    # A cache hit is returned as-is.
+    metadata = get_cached_license_metadata(tenant_id)
+    if not metadata:
+        # Cache miss: rebuild from the stored license (may still be None).
+        metadata = refresh_license_cache(db_session, tenant_id)
+
+    return metadata
--- a/backend/ee/onyx/db/persona.py
+++ b/backend/ee/onyx/db/persona.py
@@ -3,30 +3,42 @@ from uuid import UUID
 from sqlalchemy.orm import Session

 from onyx.configs.constants import NotificationType
+from onyx.db.models import Persona
 from onyx.db.models import Persona__User
 from onyx.db.models import Persona__UserGroup
 from onyx.db.notification import create_notification
 from onyx.server.features.persona.models import PersonaSharedNotificationData


-def make_persona_private(
+def update_persona_access(
    persona_id: int,
    creator_user_id: UUID | None,
-    user_ids: list[UUID] | None,
-    group_ids: list[int] | None,
    db_session: Session,
+    is_public: bool | None = None,
+    user_ids: list[UUID] | None = None,
+    group_ids: list[int] | None = None,
 ) -> None:
-    """NOTE(rkuo): This function batches all updates into a single commit. If we don't
-    dedupe the inputs, the commit will exception."""
+    """Updates the access settings for a persona including public status, user shares,
+    and group shares.

-    db_session.query(Persona__User).filter(
-        Persona__User.persona_id == persona_id
-    ).delete(synchronize_session="fetch")
-    db_session.query(Persona__UserGroup).filter(
-        Persona__UserGroup.persona_id == persona_id
-    ).delete(synchronize_session="fetch")
+    NOTE: This function batches all updates. If we don't dedupe the inputs,
+    the commit will exception.
+
+    NOTE: Callers are responsible for committing."""
+
+    if is_public is not None:
+        persona = db_session.query(Persona).filter(Persona.id == persona_id).first()
+        if persona:
+            persona.is_public = is_public
+
+    # NOTE: For user-ids and group-ids, `None` means "leave unchanged", `[]` means "clear all shares",
+    # and a non-empty list means "replace with these shares".
+
+    if user_ids is not None:
+        db_session.query(Persona__User).filter(
+            Persona__User.persona_id == persona_id
+        ).delete(synchronize_session="fetch")

-    if user_ids:
        user_ids_set = set(user_ids)
        for user_id in user_ids_set:
            db_session.add(Persona__User(persona_id=persona_id, user_id=user_id))
@@ -34,17 +46,20 @@ def make_persona_private(
                create_notification(
                    user_id=user_id,
                    notif_type=NotificationType.PERSONA_SHARED,
+                    title="A new agent was shared with you!",
                    db_session=db_session,
                    additional_data=PersonaSharedNotificationData(
                        persona_id=persona_id,
                    ).model_dump(),
                )

-    if group_ids:
+    if group_ids is not None:
+        db_session.query(Persona__UserGroup).filter(
+            Persona__UserGroup.persona_id == persona_id
+        ).delete(synchronize_session="fetch")
+
        group_ids_set = set(group_ids)
        for group_id in group_ids_set:
            db_session.add(
                Persona__UserGroup(persona_id=persona_id, user_group_id=group_id)
            )
-
-    db_session.commit()
--- a/backend/ee/onyx/db/search.py
+++ b/backend/ee/onyx/db/search.py
@@ -0,0 +1,64 @@
+import uuid
+from datetime import timedelta
+from uuid import UUID
+
+from sqlalchemy import select
+from sqlalchemy.orm import Session
+
+from onyx.db.engine.time_utils import get_db_current_time
+from onyx.db.models import SearchQuery
+
+
+def create_search_query(
+    db_session: Session,
+    user_id: UUID,
+    query: str,
+    query_expansions: list[str] | None = None,
+) -> SearchQuery:
+    """Insert a new `SearchQuery` row, commit it, and return the refreshed object.
+
+    Notes:
+    - The primary key is generated here with `uuid.uuid4()` because
+      `SearchQuery.id` is a UUID PK without a server-side default.
+    - `created_at` is not set here; it is filled by the DB (server_default=now()).
+    """
+    new_row = SearchQuery(
+        id=uuid.uuid4(),
+        user_id=user_id,
+        query=query,
+        query_expansions=query_expansions,
+    )
+
+    db_session.add(new_row)
+    db_session.commit()
+    # Reload the row so DB-populated columns are present on the returned object
+    db_session.refresh(new_row)
+
+    return new_row
+
+
+def fetch_search_queries_for_user(
+    db_session: Session,
+    user_id: UUID,
+    filter_days: int | None = None,
+    limit: int | None = None,
+) -> list[SearchQuery]:
+    """Return a user's `SearchQuery` rows ordered by `created_at`, newest first.
+
+    Args:
+        db_session: Database session used to run the query.
+        user_id: User UUID.
+        filter_days: Optional time filter. If provided, only rows created within
+            the last `filter_days` days are returned.
+        limit: Optional max number of rows to return.
+
+    Raises:
+        ValueError: If `filter_days` is provided but is not greater than zero.
+    """
+    if filter_days is not None and filter_days <= 0:
+        raise ValueError("filter_days must be > 0")
+
+    conditions = [SearchQuery.user_id == user_id]
+    if filter_days is not None:
+        # The cutoff is computed from the database clock, not the app server clock
+        window_start = get_db_current_time(db_session) - timedelta(days=filter_days)
+        conditions.append(SearchQuery.created_at >= window_start)
+
+    stmt = (
+        select(SearchQuery)
+        .where(*conditions)
+        .order_by(SearchQuery.created_at.desc())
+    )
+    if limit is not None:
+        stmt = stmt.limit(limit)
+
+    return list(db_session.scalars(stmt).all())
--- a/backend/ee/onyx/main.py
+++ b/backend/ee/onyx/main.py
@@ -14,17 +14,19 @@ from ee.onyx.server.enterprise_settings.api import (
    basic_router as enterprise_settings_router,
 )
 from ee.onyx.server.evals.api import router as evals_router
+from ee.onyx.server.license.api import router as license_router
 from ee.onyx.server.manage.standard_answer import router as standard_answer_router
+from ee.onyx.server.middleware.license_enforcement import (
+    add_license_enforcement_middleware,
+)
 from ee.onyx.server.middleware.tenant_tracking import (
    add_api_server_tenant_id_middleware,
 )
 from ee.onyx.server.oauth.api import router as ee_oauth_router
-from ee.onyx.server.query_and_chat.chat_backend import (
-    router as chat_router,
-)
 from ee.onyx.server.query_and_chat.query_backend import (
    basic_router as ee_query_router,
 )
+from ee.onyx.server.query_and_chat.search_backend import router as search_router
 from ee.onyx.server.query_history.api import router as query_history_router
 from ee.onyx.server.reporting.usage_export_api import router as usage_export_router
 from ee.onyx.server.seeding import seed_db
@@ -84,6 +86,10 @@ def get_application() -> FastAPI:
    if MULTI_TENANT:
        add_api_server_tenant_id_middleware(application, logger)

+    # Add license enforcement middleware (runs after tenant tracking)
+    # This blocks access when license is expired/gated
+    add_license_enforcement_middleware(application, logger)
+
    if AUTH_TYPE == AuthType.CLOUD:
        # For Google OAuth, refresh tokens are requested by:
        # 1. Adding the right scopes
@@ -123,7 +129,7 @@ def get_application() -> FastAPI:
    # EE only backend APIs
    include_router_with_global_prefix_prepended(application, query_router)
    include_router_with_global_prefix_prepended(application, ee_query_router)
-    include_router_with_global_prefix_prepended(application, chat_router)
+    include_router_with_global_prefix_prepended(application, search_router)
    include_router_with_global_prefix_prepended(application, standard_answer_router)
    include_router_with_global_prefix_prepended(application, ee_oauth_router)
    include_router_with_global_prefix_prepended(application, ee_document_cc_pair_router)
@@ -139,6 +145,8 @@ def get_application() -> FastAPI:
    )
    include_router_with_global_prefix_prepended(application, enterprise_settings_router)
    include_router_with_global_prefix_prepended(application, usage_export_router)
+    # License management
+    include_router_with_global_prefix_prepended(application, license_router)

    if MULTI_TENANT:
        # Tenant management
--- a/backend/tests/unit/onyx/onyxbot/init.py
+++ b/backend/tests/unit/onyx/onyxbot/init.py
--- a/backend/ee/onyx/prompts/query_expansion.py
+++ b/backend/ee/onyx/prompts/query_expansion.py
@@ -0,0 +1,27 @@
+# Single message is likely most reliable and generally better for this task
+# No final reminders at the end since the user query is expected to be short
+# If it is not short, it should go into the chat flow so we do not need to account for this.
+# Template placeholder: {user_query} (filled in with str.format).
+KEYWORD_EXPANSION_PROMPT = """
+Generate a set of keyword-only queries to help find relevant documents for the provided query. \
+These queries will be passed to a bm25-based keyword search engine. \
+Provide a single query per line (where each query consists of one or more keywords). \
+The queries must be purely keywords and not contain any filler natural language. \
+Each query should have as few keywords as necessary to represent the user's search intent. \
+If there are no useful expansions, simply return the original query with no additional keyword queries. \
+CRITICAL: Do not include any additional formatting, comments, or anything aside from the keyword queries.
+
+The user query is:
+{user_query}
+""".strip()
+
+
+# Prompt template asking the model to answer with the single word "keyword" or "semantic".
+# Template placeholder: {user_query}. Surrounding whitespace is stripped from the template.
+QUERY_TYPE_PROMPT = """
+Determine if the provided query is better suited for a keyword search or a semantic search.
+Respond with "keyword" or "semantic" literally and nothing else.
+Do not provide any additional text or reasoning to your response.
+
+CRITICAL: It must only be 1 single word - EITHER "keyword" or "semantic".
+
+The user query is:
+{user_query}
+""".strip()
--- a/backend/ee/onyx/prompts/search_flow_classification.py
+++ b/backend/ee/onyx/prompts/search_flow_classification.py
@@ -0,0 +1,42 @@
+# ruff: noqa: E501, W605 start
+# Labels the model is asked to answer with; they are interpolated into the prompt below.
+SEARCH_CLASS = "search"
+CHAT_CLASS = "chat"
+
+# Note: with many larger LLMs, the latency of running this prompt via third party APIs can be as high as 2 seconds,
+# which is too slow for many use cases.
+# This is an f-string, so {{user_query}} is left as a literal {user_query} placeholder for a later str.format call.
+SEARCH_CHAT_PROMPT = f"""
+Determine if the following query is better suited for a search UI or a chat UI. Respond with "{SEARCH_CLASS}" or "{CHAT_CLASS}" literally and nothing else. \
+Do not provide any additional text or reasoning to your response. CRITICAL, IT MUST ONLY BE 1 SINGLE WORD - EITHER "{SEARCH_CLASS}" or "{CHAT_CLASS}".
+
+# Classification Guidelines:
+## {SEARCH_CLASS}
+- If the query consists entirely of keywords or query doesn't require any answer from the AI
+- If the query is a short statement that seems like a search query rather than a question
+- If the query feels nonsensical or is a short phrase that possibly describes a document or information that could be found in an internal document
+
+### Examples of {SEARCH_CLASS} queries:
+- Find me the document that goes over the onboarding process for a new hire
+- Pull requests since last week
+- Sales Runbook AMEA Region
+- Procurement process
+- Retrieve the PRD for project X
+
+## {CHAT_CLASS}
+- If the query is asking a question that requires an answer rather than a document
+- If the query is asking for a solution, suggestion, or general help
+- If the query is seeking information that is on the web and likely not in a company internal document
+- If the query should be answered without any context from additional documents or searches
+
+### Examples of {CHAT_CLASS} queries:
+- What led us to win the deal with company X? (seeking answer)
+- Google Drive not sync-ing files to my computer (seeking solution)
+- Review my email: <whatever the email is> (general help)
+- Write me a script to... (general help)
+- Cheap flights Europe to Tokyo (information likely found on the web, not internal)
+
+# User Query:
+{{user_query}}
+
+REMEMBER TO ONLY RESPOND WITH "{SEARCH_CLASS}" OR "{CHAT_CLASS}" AND NOTHING ELSE.
+""".strip()
+# ruff: noqa: E501, W605 end
--- a/backend/ee/onyx/search/process_search_query.py
+++ b/backend/ee/onyx/search/process_search_query.py
@@ -0,0 +1,270 @@
+from collections.abc import Generator
+
+from sqlalchemy.orm import Session
+
+from ee.onyx.db.search import create_search_query
+from ee.onyx.secondary_llm_flows.query_expansion import expand_keywords
+from ee.onyx.server.query_and_chat.models import SearchDocWithContent
+from ee.onyx.server.query_and_chat.models import SearchFullResponse
+from ee.onyx.server.query_and_chat.models import SendSearchQueryRequest
+from ee.onyx.server.query_and_chat.streaming_models import LLMSelectedDocsPacket
+from ee.onyx.server.query_and_chat.streaming_models import SearchDocsPacket
+from ee.onyx.server.query_and_chat.streaming_models import SearchErrorPacket
+from ee.onyx.server.query_and_chat.streaming_models import SearchQueriesPacket
+from onyx.context.search.models import BaseFilters
+from onyx.context.search.models import ChunkSearchRequest
+from onyx.context.search.models import InferenceChunk
+from onyx.context.search.pipeline import merge_individual_chunks
+from onyx.context.search.pipeline import search_pipeline
+from onyx.db.models import User
+from onyx.document_index.factory import get_current_primary_default_document_index
+from onyx.document_index.interfaces import DocumentIndex
+from onyx.llm.factory import get_default_llm
+from onyx.secondary_llm_flows.document_filter import select_sections_for_expansion
+from onyx.tools.tool_implementations.search.search_utils import (
+    weighted_reciprocal_rank_fusion,
+)
+from onyx.utils.logger import setup_logger
+from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
+
+logger = setup_logger()
+
+
+# This is just a heuristic that also happens to work well for the UI/UX
+# Users would not find it useful to see a huge list of suggested docs
+# but more than 1 is also likely good as many questions may target more than 1 doc.
+TARGET_NUM_SECTIONS_FOR_LLM_SELECTION = 3
+
+
+def _run_single_search(
+    query: str,
+    filters: BaseFilters | None,
+    document_index: DocumentIndex,
+    user: User | None,
+    db_session: Session,
+) -> list[InferenceChunk]:
+    """Run the search pipeline for one query string and return its chunks.
+
+    The query and the caller-supplied filters are wrapped in a
+    `ChunkSearchRequest`; no persona is applied.
+    """
+    return search_pipeline(
+        chunk_search_request=ChunkSearchRequest(
+            query=query,
+            user_selected_filters=filters,
+        ),
+        document_index=document_index,
+        user=user,
+        # Direct search is not tied to any persona
+        persona=None,
+        db_session=db_session,
+    )
+
+
+def stream_search_query(
+    request: SendSearchQueryRequest,
+    user: User | None,
+    db_session: Session,
+) -> Generator[
+    SearchQueriesPacket | SearchDocsPacket | LLMSelectedDocsPacket | SearchErrorPacket,
+    None,
+    None,
+]:
+    """
+    Core search function that yields streaming packets.
+    Used by both streaming and non-streaming endpoints.
+
+    Steps, in order:
+    1. Optionally expand the query into keyword queries (`request.run_query_expansion`).
+       Expansion errors are logged and the original query is used alone.
+    2. Persist the search via `create_search_query` (only when `user` is not None).
+    3. Run the search: a single pipeline call, or one call per query in parallel
+       with the results merged by weighted reciprocal rank fusion.
+    4. Merge chunks into sections and, if requested, ask the LLM to select documents.
+    5. Yield the packets.
+
+    Yields:
+        SearchQueriesPacket, then SearchDocsPacket, then (only when LLM selection
+        was requested) LLMSelectedDocsPacket. All packets are yielded after every
+        step above has finished, including LLM selection. This function never
+        yields SearchErrorPacket itself, although the return annotation allows it.
+    """
+    # Get document index
+    document_index = get_current_primary_default_document_index(db_session)
+
+    # Determine queries to execute
+    original_query = request.search_query
+    keyword_expansions: list[str] = []
+
+    if request.run_query_expansion:
+        try:
+            llm = get_default_llm()
+            keyword_expansions = expand_keywords(
+                user_query=original_query,
+                llm=llm,
+            )
+            if keyword_expansions:
+                logger.debug(
+                    f"Query expansion generated {len(keyword_expansions)} keyword queries"
+                )
+        except Exception as e:
+            # Expansion is best-effort: fall back to searching with the original query only
+            logger.warning(f"Query expansion failed: {e}; using original query only.")
+            keyword_expansions = []
+
+    # Build list of all executed queries for tracking
+    # The original query is always first; this ordering is relied on below.
+    all_executed_queries = [original_query] + keyword_expansions
+
+    # TODO remove this check, user should not be None
+    if user is not None:
+        # Record the search (and any expansions) before the search itself runs
+        create_search_query(
+            db_session=db_session,
+            user_id=user.id,
+            query=request.search_query,
+            query_expansions=keyword_expansions if keyword_expansions else None,
+        )
+
+    # Execute search(es)
+    if not keyword_expansions:
+        # Single query (original only) - no threading needed
+        chunks = _run_single_search(
+            query=original_query,
+            filters=request.filters,
+            document_index=document_index,
+            user=user,
+            db_session=db_session,
+        )
+    else:
+        # Multiple queries - run in parallel and merge with RRF
+        # First query is the original (semantic), rest are keyword expansions
+        # NOTE(review): the same `db_session` object is handed to every parallel call;
+        # confirm that sharing one session across these workers is safe.
+        search_functions = [
+            (
+                _run_single_search,
+                (query, request.filters, document_index, user, db_session),
+            )
+            for query in all_executed_queries
+        ]
+
+        # Run all searches in parallel
+        # NOTE(review): with allow_failures=True a failed search presumably shows up as a
+        # falsy placeholder in the result list (the code below only keeps truthy entries),
+        # which the declared element type does not reflect - confirm against the helper.
+        all_search_results: list[list[InferenceChunk]] = (
+            run_functions_tuples_in_parallel(
+                search_functions,
+                allow_failures=True,
+            )
+        )
+
+        # Separate original query results from keyword expansion results
+        # Note that in rare cases, the original query may have failed and so we may be
+        # just overweighting one set of keyword results, should be not a big deal though.
+        # NOTE(review): that only applies if failed entries are dropped from the list
+        # (shifting index 0 to a keyword result); verify the helper's behavior.
+        original_result = all_search_results[0] if all_search_results else []
+        keyword_results = all_search_results[1:] if len(all_search_results) > 1 else []
+
+        # Build valid results and weights
+        # Original query (semantic): weight 2.0
+        # Keyword expansions: weight 1.0 each
+        # Empty/failed result lists are skipped so `valid_results` and `weights` stay aligned.
+        valid_results: list[list[InferenceChunk]] = []
+        weights: list[float] = []
+
+        if original_result:
+            valid_results.append(original_result)
+            weights.append(2.0)
+
+        for keyword_result in keyword_results:
+            if keyword_result:
+                valid_results.append(keyword_result)
+                weights.append(1.0)
+
+        if not valid_results:
+            logger.warning("All parallel searches returned empty results")
+            chunks = []
+        else:
+            # Chunks are identified across result lists by "<document_id>_<chunk_id>"
+            chunks = weighted_reciprocal_rank_fusion(
+                ranked_results=valid_results,
+                weights=weights,
+                id_extractor=lambda chunk: f"{chunk.document_id}_{chunk.chunk_id}",
+            )
+
+    # Merge chunks into sections
+    sections = merge_individual_chunks(chunks)
+
+    # Apply LLM document selection if requested
+    # num_docs_fed_to_llm_selection specifies how many sections to feed to the LLM for selection
+    # The LLM will always try to select TARGET_NUM_SECTIONS_FOR_LLM_SELECTION sections from those fed to it
+    # llm_selected_doc_ids will be:
+    #   - None if LLM selection was not requested or failed
+    #   - Empty list if LLM selection ran but selected nothing
+    #   - List of doc IDs if LLM selection succeeded
+    run_llm_selection = (
+        request.num_docs_fed_to_llm_selection is not None
+        and request.num_docs_fed_to_llm_selection >= 1
+    )
+    llm_selected_doc_ids: list[str] | None = None
+    llm_selection_failed = False
+    if run_llm_selection and sections:
+        try:
+            llm = get_default_llm()
+            # Only the top-N sections (in ranked order) are shown to the LLM
+            sections_to_evaluate = sections[: request.num_docs_fed_to_llm_selection]
+            selected_sections, _ = select_sections_for_expansion(
+                sections=sections_to_evaluate,
+                user_query=original_query,
+                llm=llm,
+                max_sections=TARGET_NUM_SECTIONS_FOR_LLM_SELECTION,
+                try_to_fill_to_max=True,
+            )
+            # Extract unique document IDs from selected sections (may be empty)
+            # dict.fromkeys de-duplicates while keeping first-seen order
+            llm_selected_doc_ids = list(
+                dict.fromkeys(
+                    section.center_chunk.document_id for section in selected_sections
+                )
+            )
+            logger.debug(
+                f"LLM document selection evaluated {len(sections_to_evaluate)} sections, "
+                f"selected {len(selected_sections)} sections with doc IDs: {llm_selected_doc_ids}"
+            )
+        except Exception as e:
+            # Allowing a blanket exception here as this step is not critical and the rest of the results are still valid
+            logger.warning(f"LLM document selection failed: {e}")
+            llm_selection_failed = True
+    elif run_llm_selection and not sections:
+        # LLM selection requested but no sections to evaluate
+        llm_selected_doc_ids = []
+
+    # Convert to SearchDocWithContent list, optionally including content
+    search_docs = SearchDocWithContent.from_inference_sections(
+        sections,
+        include_content=request.include_content,
+        is_internet=False,
+    )
+
+    # Yield queries packet
+    yield SearchQueriesPacket(all_executed_queries=all_executed_queries)
+
+    # Yield docs packet
+    yield SearchDocsPacket(search_docs=search_docs)
+
+    # Yield LLM selected docs packet if LLM selection was requested
+    # - llm_selected_doc_ids is None if selection failed
+    # - llm_selected_doc_ids is empty list if no docs were selected
+    # - llm_selected_doc_ids is list of IDs if docs were selected
+    if run_llm_selection:
+        yield LLMSelectedDocsPacket(
+            llm_selected_doc_ids=None if llm_selection_failed else llm_selected_doc_ids
+        )
+
+
+def gather_search_stream(
+    packets: Generator[
+        SearchQueriesPacket
+        | SearchDocsPacket
+        | LLMSelectedDocsPacket
+        | SearchErrorPacket,
+        None,
+        None,
+    ],
+) -> SearchFullResponse:
+    """
+    Drain a search packet stream and fold it into one SearchFullResponse.
+
+    Each packet type fills its matching response field; if a packet type
+    appears more than once, the last one wins. Fields with no packet keep
+    their defaults (empty lists, or None for the selected doc IDs and error).
+    `doc_selection_reasoning` is always None.
+    """
+    executed: list[str] = []
+    docs: list[SearchDocWithContent] = []
+    selected_ids: list[str] | None = None
+    failure: str | None = None
+
+    for packet in packets:
+        match packet:
+            case SearchQueriesPacket():
+                executed = packet.all_executed_queries
+            case SearchDocsPacket():
+                docs = packet.search_docs
+            case LLMSelectedDocsPacket():
+                selected_ids = packet.llm_selected_doc_ids
+            case SearchErrorPacket():
+                failure = packet.error
+
+    return SearchFullResponse(
+        all_executed_queries=executed,
+        search_docs=docs,
+        doc_selection_reasoning=None,
+        llm_selected_doc_ids=selected_ids,
+        error=failure,
+    )
--- a/backend/ee/onyx/secondary_llm_flows/init.py
+++ b/backend/ee/onyx/secondary_llm_flows/init.py
--- a/backend/ee/onyx/secondary_llm_flows/query_expansion.py
+++ b/backend/ee/onyx/secondary_llm_flows/query_expansion.py
@@ -0,0 +1,92 @@
+import re
+
+from ee.onyx.prompts.query_expansion import KEYWORD_EXPANSION_PROMPT
+from onyx.llm.interfaces import LLM
+from onyx.llm.models import LanguageModelInput
+from onyx.llm.models import ReasoningEffort
+from onyx.llm.models import UserMessage
+from onyx.llm.utils import llm_response_to_string
+from onyx.utils.logger import setup_logger
+
+logger = setup_logger()
+
+# Characters LLMs commonly wrap keywords in: square brackets, quotes and backticks.
+CLEANUP_PATTERN = re.compile(r'[\[\]"\'`]')
+
+# Leading list markers such as "1.", "2)", "-" or "*".
+# The (?!\d) lookahead keeps a leading decimal/version number (e.g. "3.11 upgrade")
+# from being mistaken for a numbered-list marker and losing its integer part.
+_LIST_MARKER_PATTERN = re.compile(r"^\s*(?:\d+[.)](?!\d)\s*|[-*]\s*)")
+
+
+def _clean_keyword_line(line: str) -> str:
+    """Clean a keyword line by removing common LLM artifacts.
+
+    Removes brackets, quotes and backticks anywhere in the line, then a single
+    leading list marker ("1.", "2)", "-", "*"), then surrounding whitespace.
+
+    Args:
+        line: One raw line of LLM output.
+
+    Returns:
+        The cleaned line; may be an empty string if nothing useful remains.
+    """
+    # Strip wrapping characters first so a marker hidden behind them (e.g. '"- foo"')
+    # is at the start of the string when the list-marker pattern runs.
+    without_artifacts = CLEANUP_PATTERN.sub("", line)
+    return _LIST_MARKER_PATTERN.sub("", without_artifacts).strip()
+
+
+def expand_keywords(
+    user_query: str,
+    llm: LLM,
+) -> list[str]:
+    """Expand a user query into multiple keyword-only queries for BM25 search.
+
+    Uses an LLM to generate keyword-based search queries that capture different
+    aspects of the user's search intent. Returns only the expanded queries,
+    not the original query.
+
+    Args:
+        user_query: The original search query from the user
+        llm: Language model to use for keyword expansion
+
+    Returns:
+        List of expanded keyword queries (excluding the original query), in the
+        order the LLM produced them and de-duplicated case-insensitively.
+        Returns empty list if expansion fails or produces no useful expansions.
+    """
+    messages: LanguageModelInput = [
+        UserMessage(content=KEYWORD_EXPANSION_PROMPT.format(user_query=user_query))
+    ]
+
+    try:
+        response = llm.invoke(
+            prompt=messages,
+            reasoning_effort=ReasoningEffort.OFF,
+            # Limit output - we only expect a few short keyword queries
+            max_tokens=150,
+        )
+
+        content = llm_response_to_string(response).strip()
+
+        if not content:
+            logger.warning("Keyword expansion returned empty response.")
+            return []
+
+        # Parse response - each line is a separate keyword query
+        # Clean each line to remove LLM artifacts and drop empty lines
+        parsed_queries = [
+            cleaned
+            for line in content.split("\n")
+            if (cleaned := _clean_keyword_line(line))
+        ]
+
+        if not parsed_queries:
+            logger.warning("Keyword expansion parsing returned no queries.")
+            return []
+
+        # Filter out duplicates and queries that match the original.
+        # Candidates have been stripped and cleaned, so the original is normalized the
+        # same way; otherwise a query with surrounding whitespace or quotes would never
+        # match its own echo and would come back as an "expansion".
+        seen_lower: set[str] = {
+            user_query.strip().lower(),
+            _clean_keyword_line(user_query).lower(),
+        }
+        expanded_queries: list[str] = []
+        for query in parsed_queries:
+            query_lower = query.lower()
+            if query_lower not in seen_lower:
+                seen_lower.add(query_lower)
+                expanded_queries.append(query)
+
+        logger.debug(f"Keyword expansion generated {len(expanded_queries)} queries")
+        return expanded_queries
+
+    except Exception as e:
+        # Expansion is best-effort: any failure means "no expansions"
+        logger.warning(f"Keyword expansion failed: {e}")
+        return []
--- a/backend/ee/onyx/secondary_llm_flows/search_flow_classification.py
+++ b/backend/ee/onyx/secondary_llm_flows/search_flow_classification.py
@@ -0,0 +1,50 @@
+from ee.onyx.prompts.search_flow_classification import CHAT_CLASS
+from ee.onyx.prompts.search_flow_classification import SEARCH_CHAT_PROMPT
+from ee.onyx.prompts.search_flow_classification import SEARCH_CLASS
+from onyx.llm.interfaces import LLM
+from onyx.llm.models import LanguageModelInput
+from onyx.llm.models import ReasoningEffort
+from onyx.llm.models import UserMessage
+from onyx.llm.utils import llm_response_to_string
+from onyx.utils.logger import setup_logger
+from onyx.utils.timing import log_function_time
+
+logger = setup_logger()
+
+
+@log_function_time(print_only=True)
+def classify_is_search_flow(
+    query: str,
+    llm: LLM,
+) -> bool:
+    """Ask the LLM whether `query` belongs in the search flow or the chat flow.
+
+    Returns True only when the response contains the search label and not the
+    chat label. An empty response, an unexpected response, or a response that
+    contains the chat label all return False (chat flow).
+    """
+    prompt_text = SEARCH_CHAT_PROMPT.format(user_query=query)
+    messages: LanguageModelInput = [UserMessage(content=prompt_text)]
+
+    response = llm.invoke(
+        prompt=messages,
+        reasoning_effort=ReasoningEffort.OFF,
+        # The UI is blocked until this call returns, so the timeout is kept aggressive
+        timeout_override=2,
+        # Far more than a one-word answer needs; leaves room for the model to finish
+        # even if it classifies correctly and then rambles
+        max_tokens=20,
+    )
+
+    verdict = llm_response_to_string(response).strip().lower()
+    if not verdict:
+        logger.warning(
+            "Search flow classification returned empty response; defaulting to chat flow."
+        )
+        return False
+
+    # Chat wins when both labels appear in the response
+    mentions_chat = CHAT_CLASS in verdict
+    if mentions_chat or SEARCH_CLASS in verdict:
+        return not mentions_chat
+
+    logger.warning(
+        "Search flow classification returned unexpected response; defaulting to chat flow. Response=%r",
+        verdict,
+    )
+    return False
--- a/backend/ee/onyx/server/analytics/api.py
+++ b/backend/ee/onyx/server/analytics/api.py
@@ -19,10 +19,11 @@ from ee.onyx.db.analytics import fetch_query_analytics
 from ee.onyx.db.analytics import user_can_view_assistant_stats
 from onyx.auth.users import current_admin_user
 from onyx.auth.users import current_user
+from onyx.configs.constants import PUBLIC_API_TAGS
 from onyx.db.engine.sql_engine import get_session
 from onyx.db.models import User

-router = APIRouter(prefix="/analytics")
+router = APIRouter(prefix="/analytics", tags=PUBLIC_API_TAGS)


 _DEFAULT_LOOKBACK_DAYS = 30
--- a/backend/ee/onyx/server/enterprise_settings/models.py
+++ b/backend/ee/onyx/server/enterprise_settings/models.py
@@ -1,3 +1,4 @@
+from enum import Enum
 from typing import Any
 from typing import List

@@ -23,6 +24,12 @@ class NavigationItem(BaseModel):
        return instance


+class LogoDisplayStyle(str, Enum):
+    """String-valued options for `EnterpriseSettings.logo_display_style`."""
+
+    LOGO_AND_NAME = "logo_and_name"
+    LOGO_ONLY = "logo_only"
+    NAME_ONLY = "name_only"
+
+
 class EnterpriseSettings(BaseModel):
    """General settings that only apply to the Enterprise Edition of Onyx

@@ -31,6 +38,7 @@ class EnterpriseSettings(BaseModel):
    application_name: str | None = None
    use_custom_logo: bool = False
    use_custom_logotype: bool = False
+    logo_display_style: LogoDisplayStyle | None = None

    # custom navigation
    custom_nav_items: List[NavigationItem] = Field(default_factory=list)
@@ -42,6 +50,9 @@ class EnterpriseSettings(BaseModel):
    custom_popup_header: str | None = None
    custom_popup_content: str | None = None
    enable_consent_screen: bool | None = None
+    consent_screen_prompt: str | None = None
+    show_first_visit_notice: bool | None = None
+    custom_greeting_message: str | None = None

    def check_validity(self) -> None:
        return
--- a/backend/ee/onyx/server/license/api.py
+++ b/backend/ee/onyx/server/license/api.py
@@ -0,0 +1,246 @@
+"""License API endpoints."""
+
+import requests
+from fastapi import APIRouter
+from fastapi import Depends
+from fastapi import File
+from fastapi import HTTPException
+from fastapi import UploadFile
+from sqlalchemy.orm import Session
+
+from ee.onyx.auth.users import current_admin_user
+from ee.onyx.db.license import delete_license as db_delete_license
+from ee.onyx.db.license import get_license_metadata
+from ee.onyx.db.license import invalidate_license_cache
+from ee.onyx.db.license import refresh_license_cache
+from ee.onyx.db.license import update_license_cache
+from ee.onyx.db.license import upsert_license
+from ee.onyx.server.license.models import LicenseResponse
+from ee.onyx.server.license.models import LicenseSource
+from ee.onyx.server.license.models import LicenseStatusResponse
+from ee.onyx.server.license.models import LicenseUploadResponse
+from ee.onyx.server.license.models import SeatUsageResponse
+from ee.onyx.server.tenants.access import generate_data_plane_token
+from ee.onyx.utils.license import verify_license_signature
+from onyx.auth.users import User
+from onyx.configs.app_configs import CONTROL_PLANE_API_BASE_URL
+from onyx.db.engine.sql_engine import get_session
+from onyx.utils.logger import setup_logger
+from shared_configs.contextvars import get_current_tenant_id
+
+logger = setup_logger()
+
+router = APIRouter(prefix="/license")
+
+
+@router.get("")
+async def get_license_status(
+    _: User = Depends(current_admin_user),
+    db_session: Session = Depends(get_session),
+) -> LicenseStatusResponse:
+    """Get current license status and seat usage."""
+    metadata = get_license_metadata(db_session)
+
+    if not metadata:
+        return LicenseStatusResponse(has_license=False)
+
+    return LicenseStatusResponse(
+        has_license=True,
+        seats=metadata.seats,
+        used_seats=metadata.used_seats,
+        plan_type=metadata.plan_type,
+        issued_at=metadata.issued_at,
+        expires_at=metadata.expires_at,
+        grace_period_end=metadata.grace_period_end,
+        status=metadata.status,
+        source=metadata.source,
+    )
+
+
+@router.get("/seats")
+async def get_seat_usage(
+    _: User = Depends(current_admin_user),
+    db_session: Session = Depends(get_session),
+) -> SeatUsageResponse:
+    """Get detailed seat usage information."""
+    metadata = get_license_metadata(db_session)
+
+    if not metadata:
+        return SeatUsageResponse(
+            total_seats=0,
+            used_seats=0,
+            available_seats=0,
+        )
+
+    return SeatUsageResponse(
+        total_seats=metadata.seats,
+        used_seats=metadata.used_seats,
+        available_seats=max(0, metadata.seats - metadata.used_seats),
+    )
+
+
+@router.post("/fetch")
+async def fetch_license(
+    _: User = Depends(current_admin_user),
+    db_session: Session = Depends(get_session),
+) -> LicenseResponse:
+    """
+    Fetch this tenant's license from the control plane, verify it, and store it.
+    Used after Stripe checkout completion to retrieve the new license.
+    Raises HTTPException (400/404/500/502, or the control plane's own status).
+    """
+    tenant_id = get_current_tenant_id()
+
+    try:
+        token = generate_data_plane_token()
+    except ValueError as e:
+        logger.error(f"Failed to generate data plane token: {e}")
+        raise HTTPException(
+            status_code=500, detail="Authentication configuration error"
+        )
+
+    try:
+        headers = {
+            "Authorization": f"Bearer {token}",
+            "Content-Type": "application/json",
+        }
+        url = f"{CONTROL_PLANE_API_BASE_URL}/license/{tenant_id}"
+        response = requests.get(url, headers=headers, timeout=10)
+        response.raise_for_status()
+        data = response.json()
+        if not isinstance(data, dict) or "license" not in data:
+            raise HTTPException(
+                status_code=502, detail="Invalid response from control plane"
+            )
+        license_data = data["license"]
+        if not license_data:
+            raise HTTPException(status_code=404, detail="No license found")
+
+        # Verify signature before persisting
+        payload = verify_license_signature(license_data)
+
+        # Verify the fetched license is for this tenant
+        if payload.tenant_id != tenant_id:
+            logger.error(
+                f"License tenant mismatch: expected {tenant_id}, got {payload.tenant_id}"
+            )
+            raise HTTPException(
+                status_code=400,
+                detail="License tenant ID mismatch - control plane returned wrong license",
+            )
+
+        # Persist to DB, then refresh the cache (best effort, not atomic)
+        upsert_license(db_session, license_data)
+        try:
+            update_license_cache(payload, source=LicenseSource.AUTO_FETCH)
+        except Exception as cache_error:
+            # Log but don't fail - DB is source of truth, cache will refresh on next read
+            logger.warning(f"Failed to update license cache: {cache_error}")
+
+        return LicenseResponse(success=True, license=payload)
+    except requests.HTTPError as e:
+        status_code = e.response.status_code if e.response is not None else 502
+        logger.error(f"Control plane returned error: {status_code}")
+        raise HTTPException(
+            status_code=status_code,
+            detail="Failed to fetch license from control plane",
+        )
+    # Handled before ValueError: requests' JSON-decode and invalid-URL errors
+    # subclass both, and those are upstream (502) failures, not a bad license.
+    except requests.RequestException:
+        logger.exception("Failed to fetch license from control plane")
+        raise HTTPException(
+            status_code=502, detail="Failed to connect to control plane"
+        )
+    except ValueError as e:
+        logger.error(f"License verification failed: {type(e).__name__}")
+        raise HTTPException(status_code=400, detail=str(e))
+
+
+@router.post("/upload")
+async def upload_license(
+    license_file: UploadFile = File(...),
+    _: User = Depends(current_admin_user),
+    db_session: Session = Depends(get_session),
+) -> LicenseUploadResponse:
+    """
+    Upload a license file manually.
+    Used for air-gapped deployments where control plane is not accessible.
+    """
+    try:
+        content = await license_file.read()
+        license_data = content.decode("utf-8").strip()
+    except UnicodeDecodeError:
+        raise HTTPException(status_code=400, detail="Invalid license file format")
+
+    try:
+        payload = verify_license_signature(license_data)
+    except ValueError as e:
+        raise HTTPException(status_code=400, detail=str(e))
+
+    tenant_id = get_current_tenant_id()
+    if payload.tenant_id != tenant_id:
+        raise HTTPException(
+            status_code=400,
+            detail=f"License tenant ID mismatch. Expected {tenant_id}, got {payload.tenant_id}",
+        )
+
+    # Persist to DB and update cache
+    upsert_license(db_session, license_data)
+    try:
+        update_license_cache(payload, source=LicenseSource.MANUAL_UPLOAD)
+    except Exception as cache_error:
+        # Log but don't fail - DB is source of truth, cache will refresh on next read
+        logger.warning(f"Failed to update license cache: {cache_error}")
+
+    return LicenseUploadResponse(
+        success=True,
+        message=f"License uploaded successfully. {payload.seats} seats, expires {payload.expires_at.date()}",
+    )
+
+
+@router.post("/refresh")
+async def refresh_license_cache_endpoint(
+    _: User = Depends(current_admin_user),
+    db_session: Session = Depends(get_session),
+) -> LicenseStatusResponse:
+    """
+    Force refresh the license cache from the database.
+    Useful after manual database changes or to verify license validity.
+    """
+    metadata = refresh_license_cache(db_session)
+
+    if not metadata:
+        return LicenseStatusResponse(has_license=False)
+
+    return LicenseStatusResponse(
+        has_license=True,
+        seats=metadata.seats,
+        used_seats=metadata.used_seats,
+        plan_type=metadata.plan_type,
+        issued_at=metadata.issued_at,
+        expires_at=metadata.expires_at,
+        grace_period_end=metadata.grace_period_end,
+        status=metadata.status,
+        source=metadata.source,
+    )
+
+
+@router.delete("")
+async def delete_license(
+    _: User = Depends(current_admin_user),
+    db_session: Session = Depends(get_session),
+) -> dict[str, bool]:
+    """
+    Delete the current license.
+    Admin only - removes license and invalidates cache.
+    """
+    # Invalidate cache first - if DB delete fails, stale cache is worse than no cache
+    try:
+        invalidate_license_cache()
+    except Exception as cache_error:
+        logger.warning(f"Failed to invalidate license cache: {cache_error}")
+
+    deleted = db_delete_license(db_session)
+
+    return {"deleted": deleted}
--- a/backend/ee/onyx/server/license/models.py
+++ b/backend/ee/onyx/server/license/models.py
@@ -0,0 +1,92 @@
+from datetime import datetime
+from enum import Enum
+
+from pydantic import BaseModel
+
+from onyx.server.settings.models import ApplicationStatus
+
+
+class PlanType(str, Enum):
+    MONTHLY = "monthly"
+    ANNUAL = "annual"
+
+
+class LicenseSource(str, Enum):
+    AUTO_FETCH = "auto_fetch"
+    MANUAL_UPLOAD = "manual_upload"
+
+
+class LicensePayload(BaseModel):
+    """The payload portion of a signed license."""
+
+    version: str
+    tenant_id: str
+    organization_name: str | None = None
+    issued_at: datetime
+    expires_at: datetime
+    seats: int
+    plan_type: PlanType
+    billing_cycle: str | None = None
+    grace_period_days: int = 30
+    stripe_subscription_id: str | None = None
+    stripe_customer_id: str | None = None
+
+
+class LicenseData(BaseModel):
+    """Full signed license structure."""
+
+    payload: LicensePayload
+    signature: str
+
+
+class LicenseMetadata(BaseModel):
+    """Cached license metadata stored in Redis."""
+
+    tenant_id: str
+    organization_name: str | None = None
+    seats: int
+    used_seats: int
+    plan_type: PlanType
+    issued_at: datetime
+    expires_at: datetime
+    grace_period_end: datetime | None = None
+    status: ApplicationStatus
+    source: LicenseSource | None = None
+    stripe_subscription_id: str | None = None
+
+
+class LicenseStatusResponse(BaseModel):
+    """Response for license status API."""
+
+    has_license: bool
+    seats: int = 0
+    used_seats: int = 0
+    plan_type: PlanType | None = None
+    issued_at: datetime | None = None
+    expires_at: datetime | None = None
+    grace_period_end: datetime | None = None
+    status: ApplicationStatus | None = None
+    source: LicenseSource | None = None
+
+
+class LicenseResponse(BaseModel):
+    """Response after license fetch/upload."""
+
+    success: bool
+    message: str | None = None
+    license: LicensePayload | None = None
+
+
+class LicenseUploadResponse(BaseModel):
+    """Response after license upload."""
+
+    success: bool
+    message: str | None = None
+
+
+class SeatUsageResponse(BaseModel):
+    """Response for seat usage API."""
+
+    total_seats: int
+    used_seats: int
+    available_seats: int
--- a/backend/ee/onyx/server/middleware/license_enforcement.py
+++ b/backend/ee/onyx/server/middleware/license_enforcement.py
@@ -0,0 +1,102 @@
+"""Middleware to enforce license status application-wide."""
+
+import logging
+from collections.abc import Awaitable
+from collections.abc import Callable
+
+from fastapi import FastAPI
+from fastapi import Request
+from fastapi import Response
+from fastapi.responses import JSONResponse
+from redis.exceptions import RedisError
+
+from ee.onyx.configs.app_configs import LICENSE_ENFORCEMENT_ENABLED
+from ee.onyx.db.license import get_cached_license_metadata
+from ee.onyx.server.tenants.product_gating import is_tenant_gated
+from onyx.server.settings.models import ApplicationStatus
+from shared_configs.configs import MULTI_TENANT
+from shared_configs.contextvars import get_current_tenant_id
+
+# Paths that are ALWAYS accessible, even when license is expired/gated.
+# These enable users to:
+#   /auth - Log in/out (users can't fix billing if locked out of auth)
+#   /license - Fetch, upload, or check license status
+#   /health - Health checks for load balancers/orchestrators
+#   /me - Basic user info needed for UI rendering
+#   /settings, /enterprise-settings - View app status and branding
+#   /tenants/billing-information, /tenants/create-*-session - Manage subscription to resolve gating
+ALLOWED_PATH_PREFIXES = {
+    "/auth",
+    "/license",
+    "/health",
+    "/me",
+    "/settings",
+    "/enterprise-settings",
+    "/tenants/billing-information",
+    "/tenants/create-customer-portal-session",
+    "/tenants/create-subscription-session",
+}
+
+
+def _is_path_allowed(path: str) -> bool:
+    """Return True if path starts with any allowlisted prefix (plain string match)."""
+    return path.startswith(tuple(ALLOWED_PATH_PREFIXES))
+
+
+def add_license_enforcement_middleware(
+    app: FastAPI, logger: logging.LoggerAdapter
+) -> None:
+    logger.info("License enforcement middleware registered")
+
+    @app.middleware("http")
+    async def enforce_license(
+        request: Request, call_next: Callable[[Request], Awaitable[Response]]
+    ) -> Response:
+        """Block requests when license is expired/gated."""
+        if not LICENSE_ENFORCEMENT_ENABLED:
+            return await call_next(request)
+
+        path = request.url.path
+        if path.startswith("/api"):
+            path = path[4:]
+
+        if _is_path_allowed(path):
+            return await call_next(request)
+
+        is_gated = False
+        tenant_id = get_current_tenant_id()
+
+        if MULTI_TENANT:
+            try:
+                is_gated = is_tenant_gated(tenant_id)
+            except RedisError as e:
+                logger.warning(f"Failed to check tenant gating status: {e}")
+                # Fail open - don't block users due to Redis connectivity issues
+                is_gated = False
+        else:
+            try:
+                metadata = get_cached_license_metadata(tenant_id)
+                if metadata:
+                    if metadata.status == ApplicationStatus.GATED_ACCESS:
+                        is_gated = True
+                else:
+                    # No license metadata = gated for self-hosted EE
+                    is_gated = True
+            except RedisError as e:
+                logger.warning(f"Failed to check license metadata: {e}")
+                # Fail open - don't block users due to Redis connectivity issues
+                is_gated = False
+
+        if is_gated:
+            logger.info(f"Blocking request for gated tenant: {tenant_id}, path={path}")
+            return JSONResponse(
+                status_code=402,
+                content={
+                    "detail": {
+                        "error": "license_expired",
+                        "message": "Your subscription has expired. Please update your billing.",
+                    }
+                },
+            )
+
+        return await call_next(request)
--- a/backend/ee/onyx/server/query_and_chat/chat_backend.py
+++ b/backend/ee/onyx/server/query_and_chat/chat_backend.py
@@ -1,218 +0,0 @@
-from fastapi import APIRouter
-from fastapi import Depends
-from fastapi import HTTPException
-from sqlalchemy.orm import Session
-
-from ee.onyx.server.query_and_chat.models import BasicCreateChatMessageRequest
-from ee.onyx.server.query_and_chat.models import (
-    BasicCreateChatMessageWithHistoryRequest,
-)
-from onyx.auth.users import current_user
-from onyx.chat.chat_utils import create_chat_history_chain
-from onyx.chat.models import ChatBasicResponse
-from onyx.chat.process_message import gather_stream
-from onyx.chat.process_message import stream_chat_message_objects
-from onyx.configs.constants import MessageType
-from onyx.context.search.models import OptionalSearchSetting
-from onyx.context.search.models import RetrievalDetails
-from onyx.db.chat import create_chat_session
-from onyx.db.chat import create_new_chat_message
-from onyx.db.chat import get_or_create_root_message
-from onyx.db.engine.sql_engine import get_session
-from onyx.db.models import User
-from onyx.llm.factory import get_llms_for_persona
-from onyx.natural_language_processing.utils import get_tokenizer
-from onyx.server.query_and_chat.models import CreateChatMessageRequest
-from onyx.utils.logger import setup_logger
-
-logger = setup_logger()
-
-router = APIRouter(prefix="/chat")
-
-
-@router.post("/send-message-simple-api")
-def handle_simplified_chat_message(
-    chat_message_req: BasicCreateChatMessageRequest,
-    user: User | None = Depends(current_user),
-    db_session: Session = Depends(get_session),
-) -> ChatBasicResponse:
-    """This is a Non-Streaming version that only gives back a minimal set of information"""
-    logger.notice(f"Received new simple api chat message: {chat_message_req.message}")
-
-    if not chat_message_req.message:
-        raise HTTPException(status_code=400, detail="Empty chat message is invalid")
-
-    # Handle chat session creation if chat_session_id is not provided
-    if chat_message_req.chat_session_id is None:
-        if chat_message_req.persona_id is None:
-            raise HTTPException(
-                status_code=400,
-                detail="Either chat_session_id or persona_id must be provided",
-            )
-
-        # Create a new chat session with the provided persona_id
-        try:
-            new_chat_session = create_chat_session(
-                db_session=db_session,
-                description="",  # Leave empty for simple API
-                user_id=user.id if user else None,
-                persona_id=chat_message_req.persona_id,
-            )
-            chat_session_id = new_chat_session.id
-        except Exception as e:
-            logger.exception(e)
-            raise HTTPException(status_code=400, detail="Invalid Persona provided.")
-    else:
-        chat_session_id = chat_message_req.chat_session_id
-
-    try:
-        parent_message = create_chat_history_chain(
-            chat_session_id=chat_session_id, db_session=db_session
-        )[-1]
-    except Exception:
-        parent_message = get_or_create_root_message(
-            chat_session_id=chat_session_id, db_session=db_session
-        )
-
-    if (
-        chat_message_req.retrieval_options is None
-        and chat_message_req.search_doc_ids is None
-    ):
-        retrieval_options: RetrievalDetails | None = RetrievalDetails(
-            run_search=OptionalSearchSetting.ALWAYS,
-            real_time=False,
-        )
-    else:
-        retrieval_options = chat_message_req.retrieval_options
-
-    full_chat_msg_info = CreateChatMessageRequest(
-        chat_session_id=chat_session_id,
-        parent_message_id=parent_message.id,
-        message=chat_message_req.message,
-        file_descriptors=[],
-        search_doc_ids=chat_message_req.search_doc_ids,
-        retrieval_options=retrieval_options,
-        # Simple API does not support reranking, hide complexity from user
-        rerank_settings=None,
-        query_override=chat_message_req.query_override,
-        # Currently only applies to search flow not chat
-        chunks_above=0,
-        chunks_below=0,
-        full_doc=chat_message_req.full_doc,
-        structured_response_format=chat_message_req.structured_response_format,
-        use_agentic_search=chat_message_req.use_agentic_search,
-    )
-
-    packets = stream_chat_message_objects(
-        new_msg_req=full_chat_msg_info,
-        user=user,
-        db_session=db_session,
-        enforce_chat_session_id_for_search_docs=False,
-    )
-
-    return gather_stream(packets)
-
-
-@router.post("/send-message-simple-with-history")
-def handle_send_message_simple_with_history(
-    req: BasicCreateChatMessageWithHistoryRequest,
-    user: User | None = Depends(current_user),
-    db_session: Session = Depends(get_session),
-) -> ChatBasicResponse:
-    """This is a Non-Streaming version that only gives back a minimal set of information.
-    takes in chat history maintained by the caller
-    and does query rephrasing similar to answer-with-quote"""
-
-    if len(req.messages) == 0:
-        raise HTTPException(status_code=400, detail="Messages cannot be zero length")
-
-    # This is a sanity check to make sure the chat history is valid
-    # It must start with a user message and alternate beteen user and assistant
-    expected_role = MessageType.USER
-    for msg in req.messages:
-        if not msg.message:
-            raise HTTPException(
-                status_code=400, detail="One or more chat messages were empty"
-            )
-
-        if msg.role != expected_role:
-            raise HTTPException(
-                status_code=400,
-                detail="Message roles must start and end with MessageType.USER and alternate in-between.",
-            )
-        if expected_role == MessageType.USER:
-            expected_role = MessageType.ASSISTANT
-        else:
-            expected_role = MessageType.USER
-
-    query = req.messages[-1].message
-    msg_history = req.messages[:-1]
-
-    logger.notice(f"Received new simple with history chat message: {query}")
-
-    user_id = user.id if user is not None else None
-    chat_session = create_chat_session(
-        db_session=db_session,
-        description="handle_send_message_simple_with_history",
-        user_id=user_id,
-        persona_id=req.persona_id,
-    )
-
-    llm, _ = get_llms_for_persona(persona=chat_session.persona, user=user)
-
-    llm_tokenizer = get_tokenizer(
-        model_name=llm.config.model_name,
-        provider_type=llm.config.model_provider,
-    )
-
-    # Every chat Session begins with an empty root message
-    root_message = get_or_create_root_message(
-        chat_session_id=chat_session.id, db_session=db_session
-    )
-
-    chat_message = root_message
-    for msg in msg_history:
-        chat_message = create_new_chat_message(
-            chat_session_id=chat_session.id,
-            parent_message=chat_message,
-            message=msg.message,
-            token_count=len(llm_tokenizer.encode(msg.message)),
-            message_type=msg.role,
-            db_session=db_session,
-            commit=False,
-        )
-    db_session.commit()
-
-    if req.retrieval_options is None and req.search_doc_ids is None:
-        retrieval_options: RetrievalDetails | None = RetrievalDetails(
-            run_search=OptionalSearchSetting.ALWAYS,
-            real_time=False,
-        )
-    else:
-        retrieval_options = req.retrieval_options
-
-    full_chat_msg_info = CreateChatMessageRequest(
-        chat_session_id=chat_session.id,
-        parent_message_id=chat_message.id,
-        message=query,
-        file_descriptors=[],
-        search_doc_ids=req.search_doc_ids,
-        retrieval_options=retrieval_options,
-        # Simple API does not support reranking, hide complexity from user
-        rerank_settings=None,
-        query_override=None,
-        chunks_above=0,
-        chunks_below=0,
-        full_doc=req.full_doc,
-        structured_response_format=req.structured_response_format,
-        use_agentic_search=req.use_agentic_search,
-    )
-
-    packets = stream_chat_message_objects(
-        new_msg_req=full_chat_msg_info,
-        user=user,
-        db_session=db_session,
-        enforce_chat_session_id_for_search_docs=False,
-    )
-
-    return gather_stream(packets)
--- a/backend/ee/onyx/server/query_and_chat/models.py
+++ b/backend/ee/onyx/server/query_and_chat/models.py
@@ -1,18 +1,12 @@
-from collections import OrderedDict
-from typing import Literal
-from uuid import UUID
+from collections.abc import Sequence
+from datetime import datetime

 from pydantic import BaseModel
 from pydantic import Field
-from pydantic import model_validator

-from onyx.chat.models import ThreadMessage
-from onyx.configs.constants import DocumentSource
 from onyx.context.search.models import BaseFilters
-from onyx.context.search.models import BasicChunkRequest
-from onyx.context.search.models import ChunkContext
-from onyx.context.search.models import InferenceChunk
-from onyx.context.search.models import RetrievalDetails
+from onyx.context.search.models import InferenceSection
+from onyx.context.search.models import SearchDoc
 from onyx.server.manage.models import StandardAnswer


@@ -25,124 +19,88 @@ class StandardAnswerResponse(BaseModel):
    standard_answers: list[StandardAnswer] = Field(default_factory=list)


-class DocumentSearchRequest(BasicChunkRequest):
-    user_selected_filters: BaseFilters | None = None
+class SearchFlowClassificationRequest(BaseModel):
+    user_query: str


-class DocumentSearchResponse(BaseModel):
-    top_documents: list[InferenceChunk]
+class SearchFlowClassificationResponse(BaseModel):
+    is_search_flow: bool


-class BasicCreateChatMessageRequest(ChunkContext):
-    """If a chat_session_id is not provided, a persona_id must be provided to automatically create a new chat session
-    Note, for simplicity this option only allows for a single linear chain of messages
-    """
+class SendSearchQueryRequest(BaseModel):
+    search_query: str
+    filters: BaseFilters | None = None
+    num_docs_fed_to_llm_selection: int | None = None
+    run_query_expansion: bool = False

-    chat_session_id: UUID | None = None
-    # Optional persona_id to create a new chat session if chat_session_id is not provided
-    persona_id: int | None = None
-    # New message contents
-    message: str
-    # Defaults to using retrieval with no additional filters
-    retrieval_options: RetrievalDetails | None = None
-    # Allows the caller to specify the exact search query they want to use
-    # will disable Query Rewording if specified
-    query_override: str | None = None
-    # If search_doc_ids provided, then retrieval options are unused
-    search_doc_ids: list[int] | None = None
-    # only works if using an OpenAI model. See the following for more details:
-    # https://platform.openai.com/docs/guides/structured-outputs/introduction
-    structured_response_format: dict | None = None
-
-    # If True, uses agentic search instead of basic search
-    use_agentic_search: bool = False
-
-    @model_validator(mode="after")
-    def validate_chat_session_or_persona(self) -> "BasicCreateChatMessageRequest":
-        if self.chat_session_id is None and self.persona_id is None:
-            raise ValueError("Either chat_session_id or persona_id must be provided")
-        return self
+    include_content: bool = False
+    stream: bool = False


-class BasicCreateChatMessageWithHistoryRequest(ChunkContext):
-    # Last element is the new query. All previous elements are historical context
-    messages: list[ThreadMessage]
-    persona_id: int
-    retrieval_options: RetrievalDetails | None = None
-    query_override: str | None = None
-    skip_rerank: bool | None = None
-    # If search_doc_ids provided, then retrieval options are unused
-    search_doc_ids: list[int] | None = None
-    # only works if using an OpenAI model. See the following for more details:
-    # https://platform.openai.com/docs/guides/structured-outputs/introduction
-    structured_response_format: dict | None = None
-    # If True, uses agentic search instead of basic search
-    use_agentic_search: bool = False
+class SearchDocWithContent(SearchDoc):
+    # None when content was not requested (include_content=False); the search
+    # path uses this same type whether or not content is populated
+    content: str | None

+    @classmethod
+    def from_inference_sections(
+        cls,
+        sections: Sequence[InferenceSection],
+        include_content: bool = False,
+        is_internet: bool = False,
+    ) -> list["SearchDocWithContent"]:
+        """Convert InferenceSections to SearchDocWithContent objects.

-class SimpleDoc(BaseModel):
-    id: str
-    semantic_identifier: str
-    link: str | None
-    blurb: str
-    match_highlights: list[str]
-    source_type: DocumentSource
-    metadata: dict | None
+        Args:
+            sections: Sequence of InferenceSection objects
+            include_content: If True, populate content field with combined_content
+            is_internet: Whether these are internet search results

-
-class AgentSubQuestion(BaseModel):
-    sub_question: str
-    document_ids: list[str]
-
-
-class AgentAnswer(BaseModel):
-    answer: str
-    answer_type: Literal["agent_sub_answer", "agent_level_answer"]
-
-
-class AgentSubQuery(BaseModel):
-    sub_query: str
-    query_id: int
-
-    @staticmethod
-    def make_dict_by_level_and_question_index(
-        original_dict: dict[tuple[int, int, int], "AgentSubQuery"],
-    ) -> dict[int, dict[int, list["AgentSubQuery"]]]:
-        """Takes a dict of tuple(level, question num, query_id) to sub queries.
-
-        returns a dict of level to dict[question num to list of query_id's]
-        Ordering is asc for readability.
+        Returns:
+            List of SearchDocWithContent with optional content
        """
-        # In this function, when we sort int | None, we deliberately push None to the end
+        if not sections:
+            return []

-        # map entries to the level_question_dict
-        level_question_dict: dict[int, dict[int, list["AgentSubQuery"]]] = {}
-        for k1, obj in original_dict.items():
-            level = k1[0]
-            question = k1[1]
-
-            if level not in level_question_dict:
-                level_question_dict[level] = {}
-
-            if question not in level_question_dict[level]:
-                level_question_dict[level][question] = []
-
-            level_question_dict[level][question].append(obj)
-
-        # sort each query_id list and question_index
-        for key1, obj1 in level_question_dict.items():
-            for key2, value2 in obj1.items():
-                # sort the query_id list of each question_index
-                level_question_dict[key1][key2] = sorted(
-                    value2, key=lambda o: o.query_id
-                )
-            # sort the question_index dict of level
-            level_question_dict[key1] = OrderedDict(
-                sorted(level_question_dict[key1].items(), key=lambda x: (x is None, x))
+        return [
+            cls(
+                document_id=(chunk := section.center_chunk).document_id,
+                chunk_ind=chunk.chunk_id,
+                semantic_identifier=chunk.semantic_identifier or "Unknown",
+                link=chunk.source_links[0] if chunk.source_links else None,
+                blurb=chunk.blurb,
+                source_type=chunk.source_type,
+                boost=chunk.boost,
+                hidden=chunk.hidden,
+                metadata=chunk.metadata,
+                score=chunk.score,
+                match_highlights=chunk.match_highlights,
+                updated_at=chunk.updated_at,
+                primary_owners=chunk.primary_owners,
+                secondary_owners=chunk.secondary_owners,
+                is_internet=is_internet,
+                content=section.combined_content if include_content else None,
            )
+            for section in sections
+        ]

-        # sort the top dict of levels
-        sorted_dict = OrderedDict(
-            sorted(level_question_dict.items(), key=lambda x: (x is None, x))
-        )
-        return sorted_dict
+
+class SearchFullResponse(BaseModel):
+    all_executed_queries: list[str]
+    search_docs: list[SearchDocWithContent]
+    # Reasoning tokens output by the LLM for the document selection
+    doc_selection_reasoning: str | None = None
+    # This is a list of document ids that are in the search_docs list
+    llm_selected_doc_ids: list[str] | None = None
+    # Error message if the search failed partway through
+    error: str | None = None
+
+
+class SearchQueryResponse(BaseModel):
+    query: str
+    query_expansions: list[str] | None
+    created_at: datetime
+
+
+class SearchHistoryResponse(BaseModel):
+    search_queries: list[SearchQueryResponse]
--- a/backend/ee/onyx/server/query_and_chat/search_backend.py
+++ b/backend/ee/onyx/server/query_and_chat/search_backend.py
@@ -0,0 +1,170 @@
+from collections.abc import Generator
+
+from fastapi import APIRouter
+from fastapi import Depends
+from fastapi import HTTPException
+from fastapi.responses import StreamingResponse
+from sqlalchemy.orm import Session
+
+from ee.onyx.db.search import fetch_search_queries_for_user
+from ee.onyx.search.process_search_query import gather_search_stream
+from ee.onyx.search.process_search_query import stream_search_query
+from ee.onyx.secondary_llm_flows.search_flow_classification import (
+    classify_is_search_flow,
+)
+from ee.onyx.server.query_and_chat.models import SearchFlowClassificationRequest
+from ee.onyx.server.query_and_chat.models import SearchFlowClassificationResponse
+from ee.onyx.server.query_and_chat.models import SearchFullResponse
+from ee.onyx.server.query_and_chat.models import SearchHistoryResponse
+from ee.onyx.server.query_and_chat.models import SearchQueryResponse
+from ee.onyx.server.query_and_chat.models import SendSearchQueryRequest
+from ee.onyx.server.query_and_chat.streaming_models import SearchErrorPacket
+from onyx.auth.users import current_user
+from onyx.db.engine.sql_engine import get_session
+from onyx.db.engine.sql_engine import get_session_with_current_tenant
+from onyx.db.models import User
+from onyx.llm.factory import get_default_llm
+from onyx.server.usage_limits import check_llm_cost_limit_for_provider
+from onyx.server.utils import get_json_line
+from onyx.utils.logger import setup_logger
+from shared_configs.contextvars import get_current_tenant_id
+
+logger = setup_logger()
+
+router = APIRouter(prefix="/search")
+
+
+@router.post("/search-flow-classification")
+def search_flow_classification(
+    request: SearchFlowClassificationRequest,
+    # This is added just to ensure this endpoint isn't spammed by non-authorized users since there's an LLM call underneath it
+    _: User | None = Depends(current_user),
+    db_session: Session = Depends(get_session),
+) -> SearchFlowClassificationResponse:
+    query = request.user_query
+    # This is a heuristic that if the user is typing a lot of text, it's unlikely they're looking for some specific document
+    # Most likely something needs to be done with the text included so we'll just classify it as a chat flow
+    if len(query) > 200:
+        return SearchFlowClassificationResponse(is_search_flow=False)
+
+    llm = get_default_llm()
+
+    check_llm_cost_limit_for_provider(
+        db_session=db_session,
+        tenant_id=get_current_tenant_id(),
+        llm_provider_api_key=llm.config.api_key,
+    )
+
+    try:
+        is_search_flow = classify_is_search_flow(query=query, llm=llm)
+    except Exception as e:
+        logger.exception(
+            "Search flow classification failed; defaulting to chat flow",
+            exc_info=e,
+        )
+        is_search_flow = False
+
+    return SearchFlowClassificationResponse(is_search_flow=is_search_flow)
+
+
+@router.post("/send-search-message", response_model=None)
+def handle_send_search_message(
+    request: SendSearchQueryRequest,
+    user: User | None = Depends(current_user),
+    db_session: Session = Depends(get_session),
+) -> StreamingResponse | SearchFullResponse:
+    """
+    Execute a search query with optional streaming.
+
+    When stream=True: Returns StreamingResponse with SSE
+    When stream=False: Returns SearchFullResponse
+    """
+    logger.debug(f"Received search query: {request.search_query}")
+
+    # Non-streaming path: runs on the request-scoped session; only NotImplementedError becomes an error payload
+    if not request.stream:
+        try:
+            packets = stream_search_query(request, user, db_session)
+            return gather_search_stream(packets)
+        except NotImplementedError as e:
+            return SearchFullResponse(
+                all_executed_queries=[],
+                search_docs=[],
+                error=str(e),
+            )
+
+    # Streaming path: the generator opens its own session; errors other than HTTPException are sent as SearchErrorPacket
+    def stream_generator() -> Generator[str, None, None]:
+        try:
+            with get_session_with_current_tenant() as streaming_db_session:
+                for packet in stream_search_query(request, user, streaming_db_session):
+                    yield get_json_line(packet.model_dump())
+        except NotImplementedError as e:
+            yield get_json_line(SearchErrorPacket(error=str(e)).model_dump())
+        except HTTPException:
+            raise
+        except Exception as e:
+            logger.exception("Error in search streaming")
+            yield get_json_line(SearchErrorPacket(error=str(e)).model_dump())
+
+    return StreamingResponse(stream_generator(), media_type="text/event-stream")
+
+
+@router.get("/search-history")
+def get_search_history(
+    limit: int = 100,
+    filter_days: int | None = None,
+    user: User | None = Depends(current_user),
+    db_session: Session = Depends(get_session),
+) -> SearchHistoryResponse:
+    """
+    Fetch past search queries for the authenticated user.
+
+    Args:
+        limit: Maximum number of queries to return (default 100)
+        filter_days: Only return queries from the last N days (optional)
+
+    Returns:
+        SearchHistoryResponse with list of search queries, ordered by most recent first.
+    """
+    # Validate limit
+    if limit <= 0:
+        raise HTTPException(
+            status_code=400,
+            detail="limit must be greater than 0",
+        )
+    if limit > 1000:
+        raise HTTPException(
+            status_code=400,
+            detail="limit must be at most 1000",
+        )
+
+    # Validate filter_days
+    if filter_days is not None and filter_days <= 0:
+        raise HTTPException(
+            status_code=400,
+            detail="filter_days must be greater than 0",
+        )
+
+    # TODO(yuhong) remove this
+    if user is None:
+        # Return empty list for unauthenticated users
+        return SearchHistoryResponse(search_queries=[])
+
+    search_queries = fetch_search_queries_for_user(
+        db_session=db_session,
+        user_id=user.id,
+        filter_days=filter_days,
+        limit=limit,
+    )
+
+    return SearchHistoryResponse(
+        search_queries=[
+            SearchQueryResponse(
+                query=sq.query,
+                query_expansions=sq.query_expansions,
+                created_at=sq.created_at,
+            )
+            for sq in search_queries
+        ]
+    )
--- a/backend/ee/onyx/server/query_and_chat/streaming_models.py
+++ b/backend/ee/onyx/server/query_and_chat/streaming_models.py
@@ -0,0 +1,35 @@
+from typing import Literal
+
+from pydantic import BaseModel
+from pydantic import ConfigDict
+
+from ee.onyx.server.query_and_chat.models import SearchDocWithContent
+
+
+class SearchQueriesPacket(BaseModel):
+    model_config = ConfigDict(frozen=True)
+
+    type: Literal["search_queries"] = "search_queries"
+    all_executed_queries: list[str]
+
+
+class SearchDocsPacket(BaseModel):
+    model_config = ConfigDict(frozen=True)
+
+    type: Literal["search_docs"] = "search_docs"
+    search_docs: list[SearchDocWithContent]
+
+
+class SearchErrorPacket(BaseModel):
+    model_config = ConfigDict(frozen=True)
+
+    type: Literal["search_error"] = "search_error"
+    error: str
+
+
+class LLMSelectedDocsPacket(BaseModel):
+    model_config = ConfigDict(frozen=True)
+
+    type: Literal["llm_selected_docs"] = "llm_selected_docs"
+    # None if LLM selection failed, empty list if no docs selected, list of IDs otherwise
+    llm_selected_doc_ids: list[str] | None
--- a/backend/ee/onyx/server/query_history/api.py
+++ b/backend/ee/onyx/server/query_history/api.py
@@ -32,6 +32,7 @@ from onyx.configs.constants import MessageType
 from onyx.configs.constants import OnyxCeleryPriority
 from onyx.configs.constants import OnyxCeleryQueues
 from onyx.configs.constants import OnyxCeleryTask
+from onyx.configs.constants import PUBLIC_API_TAGS
 from onyx.configs.constants import QAFeedbackType
 from onyx.configs.constants import QueryHistoryType
 from onyx.configs.constants import SessionType
@@ -294,7 +295,7 @@ def list_all_query_history_exports(
        )


-@router.post("/admin/query-history/start-export")
+@router.post("/admin/query-history/start-export", tags=PUBLIC_API_TAGS)
 def start_query_history_export(
    _: User | None = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
@@ -340,7 +341,7 @@ def start_query_history_export(
    return {"request_id": task_id}


-@router.get("/admin/query-history/export-status")
+@router.get("/admin/query-history/export-status", tags=PUBLIC_API_TAGS)
 def get_query_history_export_status(
    request_id: str,
    _: User | None = Depends(current_admin_user),
@@ -374,7 +375,7 @@ def get_query_history_export_status(
    return {"status": TaskStatus.SUCCESS}


-@router.get("/admin/query-history/download")
+@router.get("/admin/query-history/download", tags=PUBLIC_API_TAGS)
 def download_query_history_csv(
    request_id: str,
    _: User | None = Depends(current_admin_user),
--- a/backend/ee/onyx/server/settings/init.py
+++ b/backend/ee/onyx/server/settings/init.py
--- a/backend/ee/onyx/server/settings/api.py
+++ b/backend/ee/onyx/server/settings/api.py
@@ -0,0 +1,54 @@
+"""EE Settings API - provides license-aware settings override."""
+
+from redis.exceptions import RedisError
+
+from ee.onyx.configs.app_configs import LICENSE_ENFORCEMENT_ENABLED
+from ee.onyx.db.license import get_cached_license_metadata
+from onyx.server.settings.models import ApplicationStatus
+from onyx.server.settings.models import Settings
+from onyx.utils.logger import setup_logger
+from shared_configs.configs import MULTI_TENANT
+from shared_configs.contextvars import get_current_tenant_id
+
+logger = setup_logger()
+
+# Statuses that indicate a billing/license problem - propagate these to settings
+_GATED_STATUSES = frozenset(
+    {
+        ApplicationStatus.GATED_ACCESS,
+        ApplicationStatus.GRACE_PERIOD,
+        ApplicationStatus.PAYMENT_REMINDER,
+    }
+)
+
+
+def apply_license_status_to_settings(settings: Settings) -> Settings:
+    """EE version: checks license status for self-hosted deployments.
+
+    For self-hosted, looks up license metadata and overrides application_status
+    if the license is missing or indicates a problem (expired, grace period, etc.).
+
+    For multi-tenant (cloud), the settings already have the correct status
+    from the control plane, so no override is needed.
+
+    If LICENSE_ENFORCEMENT_ENABLED is false, settings are returned unchanged,
+    allowing the product to function normally without license checks.
+    """
+    if not LICENSE_ENFORCEMENT_ENABLED:
+        return settings
+
+    if MULTI_TENANT:
+        return settings
+
+    tenant_id = get_current_tenant_id()
+    try:
+        metadata = get_cached_license_metadata(tenant_id)
+        if metadata and metadata.status in _GATED_STATUSES:
+            settings.application_status = metadata.status  # mutates the passed-in object
+        elif not metadata:
+            # No license = gated access for self-hosted EE
+            settings.application_status = ApplicationStatus.GATED_ACCESS
+    except RedisError as e:
+        logger.warning(f"Failed to check license metadata for settings: {e}")  # fail open: status left as-is
+
+    return settings
--- a/backend/ee/onyx/server/tenant_usage_limits.py
+++ b/backend/ee/onyx/server/tenant_usage_limits.py
@@ -0,0 +1,133 @@
+"""Tenant-specific usage limit overrides from the control plane (EE version)."""
+
+import time
+
+import requests
+
+from ee.onyx.server.tenants.access import generate_data_plane_token
+from onyx.configs.app_configs import CONTROL_PLANE_API_BASE_URL
+from onyx.configs.app_configs import DEV_MODE
+from onyx.server.tenant_usage_limits import TenantUsageLimitOverrides
+from onyx.server.usage_limits import NO_LIMIT
+from onyx.utils.logger import setup_logger
+
+logger = setup_logger()
+
+
+# In-memory storage for tenant overrides (populated at startup)
+_tenant_usage_limit_overrides: dict[str, TenantUsageLimitOverrides] | None = None
+_last_fetch_time: float = 0.0
+_FETCH_INTERVAL = 60 * 60 * 24  # 24 hours
+_ERROR_FETCH_INTERVAL = 30 * 60  # 30 minutes (if the last fetch failed)
+
+
+def fetch_usage_limit_overrides() -> dict[str, TenantUsageLimitOverrides] | None:
+    """
+    Fetch tenant-specific usage limit overrides from the control plane.
+
+    Returns:
+        Dictionary mapping tenant_id to their specific limit overrides.
+        Returns None on any error, or when no overrides could be parsed.
+    """
+    try:
+        token = generate_data_plane_token()
+        headers = {
+            "Authorization": f"Bearer {token}",
+            "Content-Type": "application/json",
+        }
+        url = f"{CONTROL_PLANE_API_BASE_URL}/usage-limit-overrides"
+        response = requests.get(url, headers=headers, timeout=30)
+        response.raise_for_status()
+
+        tenant_overrides = response.json()
+
+        # Parse each tenant's overrides; entries that fail to construct are logged and skipped
+        result: dict[str, TenantUsageLimitOverrides] = {}
+        for override_data in tenant_overrides:
+            tenant_id = override_data["tenant_id"]  # a missing key reaches the outer except and discards the whole fetch
+            try:
+                result[tenant_id] = TenantUsageLimitOverrides(**override_data)
+            except Exception as e:
+                logger.warning(
+                    f"Failed to parse usage limit overrides for tenant {tenant_id}: {e}"
+                )
+
+        return (
+            result or None
+        )  # if empty dictionary, something went wrong and we shouldn't enforce limits
+
+    except requests.exceptions.RequestException as e:
+        logger.warning(f"Failed to fetch usage limit overrides from control plane: {e}")
+        return None
+    except Exception as e:
+        logger.error(f"Error parsing usage limit overrides: {e}")
+        return None
+
+
+def load_usage_limit_overrides() -> None:
+    """
+    Load tenant usage limit overrides from the control plane into the module-level cache.
+    """
+    global _tenant_usage_limit_overrides
+    global _last_fetch_time
+
+    logger.info("Loading tenant usage limit overrides from control plane...")
+    overrides = fetch_usage_limit_overrides()
+
+    _last_fetch_time = time.time()  # updated even when the fetch returned None
+
+    # use the new result if it exists, otherwise use the old result
+    # (prevents us from updating to a failed fetch result)
+    _tenant_usage_limit_overrides = overrides or _tenant_usage_limit_overrides
+
+    if overrides:
+        logger.info(f"Loaded usage limit overrides for {len(overrides)} tenants")
+    else:
+        logger.info("No tenant-specific usage limit overrides found")  # also reached when the fetch failed
+
+
+def unlimited(tenant_id: str) -> TenantUsageLimitOverrides:
+    return TenantUsageLimitOverrides(
+        tenant_id=tenant_id,
+        llm_cost_cents_trial=NO_LIMIT,
+        llm_cost_cents_paid=NO_LIMIT,
+        chunks_indexed_trial=NO_LIMIT,
+        chunks_indexed_paid=NO_LIMIT,
+        api_calls_trial=NO_LIMIT,
+        api_calls_paid=NO_LIMIT,
+        non_streaming_calls_trial=NO_LIMIT,
+        non_streaming_calls_paid=NO_LIMIT,
+    )
+
+
+def get_tenant_usage_limit_overrides(
+    tenant_id: str,
+) -> TenantUsageLimitOverrides | None:
+    """
+    Get the usage limit overrides for a specific tenant.
+
+    Args:
+        tenant_id: The tenant ID to look up
+
+    Returns:
+        Unlimited overrides in DEV_MODE or if none are loaded; else the tenant's overrides, or None if it has none.
+    """
+
+    if DEV_MODE:  # in dev mode, we return unlimited limits for all tenants
+        return unlimited(tenant_id)
+
+    global _tenant_usage_limit_overrides
+    time_since = time.time() - _last_fetch_time
+    if (
+        _tenant_usage_limit_overrides is None and time_since > _ERROR_FETCH_INTERVAL
+    ) or (time_since > _FETCH_INTERVAL):  # retry sooner while nothing is cached, else refresh on the long interval
+        logger.debug(
+            f"Last fetch time: {_last_fetch_time}, time since last fetch: {time_since}"
+        )
+
+        load_usage_limit_overrides()
+
+    # If we have failed to fetch from the control plane, don't usage limit anyone (dev mode already returned above).
+    if _tenant_usage_limit_overrides is None:
+        return unlimited(tenant_id)
+    return _tenant_usage_limit_overrides.get(tenant_id)
--- a/backend/ee/onyx/server/tenants/billing.py
+++ b/backend/ee/onyx/server/tenants/billing.py
@@ -1,9 +1,9 @@
 from typing import cast
+from typing import Literal

 import requests
 import stripe

-from ee.onyx.configs.app_configs import STRIPE_PRICE_ID
 from ee.onyx.configs.app_configs import STRIPE_SECRET_KEY
 from ee.onyx.server.tenants.access import generate_data_plane_token
 from ee.onyx.server.tenants.models import BillingInformation
@@ -16,15 +16,21 @@ stripe.api_key = STRIPE_SECRET_KEY
 logger = setup_logger()


-def fetch_stripe_checkout_session(tenant_id: str) -> str:
+def fetch_stripe_checkout_session(
+    tenant_id: str,
+    billing_period: Literal["monthly", "annual"] = "monthly",
+) -> str:
    token = generate_data_plane_token()
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    }
    url = f"{CONTROL_PLANE_API_BASE_URL}/create-checkout-session"
-    params = {"tenant_id": tenant_id}
-    response = requests.post(url, headers=headers, params=params)
+    payload = {
+        "tenant_id": tenant_id,
+        "billing_period": billing_period,
+    }
+    response = requests.post(url, headers=headers, json=payload, timeout=30)  # bounded wait; requests never times out by default
    response.raise_for_status()
    return response.json()["sessionId"]

@@ -70,24 +76,46 @@ def fetch_billing_information(
    return BillingInformation(**response_data)


+def fetch_customer_portal_session(tenant_id: str, return_url: str | None = None) -> str:
+    """
+    Fetch a Stripe customer portal session URL from the control plane.
+    NOTE: This is currently only used for multi-tenant (cloud) deployments.
+    Self-hosted proxy endpoints will be added in a future phase.
+    """
+    token = generate_data_plane_token()
+    headers = {
+        "Authorization": f"Bearer {token}",
+        "Content-Type": "application/json",
+    }
+    url = f"{CONTROL_PLANE_API_BASE_URL}/create-customer-portal-session"
+    payload = {"tenant_id": tenant_id}
+    if return_url:
+        payload["return_url"] = return_url  # key is omitted entirely when no return URL is given
+    response = requests.post(url, headers=headers, json=payload, timeout=30)  # bounded wait; requests never times out by default
+    response.raise_for_status()
+    return response.json()["url"]
+
+
 def register_tenant_users(tenant_id: str, number_of_users: int) -> stripe.Subscription:
    """
-    Send a request to the control service to register the number of users for a tenant.
+    Update the number of seats for a tenant's subscription.
+    Preserves the existing price (monthly, annual, or grandfathered).
    """
-
-    if not STRIPE_PRICE_ID:
-        raise Exception("STRIPE_PRICE_ID is not set")
-
    response = fetch_tenant_stripe_information(tenant_id)
    stripe_subscription_id = cast(str, response.get("stripe_subscription_id"))

    subscription = stripe.Subscription.retrieve(stripe_subscription_id)
+    subscription_item = subscription["items"]["data"][0]
+
+    # Use existing price to preserve the customer's current plan
+    current_price_id = subscription_item.price.id
+
    updated_subscription = stripe.Subscription.modify(
        stripe_subscription_id,
        items=[
            {
-                "id": subscription["items"]["data"][0].id,
-                "price": STRIPE_PRICE_ID,
+                "id": subscription_item.id,
+                "price": current_price_id,
                "quantity": number_of_users,
            }
        ],
--- a/backend/ee/onyx/server/tenants/billing_api.py
+++ b/backend/ee/onyx/server/tenants/billing_api.py
@@ -1,15 +1,14 @@
-import stripe
 from fastapi import APIRouter
 from fastapi import Depends
 from fastapi import HTTPException

 from ee.onyx.auth.users import current_admin_user
-from ee.onyx.configs.app_configs import STRIPE_SECRET_KEY
 from ee.onyx.server.tenants.access import control_plane_dep
 from ee.onyx.server.tenants.billing import fetch_billing_information
+from ee.onyx.server.tenants.billing import fetch_customer_portal_session
 from ee.onyx.server.tenants.billing import fetch_stripe_checkout_session
-from ee.onyx.server.tenants.billing import fetch_tenant_stripe_information
 from ee.onyx.server.tenants.models import BillingInformation
+from ee.onyx.server.tenants.models import CreateSubscriptionSessionRequest
 from ee.onyx.server.tenants.models import ProductGatingFullSyncRequest
 from ee.onyx.server.tenants.models import ProductGatingRequest
 from ee.onyx.server.tenants.models import ProductGatingResponse
@@ -23,7 +22,6 @@ from onyx.utils.logger import setup_logger
 from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
 from shared_configs.contextvars import get_current_tenant_id

-stripe.api_key = STRIPE_SECRET_KEY
 logger = setup_logger()

 router = APIRouter(prefix="/tenants")
@@ -82,21 +80,17 @@ async def billing_information(
 async def create_customer_portal_session(
    _: User = Depends(current_admin_user),
 ) -> dict:
+    """
+    Create a Stripe customer portal session via the control plane.
+    NOTE: This is currently only used for multi-tenant (cloud) deployments.
+    Self-hosted proxy endpoints will be added in a future phase.
+    """
    tenant_id = get_current_tenant_id()
+    return_url = f"{WEB_DOMAIN}/admin/billing"

    try:
-        stripe_info = fetch_tenant_stripe_information(tenant_id)
-        stripe_customer_id = stripe_info.get("stripe_customer_id")
-        if not stripe_customer_id:
-            raise HTTPException(status_code=400, detail="Stripe customer ID not found")
-        logger.info(stripe_customer_id)
-
-        portal_session = stripe.billing_portal.Session.create(
-            customer=stripe_customer_id,
-            return_url=f"{WEB_DOMAIN}/admin/billing",
-        )
-        logger.info(portal_session)
-        return {"url": portal_session.url}
+        portal_url = fetch_customer_portal_session(tenant_id, return_url)
+        return {"url": portal_url}
    except Exception as e:
        logger.exception("Failed to create customer portal session")
        raise HTTPException(status_code=500, detail=str(e))
@@ -104,15 +98,18 @@ async def create_customer_portal_session(

@router.post("/create-subscription-session")
 async def create_subscription_session(
+    request: CreateSubscriptionSessionRequest | None = None,
    _: User = Depends(current_admin_user),
 ) -> SubscriptionSessionResponse:
    try:
        tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
        if not tenant_id:
            raise HTTPException(status_code=400, detail="Tenant ID not found")
-        session_id = fetch_stripe_checkout_session(tenant_id)
+
+        billing_period = request.billing_period if request else "monthly"
+        session_id = fetch_stripe_checkout_session(tenant_id, billing_period)
        return SubscriptionSessionResponse(sessionId=session_id)

    except Exception as e:
-        logger.exception("Failed to create resubscription session")
+        logger.exception("Failed to create subscription session")
        raise HTTPException(status_code=500, detail=str(e))
--- a/backend/ee/onyx/server/tenants/models.py
+++ b/backend/ee/onyx/server/tenants/models.py
@@ -1,4 +1,5 @@
 from datetime import datetime
+from typing import Literal

 from pydantic import BaseModel

@@ -73,6 +74,12 @@ class SubscriptionSessionResponse(BaseModel):
    sessionId: str


+class CreateSubscriptionSessionRequest(BaseModel):
+    """Request to create a subscription checkout session."""
+
+    billing_period: Literal["monthly", "annual"] = "monthly"
+
+
 class TenantByDomainResponse(BaseModel):
    tenant_id: str
    number_of_users: int
--- a/backend/ee/onyx/server/tenants/product_gating.py
+++ b/backend/ee/onyx/server/tenants/product_gating.py
@@ -65,3 +65,9 @@ def get_gated_tenants() -> set[str]:
    redis_client = get_redis_replica_client(tenant_id=ONYX_CLOUD_TENANT_ID)
    gated_tenants_bytes = cast(set[bytes], redis_client.smembers(GATED_TENANTS_KEY))
    return {tenant_id.decode("utf-8") for tenant_id in gated_tenants_bytes}
+
+
+def is_tenant_gated(tenant_id: str) -> bool:
+    """Return True when tenant_id is a member of the gated-tenants set (multi-tenant only)."""
+    replica_client = get_redis_replica_client(tenant_id=ONYX_CLOUD_TENANT_ID)
+    return bool(replica_client.sismember(GATED_TENANTS_KEY, tenant_id))
--- a/backend/ee/onyx/server/tenants/provisioning.py
+++ b/backend/ee/onyx/server/tenants/provisioning.py
@@ -1,5 +1,4 @@
 import asyncio
-import logging
 import uuid

 import aiohttp  # Async HTTP client
@@ -10,10 +9,7 @@ from fastapi import Request
 from sqlalchemy import select
 from sqlalchemy.orm import Session

-from ee.onyx.configs.app_configs import ANTHROPIC_DEFAULT_API_KEY
-from ee.onyx.configs.app_configs import COHERE_DEFAULT_API_KEY
 from ee.onyx.configs.app_configs import HUBSPOT_TRACKING_URL
-from ee.onyx.configs.app_configs import OPENAI_DEFAULT_API_KEY
 from ee.onyx.server.tenants.access import generate_data_plane_token
 from ee.onyx.server.tenants.models import TenantByDomainResponse
 from ee.onyx.server.tenants.models import TenantCreationPayload
@@ -25,11 +21,18 @@ from ee.onyx.server.tenants.user_mapping import add_users_to_tenant
 from ee.onyx.server.tenants.user_mapping import get_tenant_id_for_email
 from ee.onyx.server.tenants.user_mapping import user_owns_a_tenant
 from onyx.auth.users import exceptions
+from onyx.configs.app_configs import ANTHROPIC_DEFAULT_API_KEY
+from onyx.configs.app_configs import COHERE_DEFAULT_API_KEY
 from onyx.configs.app_configs import CONTROL_PLANE_API_BASE_URL
 from onyx.configs.app_configs import DEV_MODE
+from onyx.configs.app_configs import OPENAI_DEFAULT_API_KEY
+from onyx.configs.app_configs import OPENROUTER_DEFAULT_API_KEY
+from onyx.configs.app_configs import VERTEXAI_DEFAULT_CREDENTIALS
+from onyx.configs.app_configs import VERTEXAI_DEFAULT_LOCATION
 from onyx.configs.constants import MilestoneRecordType
 from onyx.db.engine.sql_engine import get_session_with_shared_schema
 from onyx.db.engine.sql_engine import get_session_with_tenant
+from onyx.db.image_generation import create_default_image_gen_config_from_api_key
 from onyx.db.llm import update_default_provider
 from onyx.db.llm import upsert_cloud_embedding_provider
 from onyx.db.llm import upsert_llm_provider
@@ -37,15 +40,25 @@ from onyx.db.models import AvailableTenant
 from onyx.db.models import IndexModelStatus
 from onyx.db.models import SearchSettings
 from onyx.db.models import UserTenantMapping
-from onyx.llm.llm_provider_options import ANTHROPIC_PROVIDER_NAME
-from onyx.llm.llm_provider_options import get_anthropic_model_names
-from onyx.llm.llm_provider_options import get_openai_model_names
-from onyx.llm.llm_provider_options import OPENAI_PROVIDER_NAME
+from onyx.llm.well_known_providers.auto_update_models import LLMRecommendations
+from onyx.llm.well_known_providers.constants import ANTHROPIC_PROVIDER_NAME
+from onyx.llm.well_known_providers.constants import OPENAI_PROVIDER_NAME
+from onyx.llm.well_known_providers.constants import OPENROUTER_PROVIDER_NAME
+from onyx.llm.well_known_providers.constants import VERTEX_CREDENTIALS_FILE_KWARG
+from onyx.llm.well_known_providers.constants import VERTEX_LOCATION_KWARG
+from onyx.llm.well_known_providers.constants import VERTEXAI_PROVIDER_NAME
+from onyx.llm.well_known_providers.llm_provider_options import (
+    get_recommendations,
+)
+from onyx.llm.well_known_providers.llm_provider_options import (
+    model_configurations_for_provider,
+)
 from onyx.server.manage.embedding.models import CloudEmbeddingProviderCreationRequest
 from onyx.server.manage.llm.models import LLMProviderUpsertRequest
 from onyx.server.manage.llm.models import ModelConfigurationUpsertRequest
 from onyx.setup import setup_onyx
-from onyx.utils.telemetry import create_milestone_and_report
+from onyx.utils.logger import setup_logger
+from onyx.utils.telemetry import mt_cloud_telemetry
 from shared_configs.configs import MULTI_TENANT
 from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
 from shared_configs.configs import TENANT_ID_PREFIX
@@ -53,7 +66,7 @@ from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
 from shared_configs.enums import EmbeddingProvider


-logger = logging.getLogger(__name__)
+logger = setup_logger()


 async def get_or_provision_tenant(
@@ -262,61 +275,173 @@ async def rollback_tenant_provisioning(tenant_id: str) -> None:
        logger.info(f"Tenant rollback completed successfully for tenant {tenant_id}")


-def configure_default_api_keys(db_session: Session) -> None:
-    if ANTHROPIC_DEFAULT_API_KEY:
-        anthropic_provider = LLMProviderUpsertRequest(
-            name="Anthropic",
-            provider=ANTHROPIC_PROVIDER_NAME,
-            api_key=ANTHROPIC_DEFAULT_API_KEY,
-            default_model_name="claude-3-7-sonnet-20250219",
-            fast_default_model_name="claude-3-5-sonnet-20241022",
-            model_configurations=[
-                ModelConfigurationUpsertRequest(
-                    name=name,
-                    is_visible=False,
-                    max_input_tokens=None,
-                )
-                for name in get_anthropic_model_names()
-            ],
-            api_key_changed=True,
-        )
-        try:
-            full_provider = upsert_llm_provider(anthropic_provider, db_session)
-            update_default_provider(full_provider.id, db_session)
-        except Exception as e:
-            logger.error(f"Failed to configure Anthropic provider: {e}")
-    else:
-        logger.error(
-            "ANTHROPIC_DEFAULT_API_KEY not set, skipping Anthropic provider configuration"
+def _build_model_configuration_upsert_requests(
+    provider_name: str,
+    recommendations: LLMRecommendations,
+) -> list[ModelConfigurationUpsertRequest]:
+    """Turn the provider's model configurations into a list of upsert requests."""
+    upsert_requests: list[ModelConfigurationUpsertRequest] = []
+    for config in model_configurations_for_provider(provider_name, recommendations):
+        # Carry over name, visibility, input-token limit and image-input support
+        request = ModelConfigurationUpsertRequest(
+            name=config.name,
+            is_visible=config.is_visible,
+            max_input_tokens=config.max_input_tokens,
+            supports_image_input=config.supports_image_input,
         )
+        upsert_requests.append(request)
+    return upsert_requests

+
+def configure_default_api_keys(db_session: Session) -> None:
+    """Configure default LLM providers using recommended-models.json for model selection."""
+    # Load recommendations from JSON config
+    recommendations = get_recommendations()
+
+    has_set_default_provider = False
+
+    def _upsert(request: LLMProviderUpsertRequest) -> None:
+        nonlocal has_set_default_provider
+        try:
+            provider = upsert_llm_provider(request, db_session)
+            if not has_set_default_provider:
+                update_default_provider(provider.id, db_session)
+                has_set_default_provider = True
+        except Exception as e:
+            logger.error(f"Failed to configure {request.provider} provider: {e}")
+
+    # Configure OpenAI provider
    if OPENAI_DEFAULT_API_KEY:
+        default_model = recommendations.get_default_model(OPENAI_PROVIDER_NAME)
+        if default_model is None:
+            logger.error(
+                f"No default model found for {OPENAI_PROVIDER_NAME} in recommendations"
+            )
+        default_model_name = default_model.name if default_model else "gpt-5.2"
+
        openai_provider = LLMProviderUpsertRequest(
            name="OpenAI",
            provider=OPENAI_PROVIDER_NAME,
            api_key=OPENAI_DEFAULT_API_KEY,
-            default_model_name="gpt-4o",
-            fast_default_model_name="gpt-4o-mini",
-            model_configurations=[
-                ModelConfigurationUpsertRequest(
-                    name=model_name,
-                    is_visible=False,
-                    max_input_tokens=None,
-                )
-                for model_name in get_openai_model_names()
-            ],
+            default_model_name=default_model_name,
+            model_configurations=_build_model_configuration_upsert_requests(
+                OPENAI_PROVIDER_NAME, recommendations
+            ),
            api_key_changed=True,
+            is_auto_mode=True,
        )
+        _upsert(openai_provider)
+
+        # Create default image generation config using the OpenAI API key
        try:
-            full_provider = upsert_llm_provider(openai_provider, db_session)
-            update_default_provider(full_provider.id, db_session)
+            create_default_image_gen_config_from_api_key(
+                db_session, OPENAI_DEFAULT_API_KEY
+            )
        except Exception as e:
-            logger.error(f"Failed to configure OpenAI provider: {e}")
+            logger.error(f"Failed to create default image gen config: {e}")
    else:
-        logger.error(
+        logger.info(
            "OPENAI_DEFAULT_API_KEY not set, skipping OpenAI provider configuration"
        )

+    # Configure Anthropic provider
+    if ANTHROPIC_DEFAULT_API_KEY:
+        default_model = recommendations.get_default_model(ANTHROPIC_PROVIDER_NAME)
+        if default_model is None:
+            logger.error(
+                f"No default model found for {ANTHROPIC_PROVIDER_NAME} in recommendations"
+            )
+        default_model_name = (
+            default_model.name if default_model else "claude-sonnet-4-5"
+        )
+
+        anthropic_provider = LLMProviderUpsertRequest(
+            name="Anthropic",
+            provider=ANTHROPIC_PROVIDER_NAME,
+            api_key=ANTHROPIC_DEFAULT_API_KEY,
+            default_model_name=default_model_name,
+            model_configurations=_build_model_configuration_upsert_requests(
+                ANTHROPIC_PROVIDER_NAME, recommendations
+            ),
+            api_key_changed=True,
+            is_auto_mode=True,
+        )
+        _upsert(anthropic_provider)
+    else:
+        logger.info(
+            "ANTHROPIC_DEFAULT_API_KEY not set, skipping Anthropic provider configuration"
+        )
+
+    # Configure Vertex AI provider
+    if VERTEXAI_DEFAULT_CREDENTIALS:
+        default_model = recommendations.get_default_model(VERTEXAI_PROVIDER_NAME)
+        if default_model is None:
+            logger.error(
+                f"No default model found for {VERTEXAI_PROVIDER_NAME} in recommendations"
+            )
+        default_model_name = default_model.name if default_model else "gemini-2.5-pro"
+
+        # Vertex AI uses custom_config for credentials and location
+        custom_config = {
+            VERTEX_CREDENTIALS_FILE_KWARG: VERTEXAI_DEFAULT_CREDENTIALS,
+            VERTEX_LOCATION_KWARG: VERTEXAI_DEFAULT_LOCATION,
+        }
+
+        vertexai_provider = LLMProviderUpsertRequest(
+            name="Google Vertex AI",
+            provider=VERTEXAI_PROVIDER_NAME,
+            custom_config=custom_config,
+            default_model_name=default_model_name,
+            model_configurations=_build_model_configuration_upsert_requests(
+                VERTEXAI_PROVIDER_NAME, recommendations
+            ),
+            api_key_changed=True,
+            is_auto_mode=True,
+        )
+        _upsert(vertexai_provider)
+    else:
+        logger.info(
+            "VERTEXAI_DEFAULT_CREDENTIALS not set, skipping Vertex AI provider configuration"
+        )
+
+    # Configure OpenRouter provider
+    if OPENROUTER_DEFAULT_API_KEY:
+        default_model = recommendations.get_default_model(OPENROUTER_PROVIDER_NAME)
+        if default_model is None:
+            logger.error(
+                f"No default model found for {OPENROUTER_PROVIDER_NAME} in recommendations"
+            )
+        default_model_name = default_model.name if default_model else "z-ai/glm-4.7"
+
+        # For OpenRouter, we use the visible models from recommendations as model_configurations
+        # since OpenRouter models are dynamic (fetched from their API)
+        visible_models = recommendations.get_visible_models(OPENROUTER_PROVIDER_NAME)
+        model_configurations = [
+            ModelConfigurationUpsertRequest(
+                name=model.name,
+                is_visible=True,
+                max_input_tokens=None,
+                display_name=model.display_name,
+            )
+            for model in visible_models
+        ]
+
+        openrouter_provider = LLMProviderUpsertRequest(
+            name="OpenRouter",
+            provider=OPENROUTER_PROVIDER_NAME,
+            api_key=OPENROUTER_DEFAULT_API_KEY,
+            default_model_name=default_model_name,
+            model_configurations=model_configurations,
+            api_key_changed=True,
+            is_auto_mode=True,
+        )
+        _upsert(openrouter_provider)
+    else:
+        logger.info(
+            "OPENROUTER_DEFAULT_API_KEY not set, skipping OpenRouter provider configuration"
+        )
+
+    # Configure Cohere embedding provider
    if COHERE_DEFAULT_API_KEY:
        cloud_embedding_provider = CloudEmbeddingProviderCreationRequest(
            provider_type=EmbeddingProvider.COHERE,
@@ -562,17 +687,11 @@ async def assign_tenant_to_user(
    try:
        add_users_to_tenant([email], tenant_id)

-        # Create milestone record in the same transaction context as the tenant assignment
-        with get_session_with_tenant(tenant_id=tenant_id) as db_session:
-            create_milestone_and_report(
-                user=None,
-                distinct_id=tenant_id,
-                event_type=MilestoneRecordType.TENANT_CREATED,
-                properties={
-                    "email": email,
-                },
-                db_session=db_session,
-            )
+        mt_cloud_telemetry(
+            tenant_id=tenant_id,
+            distinct_id=email,
+            event=MilestoneRecordType.TENANT_CREATED,
+        )
    except Exception:
        logger.exception(f"Failed to assign tenant {tenant_id} to user {email}")
        raise Exception("Failed to assign tenant to user")
--- a/backend/ee/onyx/server/tenants/user_mapping.py
+++ b/backend/ee/onyx/server/tenants/user_mapping.py
@@ -249,6 +249,17 @@ def accept_user_invite(email: str, tenant_id: str) -> None:
            )
            raise

+    # Remove from invited users list since they've accepted
+    token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
+    try:
+        invited_users = get_invited_users()
+        if email in invited_users:
+            invited_users.remove(email)
+            write_invited_users(invited_users)
+            logger.info(f"Removed {email} from invited users list after acceptance")
+    finally:
+        CURRENT_TENANT_ID_CONTEXTVAR.reset(token)
+

 def deny_user_invite(email: str, tenant_id: str) -> None:
    """
--- a/backend/ee/onyx/server/token_rate_limits/api.py
+++ b/backend/ee/onyx/server/token_rate_limits/api.py
@@ -9,6 +9,7 @@ from ee.onyx.db.token_limit import fetch_user_group_token_rate_limits_for_user
 from ee.onyx.db.token_limit import insert_user_group_token_rate_limit
 from onyx.auth.users import current_admin_user
 from onyx.auth.users import current_curator_or_admin_user
+from onyx.configs.constants import PUBLIC_API_TAGS
 from onyx.db.engine.sql_engine import get_session
 from onyx.db.models import User
 from onyx.db.token_limit import fetch_all_user_token_rate_limits
@@ -17,7 +18,7 @@ from onyx.server.query_and_chat.token_limit import any_rate_limit_exists
 from onyx.server.token_rate_limits.models import TokenRateLimitArgs
 from onyx.server.token_rate_limits.models import TokenRateLimitDisplay

-router = APIRouter(prefix="/admin/token-rate-limits")
+router = APIRouter(prefix="/admin/token-rate-limits", tags=PUBLIC_API_TAGS)


 """
--- a/backend/ee/onyx/server/usage_limits.py
+++ b/backend/ee/onyx/server/usage_limits.py
@@ -0,0 +1,38 @@
+"""EE Usage limits - trial detection via billing information."""
+
+from ee.onyx.server.tenants.billing import fetch_billing_information
+from ee.onyx.server.tenants.models import BillingInformation
+from ee.onyx.server.tenants.models import SubscriptionStatusResponse
+from onyx.utils.logger import setup_logger
+from shared_configs.configs import MULTI_TENANT
+
+logger = setup_logger()
+
+
+def is_tenant_on_trial(tenant_id: str) -> bool:
+    """
+    Report whether the given tenant should be treated as a trial tenant.
+
+    Always False outside multi-tenant mode. Otherwise the answer is derived from
+    fetch_billing_information(); if that raises, the tenant counts as on trial.
+    """
+    if not MULTI_TENANT:
+        return False
+
+    try:
+        info = fetch_billing_information(tenant_id)
+
+        if isinstance(info, SubscriptionStatusResponse):
+            # No subscription means they're likely on trial (new tenant)
+            on_trial = True
+        elif isinstance(info, BillingInformation):
+            # Billing record present: on trial only while its status is "trialing"
+            on_trial = info.status == "trialing"
+        else:
+            on_trial = False
+    except Exception as e:
+        logger.warning(f"Failed to fetch billing info for trial check: {e}")
+        # Default to trial limits on error (more restrictive = safer)
+        on_trial = True
+
+    return on_trial
--- a/backend/ee/onyx/server/user_group/api.py
+++ b/backend/ee/onyx/server/user_group/api.py
@@ -18,6 +18,7 @@ from ee.onyx.server.user_group.models import UserGroupCreate
 from ee.onyx.server.user_group.models import UserGroupUpdate
 from onyx.auth.users import current_admin_user
 from onyx.auth.users import current_curator_or_admin_user
+from onyx.configs.constants import PUBLIC_API_TAGS
 from onyx.db.engine.sql_engine import get_session
 from onyx.db.models import User
 from onyx.db.models import UserRole
@@ -25,7 +26,7 @@ from onyx.utils.logger import setup_logger

 logger = setup_logger()

-router = APIRouter(prefix="/manage")
+router = APIRouter(prefix="/manage", tags=PUBLIC_API_TAGS)


@router.get("/admin/user-group")
--- a/backend/ee/onyx/utils/license.py
+++ b/backend/ee/onyx/utils/license.py
@@ -0,0 +1,126 @@
+"""RSA-4096 license signature verification utilities."""
+
+import base64
+import json
+import os
+from datetime import datetime
+from datetime import timezone
+
+from cryptography.exceptions import InvalidSignature
+from cryptography.hazmat.primitives import hashes
+from cryptography.hazmat.primitives import serialization
+from cryptography.hazmat.primitives.asymmetric import padding
+from cryptography.hazmat.primitives.asymmetric.rsa import RSAPublicKey
+
+from ee.onyx.server.license.models import LicenseData
+from ee.onyx.server.license.models import LicensePayload
+from onyx.server.settings.models import ApplicationStatus
+from onyx.utils.logger import setup_logger
+
+logger = setup_logger()
+
+
+# RSA-4096 Public Key for license verification
+# Load from environment variable - key is generated on the control plane
+# In production, inject via Kubernetes secrets or secrets manager
+LICENSE_PUBLIC_KEY_PEM = os.environ.get("LICENSE_PUBLIC_KEY_PEM", "")
+
+
+def _get_public_key() -> RSAPublicKey:
+    """Parse LICENSE_PUBLIC_KEY_PEM as PEM; raise ValueError if unset or not RSA."""
+    if not LICENSE_PUBLIC_KEY_PEM:
+        raise ValueError(
+            "LICENSE_PUBLIC_KEY_PEM environment variable not set. "
+            "License verification requires the control plane public key."
+        )
+    loaded = serialization.load_pem_public_key(LICENSE_PUBLIC_KEY_PEM.encode())
+    if isinstance(loaded, RSAPublicKey):
+        return loaded
+    raise ValueError("Expected RSA public key")
+
+
+def verify_license_signature(license_data: str) -> LicensePayload:
+    """
+    Verify RSA-4096 signature and return payload if valid.
+
+    Args:
+        license_data: Base64-encoded JSON containing payload and signature
+
+    Returns:
+        LicensePayload if signature is valid
+
+    Raises:
+        ValueError: If the key is unavailable or the license is malformed or unverified
+    """
+    public_key: RSAPublicKey | None = None
+    try:
+        # Load the key first: if this fails, public_key stays None (checked below)
+        public_key = _get_public_key()
+        decoded = json.loads(base64.b64decode(license_data))
+        license_obj = LicenseData(**decoded)
+        payload_json = json.dumps(
+            license_obj.payload.model_dump(mode="json"), sort_keys=True
+        )
+        signature_bytes = base64.b64decode(license_obj.signature)
+        # Verify signature using PSS padding (modern standard)
+        public_key.verify(
+            signature_bytes,
+            payload_json.encode(),
+            padding.PSS(
+                mgf=padding.MGF1(hashes.SHA256()),
+                salt_length=padding.PSS.MAX_LENGTH,
+            ),
+            hashes.SHA256(),
+        )
+        return license_obj.payload
+    except InvalidSignature:
+        logger.error("License signature verification failed")
+        raise ValueError("Invalid license signature")
+    except json.JSONDecodeError:
+        logger.error("Failed to decode license JSON")
+        raise ValueError("Invalid license format: not valid JSON")
+    except (ValueError, KeyError, TypeError) as e:
+        if public_key is None:  # key problem, not a bad license
+            logger.error(f"License public key unavailable: {e}")
+            raise
+        logger.error(f"License data validation error: {type(e).__name__}")
+        raise ValueError(f"Invalid license format: {type(e).__name__}")
+    except Exception:
+        logger.exception("Unexpected error during license verification")
+        raise ValueError("License verification failed: unexpected error")
+
+
+def get_license_status(
+    payload: LicensePayload,
+    grace_period_end: datetime | None = None,
+) -> ApplicationStatus:
+    """
+    Classify a verified license as active, in its grace period, or gated.
+
+    Args:
+        payload: The verified license payload (only ``expires_at`` is read)
+        grace_period_end: Optional end of the grace period
+
+    Returns:
+        ApplicationStatus for the current UTC time
+    """
+    current_time = datetime.now(timezone.utc)
+
+    # A grace period that has already ended gates access, whatever the expiry
+    if grace_period_end is not None and current_time > grace_period_end:
+        return ApplicationStatus.GATED_ACCESS
+
+    # Unexpired license
+    if current_time <= payload.expires_at:
+        return ApplicationStatus.ACTIVE
+
+    # Expired: a grace period that exists must still be running at this point
+    if grace_period_end is not None:
+        return ApplicationStatus.GRACE_PERIOD
+    return ApplicationStatus.GATED_ACCESS
+
+
+def is_license_valid(payload: LicensePayload) -> bool:
+    """Return True if the current UTC time is at or before the license expiry."""
+    expiry = payload.expires_at
+    return datetime.now(timezone.utc) <= expiry
--- a/backend/model_server/constants.py
+++ b/backend/model_server/constants.py
@@ -1,5 +1,4 @@
 MODEL_WARM_UP_STRING = "hi " * 512
-INFORMATION_CONTENT_MODEL_WARM_UP_STRING = "hi " * 16


 class GPUStatus:
--- a/backend/model_server/custom_models.py
+++ b/backend/model_server/custom_models.py
@@ -1,562 +0,0 @@
-from typing import cast
-from typing import Optional
-from typing import TYPE_CHECKING
-
-import numpy as np
-import torch
-import torch.nn.functional as F
-from fastapi import APIRouter
-from huggingface_hub import snapshot_download  # type: ignore
-
-from model_server.constants import INFORMATION_CONTENT_MODEL_WARM_UP_STRING
-from model_server.constants import MODEL_WARM_UP_STRING
-from model_server.onyx_torch_model import ConnectorClassifier
-from model_server.onyx_torch_model import HybridClassifier
-from model_server.utils import simple_log_function_time
-from onyx.utils.logger import setup_logger
-from shared_configs.configs import CONNECTOR_CLASSIFIER_MODEL_REPO
-from shared_configs.configs import CONNECTOR_CLASSIFIER_MODEL_TAG
-from shared_configs.configs import (
-    INDEXING_INFORMATION_CONTENT_CLASSIFICATION_CUTOFF_LENGTH,
-)
-from shared_configs.configs import INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MAX
-from shared_configs.configs import INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MIN
-from shared_configs.configs import (
-    INDEXING_INFORMATION_CONTENT_CLASSIFICATION_TEMPERATURE,
-)
-from shared_configs.configs import INDEXING_ONLY
-from shared_configs.configs import INFORMATION_CONTENT_MODEL_TAG
-from shared_configs.configs import INFORMATION_CONTENT_MODEL_VERSION
-from shared_configs.configs import INTENT_MODEL_TAG
-from shared_configs.configs import INTENT_MODEL_VERSION
-from shared_configs.model_server_models import ConnectorClassificationRequest
-from shared_configs.model_server_models import ConnectorClassificationResponse
-from shared_configs.model_server_models import ContentClassificationPrediction
-from shared_configs.model_server_models import IntentRequest
-from shared_configs.model_server_models import IntentResponse
-
-if TYPE_CHECKING:
-    from setfit import SetFitModel  # type: ignore
-    from transformers import PreTrainedTokenizer, BatchEncoding  # type: ignore
-
-
-logger = setup_logger()
-
-router = APIRouter(prefix="/custom")
-
-_CONNECTOR_CLASSIFIER_TOKENIZER: Optional["PreTrainedTokenizer"] = None
-_CONNECTOR_CLASSIFIER_MODEL: ConnectorClassifier | None = None
-
-_INTENT_TOKENIZER: Optional["PreTrainedTokenizer"] = None
-_INTENT_MODEL: HybridClassifier | None = None
-
-_INFORMATION_CONTENT_MODEL: Optional["SetFitModel"] = None
-
-_INFORMATION_CONTENT_MODEL_PROMPT_PREFIX: str = ""  # spec to model version!
-
-
-def get_connector_classifier_tokenizer() -> "PreTrainedTokenizer":
-    global _CONNECTOR_CLASSIFIER_TOKENIZER
-    from transformers import AutoTokenizer, PreTrainedTokenizer
-
-    if _CONNECTOR_CLASSIFIER_TOKENIZER is None:
-        # The tokenizer details are not uploaded to the HF hub since it's just the
-        # unmodified distilbert tokenizer.
-        _CONNECTOR_CLASSIFIER_TOKENIZER = cast(
-            PreTrainedTokenizer,
-            AutoTokenizer.from_pretrained("distilbert-base-uncased"),
-        )
-    return _CONNECTOR_CLASSIFIER_TOKENIZER
-
-
-def get_local_connector_classifier(
-    model_name_or_path: str = CONNECTOR_CLASSIFIER_MODEL_REPO,
-    tag: str = CONNECTOR_CLASSIFIER_MODEL_TAG,
-) -> ConnectorClassifier:
-    global _CONNECTOR_CLASSIFIER_MODEL
-    if _CONNECTOR_CLASSIFIER_MODEL is None:
-        try:
-            # Calculate where the cache should be, then load from local if available
-            local_path = snapshot_download(
-                repo_id=model_name_or_path, revision=tag, local_files_only=True
-            )
-            _CONNECTOR_CLASSIFIER_MODEL = ConnectorClassifier.from_pretrained(
-                local_path
-            )
-        except Exception as e:
-            logger.warning(f"Failed to load model directly: {e}")
-            try:
-                # Attempt to download the model snapshot
-                logger.info(f"Downloading model snapshot for {model_name_or_path}")
-                local_path = snapshot_download(repo_id=model_name_or_path, revision=tag)
-                _CONNECTOR_CLASSIFIER_MODEL = ConnectorClassifier.from_pretrained(
-                    local_path
-                )
-            except Exception as e:
-                logger.error(
-                    f"Failed to load model even after attempted snapshot download: {e}"
-                )
-                raise
-    return _CONNECTOR_CLASSIFIER_MODEL
-
-
-def get_intent_model_tokenizer() -> "PreTrainedTokenizer":
-    from transformers import AutoTokenizer, PreTrainedTokenizer
-
-    global _INTENT_TOKENIZER
-    if _INTENT_TOKENIZER is None:
-        # The tokenizer details are not uploaded to the HF hub since it's just the
-        # unmodified distilbert tokenizer.
-        _INTENT_TOKENIZER = cast(
-            PreTrainedTokenizer,
-            AutoTokenizer.from_pretrained("distilbert-base-uncased"),
-        )
-    return _INTENT_TOKENIZER
-
-
-def get_local_intent_model(
-    model_name_or_path: str = INTENT_MODEL_VERSION,
-    tag: str | None = INTENT_MODEL_TAG,
-) -> HybridClassifier:
-    global _INTENT_MODEL
-    if _INTENT_MODEL is None:
-        try:
-            # Calculate where the cache should be, then load from local if available
-            logger.notice(f"Loading model from local cache: {model_name_or_path}")
-            local_path = snapshot_download(
-                repo_id=model_name_or_path, revision=tag, local_files_only=True
-            )
-            _INTENT_MODEL = HybridClassifier.from_pretrained(local_path)
-            logger.notice(f"Loaded model from local cache: {local_path}")
-        except Exception as e:
-            logger.warning(f"Failed to load model directly: {e}")
-            try:
-                # Attempt to download the model snapshot
-                logger.notice(f"Downloading model snapshot for {model_name_or_path}")
-                local_path = snapshot_download(
-                    repo_id=model_name_or_path, revision=tag, local_files_only=False
-                )
-                _INTENT_MODEL = HybridClassifier.from_pretrained(local_path)
-            except Exception as e:
-                logger.error(
-                    f"Failed to load model even after attempted snapshot download: {e}"
-                )
-                raise
-    return _INTENT_MODEL
-
-
-def get_local_information_content_model(
-    model_name_or_path: str = INFORMATION_CONTENT_MODEL_VERSION,
-    tag: str | None = INFORMATION_CONTENT_MODEL_TAG,
-) -> "SetFitModel":
-    from setfit import SetFitModel
-
-    global _INFORMATION_CONTENT_MODEL
-    if _INFORMATION_CONTENT_MODEL is None:
-        try:
-            # Calculate where the cache should be, then load from local if available
-            logger.notice(
-                f"Loading content information model from local cache: {model_name_or_path}"
-            )
-            local_path = snapshot_download(
-                repo_id=model_name_or_path, revision=tag, local_files_only=True
-            )
-            _INFORMATION_CONTENT_MODEL = SetFitModel.from_pretrained(local_path)
-            logger.notice(
-                f"Loaded content information model from local cache: {local_path}"
-            )
-        except Exception as e:
-            logger.warning(f"Failed to load content information model directly: {e}")
-            try:
-                # Attempt to download the model snapshot
-                logger.notice(
-                    f"Downloading content information model snapshot for {model_name_or_path}"
-                )
-                local_path = snapshot_download(
-                    repo_id=model_name_or_path, revision=tag, local_files_only=False
-                )
-                _INFORMATION_CONTENT_MODEL = SetFitModel.from_pretrained(local_path)
-            except Exception as e:
-                logger.error(
-                    f"Failed to load content information model even after attempted snapshot download: {e}"
-                )
-                raise
-
-    return _INFORMATION_CONTENT_MODEL
-
-
-def tokenize_connector_classification_query(
-    connectors: list[str],
-    query: str,
-    tokenizer: "PreTrainedTokenizer",
-    connector_token_end_id: int,
-) -> tuple[torch.Tensor, torch.Tensor]:
-    """
-    Tokenize the connectors & user query into one prompt for the forward pass of ConnectorClassifier models
-
-    The attention mask is just all 1s. The prompt is CLS + each connector name suffixed with the connector end
-    token and then the user query.
-    """
-
-    input_ids = torch.tensor([tokenizer.cls_token_id], dtype=torch.long)
-
-    for connector in connectors:
-        connector_token_ids = tokenizer(
-            connector,
-            add_special_tokens=False,
-            return_tensors="pt",
-        )
-
-        input_ids = torch.cat(
-            (
-                input_ids,
-                connector_token_ids["input_ids"].squeeze(dim=0),
-                torch.tensor([connector_token_end_id], dtype=torch.long),
-            ),
-            dim=-1,
-        )
-    query_token_ids = tokenizer(
-        query,
-        add_special_tokens=False,
-        return_tensors="pt",
-    )
-
-    input_ids = torch.cat(
-        (
-            input_ids,
-            query_token_ids["input_ids"].squeeze(dim=0),
-            torch.tensor([tokenizer.sep_token_id], dtype=torch.long),
-        ),
-        dim=-1,
-    )
-    attention_mask = torch.ones(input_ids.numel(), dtype=torch.long)
-
-    return input_ids.unsqueeze(0), attention_mask.unsqueeze(0)
-
-
-def warm_up_connector_classifier_model() -> None:
-    logger.info(
-        f"Warming up connector_classifier model {CONNECTOR_CLASSIFIER_MODEL_TAG}"
-    )
-    connector_classifier_tokenizer = get_connector_classifier_tokenizer()
-    connector_classifier = get_local_connector_classifier()
-
-    input_ids, attention_mask = tokenize_connector_classification_query(
-        ["GitHub"],
-        "onyx classifier query google doc",
-        connector_classifier_tokenizer,
-        connector_classifier.connector_end_token_id,
-    )
-    input_ids = input_ids.to(connector_classifier.device)
-    attention_mask = attention_mask.to(connector_classifier.device)
-
-    connector_classifier(input_ids, attention_mask)
-
-
-def warm_up_intent_model() -> None:
-    logger.notice(f"Warming up Intent Model: {INTENT_MODEL_VERSION}")
-    intent_tokenizer = get_intent_model_tokenizer()
-    tokens = intent_tokenizer(
-        MODEL_WARM_UP_STRING, return_tensors="pt", truncation=True, padding=True
-    )
-
-    intent_model = get_local_intent_model()
-    device = intent_model.device
-    intent_model(
-        query_ids=tokens["input_ids"].to(device),
-        query_mask=tokens["attention_mask"].to(device),
-    )
-
-
-def warm_up_information_content_model() -> None:
-    logger.notice("Warming up Content Model")  # TODO: add version if needed
-
-    information_content_model = get_local_information_content_model()
-    information_content_model(INFORMATION_CONTENT_MODEL_WARM_UP_STRING)
-
-
-@simple_log_function_time()
-def run_inference(tokens: "BatchEncoding") -> tuple[list[float], list[float]]:
-    intent_model = get_local_intent_model()
-    device = intent_model.device
-
-    outputs = intent_model(
-        query_ids=tokens["input_ids"].to(device),
-        query_mask=tokens["attention_mask"].to(device),
-    )
-
-    token_logits = outputs["token_logits"]
-    intent_logits = outputs["intent_logits"]
-
-    # Move tensors to CPU before applying softmax and converting to numpy
-    intent_probabilities = F.softmax(intent_logits.cpu(), dim=-1).numpy()[0]
-    token_probabilities = F.softmax(token_logits.cpu(), dim=-1).numpy()[0]
-
-    # Extract the probabilities for the positive class (index 1) for each token
-    token_positive_probs = token_probabilities[:, 1].tolist()
-
-    return intent_probabilities.tolist(), token_positive_probs
-
-
-@simple_log_function_time()
-def run_content_classification_inference(
-    text_inputs: list[str],
-) -> list[ContentClassificationPrediction]:
-    """
-    Assign a score to the segments in question. The model stored in get_local_information_content_model()
-    creates the 'model score' based on its training, and the scores are then converted to a 0.0-1.0 scale.
-    In the code outside of the model/inference model servers that score will be converted into the actual
-    boost factor.
-    """
-
-    def _prob_to_score(prob: float) -> float:
-        """
-        Conversion of base score to 0.0 - 1.0 score. Note that the min/max values depend on the model!
-        """
-        _MIN_BASE_SCORE = 0.25
-        _MAX_BASE_SCORE = 0.75
-        if prob < _MIN_BASE_SCORE:
-            raw_score = 0.0
-        elif prob < _MAX_BASE_SCORE:
-            raw_score = (prob - _MIN_BASE_SCORE) / (_MAX_BASE_SCORE - _MIN_BASE_SCORE)
-        else:
-            raw_score = 1.0
-        return (
-            INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MIN
-            + (
-                INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MAX
-                - INDEXING_INFORMATION_CONTENT_CLASSIFICATION_MIN
-            )
-            * raw_score
-        )
-
-    _BATCH_SIZE = 32
-    content_model = get_local_information_content_model()
-
-    # Process inputs in batches
-    all_output_classes: list[int] = []
-    all_base_output_probabilities: list[float] = []
-
-    for i in range(0, len(text_inputs), _BATCH_SIZE):
-        batch = text_inputs[i : i + _BATCH_SIZE]
-        batch_with_prefix = []
-        batch_indices = []
-
-        # Pre-allocate results for this batch
-        batch_output_classes: list[np.ndarray] = [np.array(1)] * len(batch)
-        batch_probabilities: list[np.ndarray] = [np.array(1.0)] * len(batch)
-
-        # Pre-process batch to handle long input exceptions
-        for j, text in enumerate(batch):
-            if len(text) == 0:
-                # if no input, treat as non-informative from the model's perspective
-                batch_output_classes[j] = np.array(0)
-                batch_probabilities[j] = np.array(0.0)
-                logger.warning("Input for Content Information Model is empty")
-
-            elif (
-                len(text.split())
-                <= INDEXING_INFORMATION_CONTENT_CLASSIFICATION_CUTOFF_LENGTH
-            ):
-                # if input is short, use the model
-                batch_with_prefix.append(
-                    _INFORMATION_CONTENT_MODEL_PROMPT_PREFIX + text
-                )
-                batch_indices.append(j)
-            else:
-                # if longer than cutoff, treat as informative (stay with default), but issue warning
-                logger.warning("Input for Content Information Model too long")
-
-        if batch_with_prefix:  # Only run model if we have valid inputs
-            # Get predictions for the batch
-            model_output_classes = content_model(batch_with_prefix)
-            model_output_probabilities = content_model.predict_proba(batch_with_prefix)
-
-            # Place results in the correct positions
-            for idx, batch_idx in enumerate(batch_indices):
-                batch_output_classes[batch_idx] = model_output_classes[idx].numpy()
-                batch_probabilities[batch_idx] = model_output_probabilities[idx][
-                    1
-                ].numpy()  # x[1] is prob of the positive class
-
-        all_output_classes.extend([int(x) for x in batch_output_classes])
-        all_base_output_probabilities.extend([float(x) for x in batch_probabilities])
-
-    logits = [
-        np.log(p / (1 - p)) if p != 0.0 and p != 1.0 else (100 if p == 1.0 else -100)
-        for p in all_base_output_probabilities
-    ]
-    scaled_logits = [
-        logit / INDEXING_INFORMATION_CONTENT_CLASSIFICATION_TEMPERATURE
-        for logit in logits
-    ]
-    output_probabilities_with_temp = [
-        np.exp(scaled_logit) / (1 + np.exp(scaled_logit))
-        for scaled_logit in scaled_logits
-    ]
-
-    prediction_scores = [
-        _prob_to_score(p_temp) for p_temp in output_probabilities_with_temp
-    ]
-
-    content_classification_predictions = [
-        ContentClassificationPrediction(
-            predicted_label=predicted_label, content_boost_factor=output_score
-        )
-        for predicted_label, output_score in zip(all_output_classes, prediction_scores)
-    ]
-
-    return content_classification_predictions
-
-
-def map_keywords(
-    input_ids: torch.Tensor, tokenizer: "PreTrainedTokenizer", is_keyword: list[bool]
-) -> list[str]:
-    tokens = tokenizer.convert_ids_to_tokens(input_ids)  # type: ignore
-
-    if not len(tokens) == len(is_keyword):
-        raise ValueError("Length of tokens and keyword predictions must match")
-
-    if input_ids[0] == tokenizer.cls_token_id:
-        tokens = tokens[1:]
-        is_keyword = is_keyword[1:]
-
-    if input_ids[-1] == tokenizer.sep_token_id:
-        tokens = tokens[:-1]
-        is_keyword = is_keyword[:-1]
-
-    unk_token = tokenizer.unk_token
-    if unk_token in tokens:
-        raise ValueError("Unknown token detected in the input")
-
-    keywords = []
-    current_keyword = ""
-
-    for ind, token in enumerate(tokens):
-        if is_keyword[ind]:
-            if token.startswith("##"):
-                current_keyword += token[2:]
-            else:
-                if current_keyword:
-                    keywords.append(current_keyword)
-                current_keyword = token
-        else:
-            # If mispredicted a later token of a keyword, add it to the current keyword
-            # to complete it
-            if current_keyword:
-                if len(current_keyword) > 2 and current_keyword.startswith("##"):
-                    current_keyword = current_keyword[2:]
-
-                else:
-                    keywords.append(current_keyword)
-                    current_keyword = ""
-
-    if current_keyword:
-        keywords.append(current_keyword)
-
-    return keywords
-
-
-def clean_keywords(keywords: list[str]) -> list[str]:
-    cleaned_words = []
-    for word in keywords:
-        word = word[:-2] if word.endswith("'s") else word
-        word = word.replace("/", " ")
-        word = word.replace("'", "").replace('"', "")
-        cleaned_words.extend([w for w in word.strip().split() if w and not w.isspace()])
-    return cleaned_words
-
-
-def run_connector_classification(req: ConnectorClassificationRequest) -> list[str]:
-    tokenizer = get_connector_classifier_tokenizer()
-    model = get_local_connector_classifier()
-
-    connector_names = req.available_connectors
-
-    input_ids, attention_mask = tokenize_connector_classification_query(
-        connector_names,
-        req.query,
-        tokenizer,
-        model.connector_end_token_id,
-    )
-    input_ids = input_ids.to(model.device)
-    attention_mask = attention_mask.to(model.device)
-
-    global_confidence, classifier_confidence = model(input_ids, attention_mask)
-
-    if global_confidence.item() < 0.5:
-        return []
-
-    passed_connectors = []
-
-    for i, connector_name in enumerate(connector_names):
-        if classifier_confidence.view(-1)[i].item() > 0.5:
-            passed_connectors.append(connector_name)
-
-    return passed_connectors
-
-
-def run_analysis(intent_req: IntentRequest) -> tuple[bool, list[str]]:
-    tokenizer = get_intent_model_tokenizer()
-    model_input = tokenizer(
-        intent_req.query, return_tensors="pt", truncation=False, padding=False
-    )
-
-    if len(model_input.input_ids[0]) > 512:
-        # If the user text is too long, assume it is semantic and keep all words
-        return True, intent_req.query.split()
-
-    intent_probs, token_probs = run_inference(model_input)
-
-    is_keyword_sequence = intent_probs[0] >= intent_req.keyword_percent_threshold
-
-    keyword_preds = [
-        token_prob >= intent_req.keyword_percent_threshold for token_prob in token_probs
-    ]
-
-    try:
-        keywords = map_keywords(model_input.input_ids[0], tokenizer, keyword_preds)
-    except Exception as e:
-        logger.warning(
-            f"Failed to extract keywords for query: {intent_req.query} due to {e}"
-        )
-        # Fallback to keeping all words
-        keywords = intent_req.query.split()
-
-    cleaned_keywords = clean_keywords(keywords)
-
-    return is_keyword_sequence, cleaned_keywords
-
-
-@router.post("/connector-classification")
-async def process_connector_classification_request(
-    classification_request: ConnectorClassificationRequest,
-) -> ConnectorClassificationResponse:
-    if INDEXING_ONLY:
-        raise RuntimeError(
-            "Indexing model server should not call connector classification endpoint"
-        )
-
-    if len(classification_request.available_connectors) == 0:
-        return ConnectorClassificationResponse(connectors=[])
-
-    connectors = run_connector_classification(classification_request)
-    return ConnectorClassificationResponse(connectors=connectors)
-
-
-@router.post("/query-analysis")
-async def process_analysis_request(
-    intent_request: IntentRequest,
-) -> IntentResponse:
-    if INDEXING_ONLY:
-        raise RuntimeError("Indexing model server should not call intent endpoint")
-
-    is_keyword, keywords = run_analysis(intent_request)
-    return IntentResponse(is_keyword=is_keyword, keywords=keywords)
-
-
-@router.post("/content-classification")
-async def process_content_classification_request(
-    content_classification_requests: list[str],
-) -> list[ContentClassificationPrediction]:
-    return run_content_classification_inference(content_classification_requests)
--- a/Show More
+++ b/Show More