fix(projects): Fix Migration (#5550)

fix: avoid attempting to retrieve with non-org owners (#5555)
fix(saml): Update the route to take GETs and transform to POST (#5554)
2026-02-27 04:35:50 +00:00 · 2025-09-30 12:56:40 -07:00 · 2025-09-30 12:55:03 -07:00 · 2025-09-30 11:28:07 -07:00 · 2025-09-30 10:41:38 -07:00 · 2025-09-30 09:57:36 -07:00
554 changed files with 60641 additions and 30538 deletions
--- a/.github/actions/custom-build-and-push/action.yml
+++ b/.github/actions/custom-build-and-push/action.yml
@@ -35,6 +35,16 @@ inputs:
  cache-to:
    description: 'Cache destinations'
    required: false
+  outputs:
+    description: 'Output destinations'
+    required: false
+  provenance:
+    description: 'Generate provenance attestation'
+    required: false
+    default: 'false'
+  build-args:
+    description: 'Build arguments'
+    required: false
  retry-wait-time:
    description: 'Time to wait before attempt 2 in seconds'
    required: false
@@ -62,6 +72,9 @@ runs:
        no-cache: ${{ inputs.no-cache }}
        cache-from: ${{ inputs.cache-from }}
        cache-to: ${{ inputs.cache-to }}
+        outputs: ${{ inputs.outputs }}
+        provenance: ${{ inputs.provenance }}
+        build-args: ${{ inputs.build-args }}

    - name: Wait before attempt 2
      if: steps.buildx1.outcome != 'success'
@@ -85,6 +98,9 @@ runs:
        no-cache: ${{ inputs.no-cache }}
        cache-from: ${{ inputs.cache-from }}
        cache-to: ${{ inputs.cache-to }}
+        outputs: ${{ inputs.outputs }}
+        provenance: ${{ inputs.provenance }}
+        build-args: ${{ inputs.build-args }}

    - name: Wait before attempt 3
      if: steps.buildx1.outcome != 'success' && steps.buildx2.outcome != 'success'
@@ -108,6 +124,9 @@ runs:
        no-cache: ${{ inputs.no-cache }}
        cache-from: ${{ inputs.cache-from }}
        cache-to: ${{ inputs.cache-to }}
+        outputs: ${{ inputs.outputs }}
+        provenance: ${{ inputs.provenance }}
+        build-args: ${{ inputs.build-args }}

    - name: Report failure
      if: steps.buildx1.outcome != 'success' && steps.buildx2.outcome != 'success' && steps.buildx3.outcome != 'success'
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -6,9 +6,6 @@

 [Describe the tests you ran to verify your changes]

-## Backporting (check the box to trigger backport action)
+## Additional Options

-Note: You have to check that the action passes, otherwise resolve the conflicts manually and tag the patches.
-
- [ ] This PR should be backported (make sure to check that the backport attempt succeeds)
 - [ ] [Optional] Override Linear Check
--- a/.github/workflows/check-lazy-imports.yml
+++ b/.github/workflows/check-lazy-imports.yml
@@ -0,0 +1,30 @@
+# Runs backend/scripts/check_lazy_imports.py for merge-queue groups and for
+# pull requests that target main or a release/** branch.
+name: Check Lazy Imports
+
+on:
+  merge_group:
+  pull_request:
+    branches:
+      - main
+      - 'release/**'
+
+# The job only reads the repository, so keep the token read-only.
+permissions:
+  contents: read
+
+jobs:
+  check-lazy-imports:
+    runs-on: ubuntu-latest
+
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v4
+
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: '3.11'
+
+    - name: Check lazy imports
+      run: python3 backend/scripts/check_lazy_imports.py
--- a/.github/workflows/docker-build-push-backend-container-on-tag.yml
+++ b/.github/workflows/docker-build-push-backend-container-on-tag.yml
@@ -142,15 +142,30 @@ jobs:
      # can re-enable when they figure it out
      # https://github.com/aquasecurity/trivy/discussions/7538
      # https://github.com/aquasecurity/trivy-action/issues/389
+      # Security: Using pinned digest (0.65.0@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436)
+      # Security: No Docker socket mount needed for remote registry scanning
+      # Security: registry credentials are supplied through the step env and forwarded with
+      # `docker run -e NAME`, so the secret values are never templated into the command text.
      - name: Run Trivy vulnerability scanner
-        uses: aquasecurity/trivy-action@master
+        uses: nick-fields/retry@v3
        env:
-          TRIVY_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-db:2"
-          TRIVY_JAVA_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-java-db:1"
          TRIVY_USERNAME: ${{ secrets.DOCKER_USERNAME }}
          TRIVY_PASSWORD: ${{ secrets.DOCKER_TOKEN }}
        with:
-          # To run locally: trivy image --severity HIGH,CRITICAL onyxdotapp/onyx-backend
-          image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
-          severity: "CRITICAL,HIGH"
-          trivyignores: ./backend/.trivyignore
+          timeout_minutes: 30
+          max_attempts: 3
+          retry_wait_seconds: 10
+          command: |
+            docker run --rm -v "$HOME/.cache/trivy:/root/.cache/trivy" \
+              -v "${{ github.workspace }}/backend/.trivyignore:/tmp/.trivyignore:ro" \
+              -e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
+              -e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
+              -e TRIVY_USERNAME \
+              -e TRIVY_PASSWORD \
+              aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
+              image \
+              --skip-version-check \
+              --timeout 20m \
+              --severity CRITICAL,HIGH \
+              --ignorefile /tmp/.trivyignore \
+              docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
--- a/.github/workflows/docker-build-push-cloud-web-container-on-tag.yml
+++ b/.github/workflows/docker-build-push-cloud-web-container-on-tag.yml
@@ -139,12 +139,25 @@ jobs:
      # https://github.com/aquasecurity/trivy/discussions/7538
      # https://github.com/aquasecurity/trivy-action/issues/389
+      # Security: registry credentials are supplied through the step env and forwarded with
+      # `docker run -e NAME`, so the secret values are never templated into the command text.
      - name: Run Trivy vulnerability scanner
-        uses: aquasecurity/trivy-action@master
+        uses: nick-fields/retry@v3
        env:
-          TRIVY_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-db:2"
-          TRIVY_JAVA_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-java-db:1"
          TRIVY_USERNAME: ${{ secrets.DOCKER_USERNAME }}
          TRIVY_PASSWORD: ${{ secrets.DOCKER_TOKEN }}
        with:
-          image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
-          severity: "CRITICAL,HIGH"
+          timeout_minutes: 30
+          max_attempts: 3
+          retry_wait_seconds: 10
+          command: |
+            docker run --rm -v "$HOME/.cache/trivy:/root/.cache/trivy" \
+              -e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
+              -e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
+              -e TRIVY_USERNAME \
+              -e TRIVY_PASSWORD \
+              aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
+              image \
+              --skip-version-check \
+              --timeout 20m \
+              --severity CRITICAL,HIGH \
+              docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
--- a/.github/workflows/docker-build-push-model-server-container-on-tag.yml
+++ b/.github/workflows/docker-build-push-model-server-container-on-tag.yml
@@ -99,7 +99,7 @@ jobs:
    needs: [check_model_server_changes]
    if: needs.check_model_server_changes.outputs.changed == 'true'
    runs-on:
-      [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}-arm64"]
+      [runs-on, runner=8cpu-linux-arm64, "run-id=${{ github.run_id }}-arm64"]
    env:
      PLATFORM_PAIR: linux-arm64
    steps:
@@ -164,13 +164,25 @@ jobs:
          fi

+      # Security: registry credentials are supplied through the step env and forwarded with
+      # `docker run -e NAME`, so the secret values are never templated into the command text.
      - name: Run Trivy vulnerability scanner
-        uses: aquasecurity/trivy-action@master
+        uses: nick-fields/retry@v3
        env:
-          TRIVY_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-db:2"
-          TRIVY_JAVA_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-java-db:1"
          TRIVY_USERNAME: ${{ secrets.DOCKER_USERNAME }}
          TRIVY_PASSWORD: ${{ secrets.DOCKER_TOKEN }}
        with:
-          image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
-          severity: "CRITICAL,HIGH"
-          timeout: "10m"
+          timeout_minutes: 30
+          max_attempts: 3
+          retry_wait_seconds: 10
+          command: |
+            docker run --rm -v "$HOME/.cache/trivy:/root/.cache/trivy" \
+              -e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
+              -e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
+              -e TRIVY_USERNAME \
+              -e TRIVY_PASSWORD \
+              aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
+              image \
+              --skip-version-check \
+              --timeout 20m \
+              --severity CRITICAL,HIGH \
+              docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
--- a/.github/workflows/docker-build-push-web-container-on-tag.yml
+++ b/.github/workflows/docker-build-push-web-container-on-tag.yml
@@ -150,12 +150,25 @@ jobs:
      # https://github.com/aquasecurity/trivy/discussions/7538
      # https://github.com/aquasecurity/trivy-action/issues/389
+      # Security: registry credentials are supplied through the step env and forwarded with
+      # `docker run -e NAME`, so the secret values are never templated into the command text.
      - name: Run Trivy vulnerability scanner
-        uses: aquasecurity/trivy-action@master
+        uses: nick-fields/retry@v3
        env:
-          TRIVY_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-db:2"
-          TRIVY_JAVA_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-java-db:1"
          TRIVY_USERNAME: ${{ secrets.DOCKER_USERNAME }}
          TRIVY_PASSWORD: ${{ secrets.DOCKER_TOKEN }}
        with:
-          image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
-          severity: "CRITICAL,HIGH"
+          timeout_minutes: 30
+          max_attempts: 3
+          retry_wait_seconds: 10
+          command: |
+            docker run --rm -v "$HOME/.cache/trivy:/root/.cache/trivy" \
+              -e TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db:2" \
+              -e TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db:1" \
+              -e TRIVY_USERNAME \
+              -e TRIVY_PASSWORD \
+              aquasec/trivy@sha256:a22415a38938a56c379387a8163fcb0ce38b10ace73e593475d3658d578b2436 \
+              image \
+              --skip-version-check \
+              --timeout 20m \
+              --severity CRITICAL,HIGH \
+              docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}
--- a/.github/workflows/helm-chart-releases.yml
+++ b/.github/workflows/helm-chart-releases.yml
@@ -27,6 +27,7 @@ jobs:
        run: |
          helm repo add bitnami https://charts.bitnami.com/bitnami
          helm repo add onyx-vespa https://onyx-dot-app.github.io/vespa-helm-charts
+          helm repo add keda https://kedacore.github.io/charts
          helm repo update

      - name: Build chart dependencies
@@ -46,4 +47,4 @@ jobs:
          charts_dir: deployment/helm/charts
          branch: gh-pages
          commit_username: ${{ github.actor }}
-          commit_email: ${{ github.actor }}@users.noreply.github.com
+          commit_email: ${{ github.actor }}@users.noreply.github.com
--- a/.github/workflows/pr-backport-autotrigger.yml
+++ b/.github/workflows/pr-backport-autotrigger.yml
@@ -1,124 +0,0 @@
-name: Backport on Merge
-
-# Note this workflow does not trigger the builds, be sure to manually tag the branches to trigger the builds
-
-on:
-  pull_request:
-    types: [closed] # Later we check for merge so only PRs that go in can get backported
-
-permissions:
-  contents: write
-  actions: write
-
-jobs:
-  backport:
-    if: github.event.pull_request.merged == true
-    runs-on: ubuntu-latest
-    env:
-      GITHUB_TOKEN: ${{ secrets.YUHONG_GH_ACTIONS }}
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-        with:
-          ssh-key: "${{ secrets.RKUO_DEPLOY_KEY }}"
-          fetch-depth: 0
-
-      - name: Set up Git user
-        run: |
-          git config user.name "Richard Kuo [bot]"
-          git config user.email "rkuo[bot]@onyx.app"
-          git fetch --prune
-
-      - name: Check for Backport Checkbox
-        id: checkbox-check
-        run: |
-          PR_BODY="${{ github.event.pull_request.body }}"
-          if [[ "$PR_BODY" == *"[x] This PR should be backported"* ]]; then
-            echo "backport=true" >> $GITHUB_OUTPUT
-          else
-            echo "backport=false" >> $GITHUB_OUTPUT
-          fi
-
-      - name: List and sort release branches
-        id: list-branches
-        run: |
-          git fetch --all --tags
-          BRANCHES=$(git for-each-ref --format='%(refname:short)' refs/remotes/origin/release/* | sed 's|origin/release/||' | sort -Vr)
-          BETA=$(echo "$BRANCHES" | head -n 1)
-          STABLE=$(echo "$BRANCHES" | head -n 2 | tail -n 1)
-          echo "beta=release/$BETA" >> $GITHUB_OUTPUT
-          echo "stable=release/$STABLE" >> $GITHUB_OUTPUT
-          # Fetch latest tags for beta and stable
-          LATEST_BETA_TAG=$(git tag -l "v[0-9]*.[0-9]*.[0-9]*-beta.[0-9]*" | grep -E "^v[0-9]+\.[0-9]+\.[0-9]+-beta\.[0-9]+$" | grep -v -- "-cloud" | sort -Vr | head -n 1)
-          LATEST_STABLE_TAG=$(git tag -l "v[0-9]*.[0-9]*.[0-9]*" | grep -E "^v[0-9]+\.[0-9]+\.[0-9]+$" | sort -Vr | head -n 1)
-
-          # Handle case where no beta tags exist
-          if [[ -z "$LATEST_BETA_TAG" ]]; then
-            NEW_BETA_TAG="v1.0.0-beta.1"
-          else
-            NEW_BETA_TAG=$(echo $LATEST_BETA_TAG | awk -F '[.-]' '{print $1 "." $2 "." $3 "-beta." ($NF+1)}')
-          fi
-
-          # Increment latest stable tag
-          NEW_STABLE_TAG=$(echo $LATEST_STABLE_TAG | awk -F '.' '{print $1 "." $2 "." ($3+1)}')
-          echo "latest_beta_tag=$LATEST_BETA_TAG" >> $GITHUB_OUTPUT
-          echo "latest_stable_tag=$LATEST_STABLE_TAG" >> $GITHUB_OUTPUT
-          echo "new_beta_tag=$NEW_BETA_TAG" >> $GITHUB_OUTPUT
-          echo "new_stable_tag=$NEW_STABLE_TAG" >> $GITHUB_OUTPUT
-
-      - name: Echo branch and tag information
-        run: |
-          echo "Beta branch: ${{ steps.list-branches.outputs.beta }}"
-          echo "Stable branch: ${{ steps.list-branches.outputs.stable }}"
-          echo "Latest beta tag: ${{ steps.list-branches.outputs.latest_beta_tag }}"
-          echo "Latest stable tag: ${{ steps.list-branches.outputs.latest_stable_tag }}"
-          echo "New beta tag: ${{ steps.list-branches.outputs.new_beta_tag }}"
-          echo "New stable tag: ${{ steps.list-branches.outputs.new_stable_tag }}"
-
-      - name: Trigger Backport
-        if: steps.checkbox-check.outputs.backport == 'true'
-        run: |
-          set -e
-          echo "Backporting to beta ${{ steps.list-branches.outputs.beta }} and stable ${{ steps.list-branches.outputs.stable }}"
-
-          # Echo the merge commit SHA
-          echo "Merge commit SHA: ${{ github.event.pull_request.merge_commit_sha }}"
-
-          # Fetch all history for all branches and tags
-          git fetch --prune
-
-          # Reset and prepare the beta branch
-          git checkout ${{ steps.list-branches.outputs.beta }}
-          echo "Last 5 commits on beta branch:"
-          git log -n 5 --pretty=format:"%H"
-          echo ""  # Newline for formatting
-
-          # Cherry-pick the merge commit from the merged PR
-          git cherry-pick -m 1 ${{ github.event.pull_request.merge_commit_sha }} || {
-            echo "Cherry-pick to beta failed due to conflicts."
-            exit 1
-          }
-
-          # Create new beta branch/tag
-          git tag ${{ steps.list-branches.outputs.new_beta_tag }}
-          # Push the changes and tag to the beta branch using PAT
-          git push origin ${{ steps.list-branches.outputs.beta }}
-          git push origin ${{ steps.list-branches.outputs.new_beta_tag }}
-
-          # Reset and prepare the stable branch
-          git checkout ${{ steps.list-branches.outputs.stable }}
-          echo "Last 5 commits on stable branch:"
-          git log -n 5 --pretty=format:"%H"
-          echo ""  # Newline for formatting
-
-          # Cherry-pick the merge commit from the merged PR
-          git cherry-pick -m 1 ${{ github.event.pull_request.merge_commit_sha }} || {
-            echo "Cherry-pick to stable failed due to conflicts."
-            exit 1
-          }
-
-          # Create new stable branch/tag
-          git tag ${{ steps.list-branches.outputs.new_stable_tag }}
-          # Push the changes and tag to the stable branch using PAT
-          git push origin ${{ steps.list-branches.outputs.stable }}
-          git push origin ${{ steps.list-branches.outputs.new_stable_tag }}
--- a/.github/workflows/pr-external-dependency-unit-tests.yml
+++ b/.github/workflows/pr-external-dependency-unit-tests.yml
@@ -21,6 +21,10 @@ env:
  CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }}
  CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}

+  # LLMs
+  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+  ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+
 jobs:
  discover-test-dirs:
    runs-on: ubuntu-latest
@@ -39,8 +43,8 @@ jobs:

  external-dependency-unit-tests:
    needs: discover-test-dirs
-    # See https://runs-on.com/runners/linux/
-    runs-on: [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}"]
+    # Use larger runner with more resources for Vespa
+    runs-on: [runs-on, runner=16cpu-linux-x64, "run-id=${{ github.run_id }}"]
    
    strategy:
      fail-fast: false
@@ -49,6 +53,7 @@ jobs:

    env:
      PYTHONPATH: ./backend
+      MODEL_SERVER_HOST: "disabled"

    steps:
      - name: Checkout code
@@ -74,19 +79,30 @@ jobs:
      - name: Set up Standard Dependencies
        run: |
          cd deployment/docker_compose
-          docker compose -f docker-compose.dev.yml -p onyx-stack up -d minio relational_db cache index
+          docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d minio relational_db cache index
+
+      - name: Wait for services
+        run: |
+          echo "Waiting for services to be ready..."
+          sleep 30
+          
+          # Wait for Vespa specifically
+          echo "Waiting for Vespa to be ready..."
+          timeout 300 bash -c 'until curl -f -s http://localhost:8081/ApplicationStatus > /dev/null 2>&1; do echo "Vespa not ready, waiting..."; sleep 10; done' || echo "Vespa timeout - continuing anyway"
+          
+          echo "Services should be ready now"

      - name: Run migrations
        run: |
          cd backend
+          # Run migrations to head
          alembic upgrade head
+          alembic heads --verbose

      - name: Run Tests for ${{ matrix.test-dir }}
        shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
        run: |
          py.test \
-            -n 8 \
-            --dist loadfile \
            --durations=8 \
            -o junit_family=xunit2 \
            -xv \
--- a/.github/workflows/pr-helm-chart-testing.yml
+++ b/.github/workflows/pr-helm-chart-testing.yml
@@ -53,27 +53,155 @@ jobs:
      if: steps.list-changed.outputs.changed == 'true'
      uses: helm/kind-action@v1.12.0

-    - name: Run chart-testing (install)
+    - name: Pre-install cluster status check
      if: steps.list-changed.outputs.changed == 'true'
-      run: ct install --all \
-        --helm-extra-set-args="\
-          --set=nginx.enabled=false \
-          --set=postgresql.enabled=false \
-          --set=redis.enabled=false \
-          --set=minio.enabled=false \
-          --set=vespa.enabled=false \
-          --set=slackbot.enabled=false \
-          --set=api.replicaCount=0 \
-          --set=inferenceCapability.replicaCount=0 \
-          --set=indexCapability.replicaCount=0 \
-          --set=celery_beat.replicaCount=0 \
-          --set=celery_worker_heavy.replicaCount=0 \
-          --set=celery_worker_docprocessing.replicaCount=0 \
-          --set=celery_worker_light.replicaCount=0 \
-          --set=celery_worker_monitoring.replicaCount=0 \
-          --set=celery_worker_primary.replicaCount=0 \
-          --set=celery_worker_user_files_indexing.replicaCount=0" \
-        --debug --config ct.yaml
+      run: |
+        echo "=== Pre-install Cluster Status ==="
+        kubectl get nodes -o wide
+        kubectl get pods --all-namespaces
+        kubectl get storageclass
+
+    - name: Add Helm repositories and update
+      if: steps.list-changed.outputs.changed == 'true'
+      run: |
+        echo "=== Adding Helm repositories ==="
+        helm repo add bitnami https://charts.bitnami.com/bitnami
+        helm repo add vespa https://onyx-dot-app.github.io/vespa-helm-charts
+        helm repo update
+
+    - name: Pre-pull critical images
+      if: steps.list-changed.outputs.changed == 'true'
+      run: |
+        echo "=== Pre-pulling critical images to avoid timeout ==="
+        # Get kind cluster name
+        KIND_CLUSTER=$(kubectl config current-context | sed 's/kind-//')
+        echo "Kind cluster: $KIND_CLUSTER"
+        
+        # Pre-pull images that are likely to be used
+        echo "Pre-pulling PostgreSQL image..."
+        docker pull postgres:15-alpine || echo "Failed to pull postgres:15-alpine"
+        kind load docker-image postgres:15-alpine --name $KIND_CLUSTER || echo "Failed to load postgres image"
+        
+        echo "Pre-pulling Redis image..."
+        docker pull redis:7-alpine || echo "Failed to pull redis:7-alpine"
+        kind load docker-image redis:7-alpine --name $KIND_CLUSTER || echo "Failed to load redis image"
+        
+        echo "Pre-pulling Onyx images..."
+        docker pull docker.io/onyxdotapp/onyx-web-server:latest || echo "Failed to pull onyx web server"
+        docker pull docker.io/onyxdotapp/onyx-backend:latest || echo "Failed to pull onyx backend"
+        kind load docker-image docker.io/onyxdotapp/onyx-web-server:latest --name $KIND_CLUSTER || echo "Failed to load onyx web server"
+        kind load docker-image docker.io/onyxdotapp/onyx-backend:latest --name $KIND_CLUSTER || echo "Failed to load onyx backend"
+        
+        echo "=== Images loaded into Kind cluster ==="
+        docker exec $KIND_CLUSTER-control-plane crictl images | grep -E "(postgres|redis|onyx)" || echo "Some images may still be loading..."
+
+    - name: Validate chart dependencies
+      if: steps.list-changed.outputs.changed == 'true'
+      run: |
+        echo "=== Validating chart dependencies ==="
+        cd deployment/helm/charts/onyx
+        helm dependency update
+        helm lint .
+
+    - name: Run chart-testing (install) with enhanced monitoring
+      timeout-minutes: 25
+      if: steps.list-changed.outputs.changed == 'true'
+      run: |
+        echo "=== Starting chart installation with monitoring ==="
+        
+        # Function to monitor cluster state
+        monitor_cluster() {
+          while true; do
+            echo "=== Cluster Status Check at $(date) ==="
+            # Only show non-running pods to reduce noise
+            NON_RUNNING_PODS=$(kubectl get pods --all-namespaces --field-selector=status.phase!=Running,status.phase!=Succeeded --no-headers 2>/dev/null | wc -l)
+            if [ "$NON_RUNNING_PODS" -gt 0 ]; then
+              echo "Non-running pods:"
+              kubectl get pods --all-namespaces --field-selector=status.phase!=Running,status.phase!=Succeeded
+            else
+              echo "All pods running successfully"
+            fi
+            # Only show recent events if there are issues
+            RECENT_EVENTS=$(kubectl get events --sort-by=.lastTimestamp --all-namespaces --field-selector=type!=Normal 2>/dev/null | tail -5)
+            if [ -n "$RECENT_EVENTS" ]; then
+              echo "Recent warnings/errors:"
+              echo "$RECENT_EVENTS"
+            fi
+            sleep 60
+          done
+        }
+        
+        # Start monitoring in background
+        monitor_cluster &
+        MONITOR_PID=$!
+        
+        # Set up cleanup
+        cleanup() {
+          echo "=== Cleaning up monitoring process ==="
+          kill $MONITOR_PID 2>/dev/null || true
+          echo "=== Final cluster state ==="
+          kubectl get pods --all-namespaces
+          kubectl get events --all-namespaces --sort-by=.lastTimestamp | tail -20
+        }
+        
+        # Trap cleanup on exit
+        trap cleanup EXIT
+        
+        # Run the actual installation with detailed logging
+        echo "=== Starting ct install ==="
+        ct install --all \
+          --helm-extra-set-args="\
+            --set=nginx.enabled=false \
+            --set=minio.enabled=false \
+            --set=vespa.enabled=false \
+            --set=slackbot.enabled=false \
+            --set=postgresql.enabled=true \
+            --set=postgresql.primary.persistence.enabled=false \
+            --set=redis.enabled=true \
+            --set=webserver.replicaCount=1 \
+            --set=api.replicaCount=0 \
+            --set=inferenceCapability.replicaCount=0 \
+            --set=indexCapability.replicaCount=0 \
+            --set=celery_beat.replicaCount=0 \
+            --set=celery_worker_heavy.replicaCount=0 \
+            --set=celery_worker_docfetching.replicaCount=0 \
+            --set=celery_worker_docprocessing.replicaCount=0 \
+            --set=celery_worker_light.replicaCount=0 \
+            --set=celery_worker_monitoring.replicaCount=0 \
+            --set=celery_worker_primary.replicaCount=0 \
+            --set=celery_worker_user_file_processing.replicaCount=0 \
+            --set=celery_worker_user_files_indexing.replicaCount=0" \
+          --helm-extra-args="--timeout 900s --debug" \
+          --debug --config ct.yaml
+        
+        echo "=== Installation completed successfully ==="
+        kubectl get pods --all-namespaces
+
+    - name: Post-install verification
+      if: steps.list-changed.outputs.changed == 'true'
+      run: |
+        echo "=== Post-install verification ==="
+        kubectl get pods --all-namespaces
+        kubectl get services --all-namespaces
+        # Only show issues if they exist
+        kubectl describe pods --all-namespaces | grep -A 5 -B 2 "Failed\|Error\|Warning" || echo "No pod issues found"
+
+    - name: Cleanup on failure  # diagnostics only: dumps cluster state, removes nothing
+      if: failure() && steps.list-changed.outputs.changed == 'true'
+      run: |
+        echo "=== Cleanup on failure ==="
+        echo "=== Final cluster state ==="
+        kubectl get pods --all-namespaces
+        kubectl get events --all-namespaces --sort-by=.lastTimestamp | tail -10
+        echo "=== Pod descriptions for debugging ==="
+        kubectl describe pods --all-namespaces | grep -A 10 -B 3 "Failed\|Error\|Warning\|Pending" || echo "No problematic pods found"
+        echo "=== Recent logs for debugging ==="
+        # kubectl logs needs a pod (or selector) and has no --all-namespaces flag, so walk every pod.
+        kubectl get pods --all-namespaces --no-headers -o custom-columns=NS:.metadata.namespace,NAME:.metadata.name |
+          while read -r ns pod; do kubectl logs -n "$ns" "$pod" --all-containers --tail=50 2>&1; done |
+          grep -i "error\|timeout\|failed\|pull" || echo "No error logs found"
+        echo "=== Helm releases ==="
+        helm list --all-namespaces
      # the following would install only changed charts, but we only have one chart so 
      # don't worry about that for now
      # run: ct install --target-branch ${{ github.event.repository.default_branch }}
--- a/.github/workflows/pr-integration-tests.yml
+++ b/.github/workflows/pr-integration-tests.yml
@@ -11,6 +11,12 @@ on:
      - "release/**"

 env:
+  # Private Registry Configuration
+  PRIVATE_REGISTRY: experimental-registry.blacksmith.sh:5000
+  PRIVATE_REGISTRY_USERNAME: ${{ secrets.PRIVATE_REGISTRY_USERNAME }}
+  PRIVATE_REGISTRY_PASSWORD: ${{ secrets.PRIVATE_REGISTRY_PASSWORD }}
+
+  # Test Environment Variables
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
  CONFLUENCE_TEST_SPACE_URL: ${{ secrets.CONFLUENCE_TEST_SPACE_URL }}
@@ -23,18 +29,38 @@ env:
  PERM_SYNC_SHAREPOINT_PRIVATE_KEY: ${{ secrets.PERM_SYNC_SHAREPOINT_PRIVATE_KEY }}
  PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD: ${{ secrets.PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD }}
  PERM_SYNC_SHAREPOINT_DIRECTORY_ID: ${{ secrets.PERM_SYNC_SHAREPOINT_DIRECTORY_ID }}
-  PLATFORM_PAIR: linux-amd64

 jobs:
-  integration-tests:
-    # See https://runs-on.com/runners/linux/
-    runs-on:
-      [
-        runs-on,
-        runner=32cpu-linux-x64,
-        disk=large,
-        "run-id=${{ github.run_id }}",
-      ]
+  discover-test-dirs:
+    runs-on: blacksmith-2vcpu-ubuntu-2404-arm
+    outputs:
+      test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Discover test directories
+        id: set-matrix
+        run: |
+          # Find all leaf-level directories in both test directories
+          tests_dirs=$(find backend/tests/integration/tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" -exec basename {} \; | sort)
+          connector_dirs=$(find backend/tests/integration/connector_job_tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" -exec basename {} \; | sort)
+
+          # Create JSON array with directory info
+          all_dirs=""
+          for dir in $tests_dirs; do
+            all_dirs="$all_dirs{\"path\":\"tests/$dir\",\"name\":\"tests-$dir\"},"
+          done
+          for dir in $connector_dirs; do
+            all_dirs="$all_dirs{\"path\":\"connector_job_tests/$dir\",\"name\":\"connector-$dir\"},"
+          done
+
+          # Remove trailing comma and wrap in array
+          all_dirs="[${all_dirs%,}]"
+          echo "test-dirs=$all_dirs" >> $GITHUB_OUTPUT
+
+  prepare-build:
+    runs-on: blacksmith-2vcpu-ubuntu-2404-arm
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
@@ -47,12 +73,12 @@ jobs:
          cache-dependency-path: |
            backend/requirements/default.txt
            backend/requirements/dev.txt
-            backend/requirements/ee.txt
-      - run: |
+
+      - name: Install Python dependencies
+        run: |
          python -m pip install --upgrade pip
          pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
          pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
-          pip install --retries 5 --timeout 30 -r backend/requirements/ee.txt

      - name: Generate OpenAPI schema
        working-directory: ./backend
@@ -74,130 +100,153 @@ jobs:
            --skip-validate-spec \
            --openapi-normalizer "SIMPLIFY_ONEOF_ANYOF=true,SET_OAS3_NULLABLE=true"

-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
+      - name: Upload OpenAPI artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: openapi-artifacts
+          path: backend/generated/

+  # Builds the backend image for linux/arm64 and pushes it to the private
+  # registry.
+  build-backend-image:
+    runs-on: blacksmith-16vcpu-ubuntu-2404-arm
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      # Log in before the build step, which pushes to this registry.
+      - name: Login to Private Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.PRIVATE_REGISTRY }}
+          password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
+          username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
+
+      - name: Set up Docker Buildx
+        uses: useblacksmith/setup-docker-builder@v1
+
+      - name: Build and push Backend Docker image
+        uses: useblacksmith/build-push-action@v2
+        with:
+          file: ./backend/Dockerfile
+          context: ./backend
+          platforms: linux/arm64
+          push: true
+          outputs: type=registry
+          # The tag embeds the run id, so each workflow run gets its own tag.
+          tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }}
+
+  # Builds the model server image for linux/arm64 and pushes it to the
+  # private registry.
+  build-model-server-image:
+    runs-on: blacksmith-16vcpu-ubuntu-2404-arm
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      # Log in before the build step, which pushes to this registry.
+      - name: Login to Private Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.PRIVATE_REGISTRY }}
+          password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
+          username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
+
+      - name: Set up Docker Buildx
+        uses: useblacksmith/setup-docker-builder@v1
+
+      - name: Build and push Model Server Docker image
+        uses: useblacksmith/build-push-action@v2
+        with:
+          file: ./backend/Dockerfile.model_server
+          context: ./backend
+          platforms: linux/arm64
+          push: true
+          outputs: type=registry
+          # No provenance attestation is generated for this image.
+          provenance: false
+          # The tag embeds the run id, so each workflow run gets its own tag.
+          tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }}
+
+  # Builds the integration test image for linux/arm64 and pushes it to the
+  # private registry. Runs after prepare-build because it downloads the
+  # `openapi-artifacts` artifact into the build context.
+  build-integration-image:
+    runs-on: blacksmith-16vcpu-ubuntu-2404-arm
+    needs: prepare-build
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      # Log in before the build step, which pushes to this registry.
+      - name: Login to Private Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.PRIVATE_REGISTRY }}
+          password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
+          username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
+
+      # Place the generated files under ./backend, the Docker build context.
+      - name: Download OpenAPI artifacts
+        uses: actions/download-artifact@v4
+        with:
+          path: backend/generated/
+          name: openapi-artifacts
+
+      - name: Set up Docker Buildx
+        uses: useblacksmith/setup-docker-builder@v1
+
+      - name: Build and push integration test Docker image
+        uses: useblacksmith/build-push-action@v2
+        with:
+          file: ./backend/tests/integration/Dockerfile
+          context: ./backend
+          platforms: linux/arm64
+          push: true
+          outputs: type=registry
+          # The tag embeds the run id, so each workflow run gets its own tag.
+          tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }}
+
+  integration-tests:
+    needs:
+      [
+        discover-test-dirs,
+        build-backend-image,
+        build-model-server-image,
+        build-integration-image,
+      ]
+    runs-on: blacksmith-8vcpu-ubuntu-2404-arm
+
+    strategy:
+      fail-fast: false
+      matrix:
+        test-dir: ${{ fromJson(needs.discover-test-dirs.outputs.test-dirs) }}
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Login to Private Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.PRIVATE_REGISTRY }}
+          username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
+          password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
+
+      # needed for pulling Vespa, Redis, Postgres, and Minio images
+      # otherwise, we hit the "Unauthenticated users" limit
+      # https://docs.docker.com/docker-hub/usage/
      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

-      # tag every docker image with "test" so that we can spin up the correct set
-      # of images during testing
-
-      # We don't need to build the Web Docker image since it's not yet used
-      # in the integration tests. We have a separate action to verify that it builds
-      # successfully.
-      - name: Pull Web Docker image
+      - name: Pull Docker images
        run: |
-          docker pull onyxdotapp/onyx-web-server:latest
-          docker tag onyxdotapp/onyx-web-server:latest onyxdotapp/onyx-web-server:test
+          # Pull all images from the private registry in parallel, remembering
+          # each background PID so its exit status can be checked afterwards.
+          echo "Pulling Docker images in parallel..."
+          pull_pids=""
+          for image in integration-test-onyx-backend integration-test-onyx-model-server integration-test-onyx-integration; do
+            docker pull --platform linux/arm64 "${{ env.PRIVATE_REGISTRY }}/${image}:test-${{ github.run_id }}" &
+            pull_pids="$pull_pids $!"
+          done

-      # we use the runs-on cache for docker builds
-      # in conjunction with runs-on runners, it has better speed and unlimited caching
-      # https://runs-on.com/caching/s3-cache-for-github-actions/
-      # https://runs-on.com/caching/docker/
-      # https://github.com/moby/buildkit#s3-cache-experimental
+          # A bare `wait` always returns 0 and would hide a failed pull;
+          # waiting on each PID surfaces the failure here.
+          for pid in $pull_pids; do
+            wait "$pid" || { echo "::error::Failed to pull a Docker image"; exit 1; }
+          done
+          echo "All Docker images pulled successfully"

-      # images are built and run locally for testing purposes. Not pushed.
-      - name: Build Backend Docker image
-        uses: ./.github/actions/custom-build-and-push
-        with:
-          context: ./backend
-          file: ./backend/Dockerfile
-          platforms: linux/amd64
-          tags: onyxdotapp/onyx-backend:test
-          push: false
-          load: true
-          cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
-          cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
-
-      - name: Build Model Server Docker image
-        uses: ./.github/actions/custom-build-and-push
-        with:
-          context: ./backend
-          file: ./backend/Dockerfile.model_server
-          platforms: linux/amd64
-          tags: onyxdotapp/onyx-model-server:test
-          push: false
-          load: true
-          cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
-          cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
-
-      - name: Build integration test Docker image
-        uses: ./.github/actions/custom-build-and-push
-        with:
-          context: ./backend
-          file: ./backend/tests/integration/Dockerfile
-          platforms: linux/amd64
-          tags: onyxdotapp/onyx-integration:test
-          push: false
-          load: true
-          cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
-          cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
-
-      # Start containers for multi-tenant tests
-      - name: Start Docker containers for multi-tenant tests
-        run: |
-          cd deployment/docker_compose
-          ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
-          MULTI_TENANT=true \
-          AUTH_TYPE=cloud \
-          REQUIRE_EMAIL_VERIFICATION=false \
-          DISABLE_TELEMETRY=true \
-          IMAGE_TAG=test \
-          DEV_MODE=true \
-          docker compose -f docker-compose.multitenant-dev.yml -p onyx-stack up -d
-        id: start_docker_multi_tenant
-
-      # In practice, `cloud` Auth type would require OAUTH credentials to be set.
-      - name: Run Multi-Tenant Integration Tests
-        run: |
-          echo "Waiting for 3 minutes to ensure API server is ready..."
-          sleep 180
-          echo "Running integration tests..."
-          docker run --rm --network onyx-stack_default \
-            --name test-runner \
-            -e POSTGRES_HOST=relational_db \
-            -e POSTGRES_USER=postgres \
-            -e POSTGRES_PASSWORD=password \
-            -e DB_READONLY_USER=db_readonly_user \
-            -e DB_READONLY_PASSWORD=password \
-            -e POSTGRES_DB=postgres \
-            -e POSTGRES_USE_NULL_POOL=true \
-            -e VESPA_HOST=index \
-            -e REDIS_HOST=cache \
-            -e API_SERVER_HOST=api_server \
-            -e OPENAI_API_KEY=${OPENAI_API_KEY} \
-            -e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
-            -e TEST_WEB_HOSTNAME=test-runner \
-            -e AUTH_TYPE=cloud \
-            -e MULTI_TENANT=true \
-            -e REQUIRE_EMAIL_VERIFICATION=false \
-            -e DISABLE_TELEMETRY=true \
-            -e IMAGE_TAG=test \
-            -e DEV_MODE=true \
-            onyxdotapp/onyx-integration:test \
-            /app/tests/integration/multitenant_tests
-        continue-on-error: true
-        id: run_multitenant_tests
-
-      - name: Check multi-tenant test results
-        run: |
-          if [ ${{ steps.run_multitenant_tests.outcome }} == 'failure' ]; then
-            echo "Multi-tenant integration tests failed. Exiting with error."
-            exit 1
-          else
-            echo "All multi-tenant integration tests passed successfully."
-          fi
-
-      - name: Stop multi-tenant Docker containers
-        run: |
-          cd deployment/docker_compose
-          docker compose -f docker-compose.multitenant-dev.yml -p onyx-stack down -v
+          # Re-tag to remove registry prefix for docker-compose
+          docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }} onyxdotapp/onyx-backend:test
+          docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }} onyxdotapp/onyx-model-server:test
+          docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }} onyxdotapp/onyx-integration:test

      # NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections
+      # NOTE: don't need web server for integration tests
      - name: Start Docker containers
        run: |
          cd deployment/docker_compose
@@ -210,14 +259,23 @@ jobs:
          IMAGE_TAG=test \
          INTEGRATION_TESTS_MODE=true \
          CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS=0.001 \
-          docker compose -f docker-compose.dev.yml -p onyx-stack up -d
+          docker compose -f docker-compose.yml -f docker-compose.dev.yml up \
+            relational_db \
+            index \
+            cache \
+            minio \
+            api_server \
+            inference_model_server \
+            indexing_model_server \
+            background \
+            -d
        id: start_docker

      - name: Wait for service to be ready
        run: |
          echo "Starting wait-for-service script..."

-          docker logs -f onyx-stack-api_server-1 &
+          docker logs -f onyx-api_server-1 &

          start_time=$(date +%s)
          timeout=300  # 5 minutes in seconds
@@ -253,54 +311,44 @@ jobs:
          docker compose -f docker-compose.mock-it-services.yml \
            -p mock-it-services-stack up -d

-      # NOTE: Use pre-ping/null to reduce flakiness due to dropped connections
-      # NOTE: `-e ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true` should be added once
-      # enterprise tests are fixed 
-      - name: Run Standard Integration Tests
-        run: |
-          echo "Running integration tests..."
-          docker run --rm --network onyx-stack_default \
-            --name test-runner \
-            -e POSTGRES_HOST=relational_db \
-            -e POSTGRES_USER=postgres \
-            -e POSTGRES_PASSWORD=password \
-            -e DB_READONLY_USER=db_readonly_user \
-            -e DB_READONLY_PASSWORD=password \
-            -e POSTGRES_DB=postgres \
-            -e POSTGRES_POOL_PRE_PING=true \
-            -e POSTGRES_USE_NULL_POOL=true \
-            -e VESPA_HOST=index \
-            -e REDIS_HOST=cache \
-            -e API_SERVER_HOST=api_server \
-            -e OPENAI_API_KEY=${OPENAI_API_KEY} \
-            -e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
-            -e CONFLUENCE_TEST_SPACE_URL=${CONFLUENCE_TEST_SPACE_URL} \
-            -e CONFLUENCE_USER_NAME=${CONFLUENCE_USER_NAME} \
-            -e CONFLUENCE_ACCESS_TOKEN=${CONFLUENCE_ACCESS_TOKEN} \
-            -e JIRA_BASE_URL=${JIRA_BASE_URL} \
-            -e JIRA_USER_EMAIL=${JIRA_USER_EMAIL} \
-            -e JIRA_API_TOKEN=${JIRA_API_TOKEN} \
-            -e PERM_SYNC_SHAREPOINT_CLIENT_ID=${PERM_SYNC_SHAREPOINT_CLIENT_ID} \
-            -e PERM_SYNC_SHAREPOINT_PRIVATE_KEY="${PERM_SYNC_SHAREPOINT_PRIVATE_KEY}" \
-            -e PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD=${PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD} \
-            -e PERM_SYNC_SHAREPOINT_DIRECTORY_ID=${PERM_SYNC_SHAREPOINT_DIRECTORY_ID} \
-            -e TEST_WEB_HOSTNAME=test-runner \
-            -e MOCK_CONNECTOR_SERVER_HOST=mock_connector_server \
-            -e MOCK_CONNECTOR_SERVER_PORT=8001 \
-            onyxdotapp/onyx-integration:test \
-            /app/tests/integration/tests \
-            /app/tests/integration/connector_job_tests
-        continue-on-error: true
-        id: run_tests
-
-      - name: Check test results
-        run: |
-          if [ ${{ steps.run_tests.outcome }} == 'failure' ]; then
-            echo "Integration tests failed. Exiting with error."
-            exit 1
-          else
-            echo "All integration tests passed successfully."
-          fi
+      # Runs the tests of one matrix entry inside the integration image,
+      # attached to the compose network. Each attempt may take up to 20
+      # minutes; up to 3 attempts are made, 10 seconds apart.
+      - name: Run Integration Tests for ${{ matrix.test-dir.name }}
+        uses: nick-fields/retry@v3
+        with:
+          timeout_minutes: 20
+          max_attempts: 3
+          retry_wait_seconds: 10
+          command: |
+            # A previous attempt that timed out can leave its container
+            # behind; remove it so the fixed --name below does not collide.
+            docker rm -f test-runner >/dev/null 2>&1 || true
+
+            echo "Running integration tests for ${{ matrix.test-dir.path }}..."
+            docker run --rm --network onyx_default \
+              --name test-runner \
+              -e POSTGRES_HOST=relational_db \
+              -e POSTGRES_USER=postgres \
+              -e POSTGRES_PASSWORD=password \
+              -e POSTGRES_DB=postgres \
+              -e DB_READONLY_USER=db_readonly_user \
+              -e DB_READONLY_PASSWORD=password \
+              -e POSTGRES_POOL_PRE_PING=true \
+              -e POSTGRES_USE_NULL_POOL=true \
+              -e VESPA_HOST=index \
+              -e REDIS_HOST=cache \
+              -e API_SERVER_HOST=api_server \
+              -e OPENAI_API_KEY="${OPENAI_API_KEY}" \
+              -e SLACK_BOT_TOKEN="${SLACK_BOT_TOKEN}" \
+              -e CONFLUENCE_TEST_SPACE_URL="${CONFLUENCE_TEST_SPACE_URL}" \
+              -e CONFLUENCE_USER_NAME="${CONFLUENCE_USER_NAME}" \
+              -e CONFLUENCE_ACCESS_TOKEN="${CONFLUENCE_ACCESS_TOKEN}" \
+              -e JIRA_BASE_URL="${JIRA_BASE_URL}" \
+              -e JIRA_USER_EMAIL="${JIRA_USER_EMAIL}" \
+              -e JIRA_API_TOKEN="${JIRA_API_TOKEN}" \
+              -e PERM_SYNC_SHAREPOINT_CLIENT_ID="${PERM_SYNC_SHAREPOINT_CLIENT_ID}" \
+              -e PERM_SYNC_SHAREPOINT_PRIVATE_KEY="${PERM_SYNC_SHAREPOINT_PRIVATE_KEY}" \
+              -e PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD="${PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD}" \
+              -e PERM_SYNC_SHAREPOINT_DIRECTORY_ID="${PERM_SYNC_SHAREPOINT_DIRECTORY_ID}" \
+              -e TEST_WEB_HOSTNAME=test-runner \
+              -e MOCK_CONNECTOR_SERVER_HOST=mock_connector_server \
+              -e MOCK_CONNECTOR_SERVER_PORT=8001 \
+              onyxdotapp/onyx-integration:test \
+              /app/tests/integration/${{ matrix.test-dir.path }}

      # ------------------------------------------------------------
      # Always gather logs BEFORE "down":
@@ -308,19 +356,19 @@ jobs:
        if: always()
        run: |
          cd deployment/docker_compose
-          docker compose -f docker-compose.dev.yml -p onyx-stack logs --no-color api_server > $GITHUB_WORKSPACE/api_server.log || true
+          docker compose logs --no-color api_server > $GITHUB_WORKSPACE/api_server.log || true

      - name: Dump all-container logs (optional)
        if: always()
        run: |
          cd deployment/docker_compose
-          docker compose -f docker-compose.dev.yml -p onyx-stack logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
+          docker compose logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true

      - name: Upload logs
        if: always()
        uses: actions/upload-artifact@v4
        with:
-          name: docker-all-logs
+          name: docker-all-logs-${{ matrix.test-dir.name }}
          path: ${{ github.workspace }}/docker-compose.log
      # ------------------------------------------------------------

@@ -328,4 +376,158 @@ jobs:
        if: always()
        run: |
          cd deployment/docker_compose
-          docker compose -f docker-compose.dev.yml -p onyx-stack down -v
+          docker compose down -v
+
+
+  multitenant-tests:
+    needs:
+      [
+        build-backend-image,
+        build-model-server-image,
+        build-integration-image,
+      ]
+    runs-on: blacksmith-8vcpu-ubuntu-2404-arm
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Login to Private Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.PRIVATE_REGISTRY }}
+          username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
+          password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      - name: Pull Docker images
+        run: |
+          # Pull all images from the private registry in parallel, remembering
+          # each background PID so its exit status can be checked afterwards.
+          pull_pids=""
+          for image in integration-test-onyx-backend integration-test-onyx-model-server integration-test-onyx-integration; do
+            docker pull --platform linux/arm64 "${{ env.PRIVATE_REGISTRY }}/${image}:test-${{ github.run_id }}" &
+            pull_pids="$pull_pids $!"
+          done
+          # A bare `wait` always returns 0 and would hide a failed pull;
+          # waiting on each PID surfaces the failure here.
+          for pid in $pull_pids; do
+            wait "$pid" || { echo "::error::Failed to pull a Docker image"; exit 1; }
+          done
+          # Re-tag locally without the registry prefix, as onyxdotapp/...:test.
+          docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }} onyxdotapp/onyx-backend:test
+          docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }} onyxdotapp/onyx-model-server:test
+          docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }} onyxdotapp/onyx-integration:test
+
+      # Starts the listed compose services in detached mode with the
+      # multi-tenant settings exported below.
+      - name: Start Docker containers for multi-tenant tests
+        id: start_docker_multi_tenant
+        working-directory: deployment/docker_compose
+        run: |
+          # Environment passed to docker compose.
+          export IMAGE_TAG=test
+          export DEV_MODE=true
+          export DISABLE_TELEMETRY=true
+          export AUTH_TYPE=cloud
+          export MULTI_TENANT=true
+          export REQUIRE_EMAIL_VERIFICATION=false
+          export ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
+
+          docker compose -f docker-compose.multitenant-dev.yml up -d \
+            relational_db index cache minio \
+            api_server background \
+            inference_model_server indexing_model_server
+
+      # Polls the API server health endpoint every 5 seconds and fails the
+      # step if it has not returned HTTP 200 within 5 minutes.
+      - name: Wait for service to be ready (multi-tenant)
+        run: |
+          echo "Starting wait-for-service script for multi-tenant..."
+          # Stream the API server logs into the job log while polling.
+          docker logs -f onyx-api_server-1 &
+          start_time=$(date +%s)
+          timeout=300
+          while true; do
+            current_time=$(date +%s)
+            elapsed_time=$((current_time - start_time))
+            if [ "$elapsed_time" -ge "$timeout" ]; then
+              echo "Timeout reached. Service did not become ready in 5 minutes."
+              exit 1
+            fi
+            # curl prints the -w output even when the request fails, so detect
+            # failure from its exit status rather than from the captured text.
+            if ! response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8080/health); then
+              echo "Curl encountered an error; retrying..."
+            elif [ "$response" = "200" ]; then
+              echo "Service is ready!"
+              break
+            else
+              echo "Service not ready yet (HTTP $response). Retrying in 5 seconds..."
+            fi
+            sleep 5
+          done
+          echo "Finished waiting for service."
+
+      # Runs the multi-tenant test suite inside the integration image, attached
+      # to the compose network so it can reach the services by name.
+      - name: Run Multi-Tenant Integration Tests
+        run: |
+          echo "Running multi-tenant integration tests..."
+          docker run --rm --network onyx_default \
+            --name test-runner \
+            -e POSTGRES_HOST=relational_db \
+            -e POSTGRES_USER=postgres \
+            -e POSTGRES_PASSWORD=password \
+            -e DB_READONLY_USER=db_readonly_user \
+            -e DB_READONLY_PASSWORD=password \
+            -e POSTGRES_DB=postgres \
+            -e POSTGRES_USE_NULL_POOL=true \
+            -e VESPA_HOST=index \
+            -e REDIS_HOST=cache \
+            -e API_SERVER_HOST=api_server \
+            -e OPENAI_API_KEY="${OPENAI_API_KEY}" \
+            -e SLACK_BOT_TOKEN="${SLACK_BOT_TOKEN}" \
+            -e TEST_WEB_HOSTNAME=test-runner \
+            -e AUTH_TYPE=cloud \
+            -e MULTI_TENANT=true \
+            -e SKIP_RESET=true \
+            -e REQUIRE_EMAIL_VERIFICATION=false \
+            -e DISABLE_TELEMETRY=true \
+            -e IMAGE_TAG=test \
+            -e DEV_MODE=true \
+            onyxdotapp/onyx-integration:test \
+            /app/tests/integration/multitenant_tests
+
+      - name: Dump API server logs (multi-tenant)
+        if: always()
+        run: |
+          cd deployment/docker_compose
+          docker compose -f docker-compose.multitenant-dev.yml logs --no-color api_server > $GITHUB_WORKSPACE/api_server_multitenant.log || true
+
+      - name: Dump all-container logs (multi-tenant)
+        if: always()
+        run: |
+          cd deployment/docker_compose
+          docker compose -f docker-compose.multitenant-dev.yml logs --no-color > $GITHUB_WORKSPACE/docker-compose-multitenant.log || true
+
+      - name: Upload logs (multi-tenant)
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: docker-all-logs-multitenant
+          path: ${{ github.workspace }}/docker-compose-multitenant.log
+
+      - name: Stop multi-tenant Docker containers
+        if: always()
+        run: |
+          cd deployment/docker_compose
+          docker compose -f docker-compose.multitenant-dev.yml down -v
+
+  # Single gate job: fails unless every job listed in `needs` succeeded.
+  required:
+    needs: [integration-tests, multitenant-tests]
+    runs-on: blacksmith-2vcpu-ubuntu-2404-arm
+    # always() lets this job run even when an upstream job did not succeed.
+    if: ${{ always() }}
+    steps:
+      - uses: actions/github-script@v7
+        with:
+          script: |
+            // `needs` maps each upstream job id to an object with its result.
+            const needs = ${{ toJSON(needs) }};
+            const allSucceeded = Object.values(needs).every(({ result }) => result === 'success');
+            if (allSucceeded) {
+              core.notice('All required jobs succeeded.');
+            } else {
+              core.setFailed('One or more upstream jobs failed or were cancelled.');
+            }
--- a/.github/workflows/pr-mit-integration-tests.yml
+++ b/.github/workflows/pr-mit-integration-tests.yml
@@ -5,12 +5,15 @@ concurrency:

 on:
  merge_group:
-  pull_request:
-    branches:
-      - main
-      - "release/**"
+    types: [checks_requested]

 env:
+  # Private Registry Configuration
+  PRIVATE_REGISTRY: experimental-registry.blacksmith.sh:5000
+  PRIVATE_REGISTRY_USERNAME: ${{ secrets.PRIVATE_REGISTRY_USERNAME }}
+  PRIVATE_REGISTRY_PASSWORD: ${{ secrets.PRIVATE_REGISTRY_PASSWORD }}
+
+  # Test Environment Variables
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
  CONFLUENCE_TEST_SPACE_URL: ${{ secrets.CONFLUENCE_TEST_SPACE_URL }}
@@ -23,21 +26,42 @@ env:
  PERM_SYNC_SHAREPOINT_PRIVATE_KEY: ${{ secrets.PERM_SYNC_SHAREPOINT_PRIVATE_KEY }}
  PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD: ${{ secrets.PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD }}
  PERM_SYNC_SHAREPOINT_DIRECTORY_ID: ${{ secrets.PERM_SYNC_SHAREPOINT_DIRECTORY_ID }}
-  PLATFORM_PAIR: linux-amd64
+
 jobs:
-  integration-tests-mit:
-    # See https://runs-on.com/runners/linux/
-    runs-on:
-      [
-        runs-on,
-        runner=32cpu-linux-x64,
-        disk=large,
-        "run-id=${{ github.run_id }}",
-      ]
+  # Builds the matrix for the integration-tests-mit job: one entry per
+  # immediate subdirectory of the two integration test roots, as JSON.
+  discover-test-dirs:
+    runs-on: blacksmith-2vcpu-ubuntu-2404-arm
+    outputs:
+      test-dirs: ${{ steps.set-matrix.outputs.test-dirs }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
-        
+
+      - name: Discover test directories
+        id: set-matrix
+        run: |
+          # List the immediate subdirectories (depth 1 only) of both test
+          # roots, skipping Python bytecode caches.
+          tests_dirs=$(find backend/tests/integration/tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" -exec basename {} \; | sort)
+          connector_dirs=$(find backend/tests/integration/connector_job_tests -mindepth 1 -maxdepth 1 -type d ! -name "__pycache__" -exec basename {} \; | sort)
+
+          # Emit one {"path": ..., "name": ...} object per directory.
+          # Names are split on whitespace and inserted without JSON escaping,
+          # so directory names must not contain spaces or quotes.
+          all_dirs=""
+          for dir in $tests_dirs; do
+            all_dirs="$all_dirs{\"path\":\"tests/$dir\",\"name\":\"tests-$dir\"},"
+          done
+          for dir in $connector_dirs; do
+            all_dirs="$all_dirs{\"path\":\"connector_job_tests/$dir\",\"name\":\"connector-$dir\"},"
+          done
+
+          # An empty list cannot be used as a matrix; fail here with a clear
+          # message rather than in the job that consumes it.
+          if [ -z "$all_dirs" ]; then
+            echo "::error::No integration test directories were found"
+            exit 1
+          fi
+
+          # Remove trailing comma and wrap in array
+          all_dirs="[${all_dirs%,}]"
+          echo "test-dirs=$all_dirs" >> "$GITHUB_OUTPUT"
+
+  prepare-build:
+    runs-on: blacksmith-2vcpu-ubuntu-2404-arm
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
      - name: Setup Python
        uses: actions/setup-python@v5
        with:
@@ -46,7 +70,9 @@ jobs:
          cache-dependency-path: |
            backend/requirements/default.txt
            backend/requirements/dev.txt
-      - run: |
+
+      - name: Install Python dependencies
+        run: |
          python -m pip install --upgrade pip
          pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
          pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
@@ -70,71 +96,155 @@ jobs:
            --package-name onyx_openapi_client \
            --skip-validate-spec \
            --openapi-normalizer "SIMPLIFY_ONEOF_ANYOF=true,SET_OAS3_NULLABLE=true"
-            
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3

+      - name: Upload OpenAPI artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: openapi-artifacts
+          path: backend/generated/
+
+  # Builds the backend image for linux/arm64 and pushes it to the private
+  # registry.
+  build-backend-image:
+    runs-on: blacksmith-16vcpu-ubuntu-2404-arm
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      # Log in before the build step, which pushes to this registry.
+      - name: Login to Private Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.PRIVATE_REGISTRY }}
+          password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
+          username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
+
+      - name: Set up Docker Buildx
+        uses: useblacksmith/setup-docker-builder@v1
+
+      - name: Build and push Backend Docker image
+        uses: useblacksmith/build-push-action@v2
+        with:
+          file: ./backend/Dockerfile
+          context: ./backend
+          platforms: linux/arm64
+          push: true
+          outputs: type=registry
+          # The tag embeds the run id, so each workflow run gets its own tag.
+          tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }}
+
+  # Builds the model server image for linux/arm64 and pushes it to the
+  # private registry.
+  build-model-server-image:
+    runs-on: blacksmith-16vcpu-ubuntu-2404-arm
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      # Log in before the build step, which pushes to this registry.
+      - name: Login to Private Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.PRIVATE_REGISTRY }}
+          password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
+          username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
+
+      - name: Set up Docker Buildx
+        uses: useblacksmith/setup-docker-builder@v1
+
+      - name: Build and push Model Server Docker image
+        uses: useblacksmith/build-push-action@v2
+        with:
+          file: ./backend/Dockerfile.model_server
+          context: ./backend
+          platforms: linux/arm64
+          push: true
+          outputs: type=registry
+          # No provenance attestation is generated for this image.
+          provenance: false
+          # The tag embeds the run id, so each workflow run gets its own tag.
+          tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }}
+
+  # Builds the integration test image for linux/arm64 and pushes it to the
+  # private registry. Runs after prepare-build because it downloads the
+  # `openapi-artifacts` artifact into the build context.
+  build-integration-image:
+    runs-on: blacksmith-16vcpu-ubuntu-2404-arm
+    needs: prepare-build
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      # Log in before the build step, which pushes to this registry.
+      - name: Login to Private Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.PRIVATE_REGISTRY }}
+          password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
+          username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
+
+      # Place the generated files under ./backend, the Docker build context.
+      - name: Download OpenAPI artifacts
+        uses: actions/download-artifact@v4
+        with:
+          path: backend/generated/
+          name: openapi-artifacts
+
+      - name: Set up Docker Buildx
+        uses: useblacksmith/setup-docker-builder@v1
+
+      - name: Build and push integration test Docker image
+        uses: useblacksmith/build-push-action@v2
+        with:
+          file: ./backend/tests/integration/Dockerfile
+          context: ./backend
+          platforms: linux/arm64
+          push: true
+          outputs: type=registry
+          # The tag embeds the run id, so each workflow run gets its own tag.
+          tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }}
+
+  integration-tests-mit:
+    needs:
+      [
+        discover-test-dirs,
+        build-backend-image,
+        build-model-server-image,
+        build-integration-image,
+      ]
+    # See https://docs.blacksmith.sh/blacksmith-runners/overview
+    runs-on: blacksmith-8vcpu-ubuntu-2404-arm
+
+    strategy:
+      fail-fast: false
+      matrix:
+        test-dir: ${{ fromJson(needs.discover-test-dirs.outputs.test-dirs) }}
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Login to Private Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.PRIVATE_REGISTRY }}
+          username: ${{ env.PRIVATE_REGISTRY_USERNAME }}
+          password: ${{ env.PRIVATE_REGISTRY_PASSWORD }}
+
+      # needed for pulling Vespa, Redis, Postgres, and Minio images
+      # otherwise, we hit the "Unauthenticated users" limit
+      # https://docs.docker.com/docker-hub/usage/
      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

-      # tag every docker image with "test" so that we can spin up the correct set
-      # of images during testing
-
-      # We don't need to build the Web Docker image since it's not yet used
-      # in the integration tests. We have a separate action to verify that it builds
-      # successfully.
-      - name: Pull Web Docker image
+      - name: Pull Docker images
        run: |
-          docker pull onyxdotapp/onyx-web-server:latest
-          docker tag onyxdotapp/onyx-web-server:latest onyxdotapp/onyx-web-server:test
+          # Pull all images from registry in parallel, remembering each background PID
+          echo "Pulling Docker images in parallel..."
+          # Pull images from private registry
+          (docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }}) & pull_pids+=("$!")
+          (docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }}) & pull_pids+=("$!")
+          (docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }}) & pull_pids+=("$!")

-      # we use the runs-on cache for docker builds
-      # in conjunction with runs-on runners, it has better speed and unlimited caching
-      # https://runs-on.com/caching/s3-cache-for-github-actions/
-      # https://runs-on.com/caching/docker/
-      # https://github.com/moby/buildkit#s3-cache-experimental
+          # Wait on each pull by PID: a bare `wait` always exits 0 and would hide a failed pull
+          for pid in "${pull_pids[@]}"; do wait "$pid" || { echo "Docker image pull failed (pid $pid)" >&2; exit 1; }; done
+          echo "All Docker images pulled successfully"

-      # images are built and run locally for testing purposes. Not pushed.
-      - name: Build Backend Docker image
-        uses: ./.github/actions/custom-build-and-push
-        with:
-          context: ./backend
-          file: ./backend/Dockerfile
-          platforms: linux/amd64
-          tags: onyxdotapp/onyx-backend:test
-          push: false
-          load: true
-          cache-from: type=s3,prefix=cache/${{ github.repository }}/mit-integration-tests/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
-          cache-to: type=s3,prefix=cache/${{ github.repository }}/mit-integration-tests/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
-
-      - name: Build Model Server Docker image
-        uses: ./.github/actions/custom-build-and-push
-        with:
-          context: ./backend
-          file: ./backend/Dockerfile.model_server
-          platforms: linux/amd64
-          tags: onyxdotapp/onyx-model-server:test
-          push: false
-          load: true
-          cache-from: type=s3,prefix=cache/${{ github.repository }}/mit-integration-tests/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
-          cache-to: type=s3,prefix=cache/${{ github.repository }}/mit-integration-tests/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
-
-      - name: Build integration test Docker image
-        uses: ./.github/actions/custom-build-and-push
-        with:
-          context: ./backend
-          file: ./backend/tests/integration/Dockerfile
-          platforms: linux/amd64
-          tags: onyxdotapp/onyx-integration:test
-          push: false
-          load: true
-          cache-from: type=s3,prefix=cache/${{ github.repository }}/mit-integration-tests/integration-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
-          cache-to: type=s3,prefix=cache/${{ github.repository }}/mit-integration-tests/integration-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
+          # Re-tag to remove registry prefix for docker-compose
+          docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }} onyxdotapp/onyx-backend:test
+          docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }} onyxdotapp/onyx-model-server:test
+          docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }} onyxdotapp/onyx-integration:test

      # NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections
+      # NOTE: don't need web server for integration tests
      - name: Start Docker containers
        run: |
          cd deployment/docker_compose
@@ -145,14 +255,23 @@ jobs:
          DISABLE_TELEMETRY=true \
          IMAGE_TAG=test \
          INTEGRATION_TESTS_MODE=true \
-          docker compose -f docker-compose.dev.yml -p onyx-stack up -d
+          docker compose -f docker-compose.yml -f docker-compose.dev.yml up \
+            relational_db \
+            index \
+            cache \
+            minio \
+            api_server \
+            inference_model_server \
+            indexing_model_server \
+            background \
+            -d
        id: start_docker

      - name: Wait for service to be ready
        run: |
          echo "Starting wait-for-service script..."

-          docker logs -f onyx-stack-api_server-1 &
+          docker logs -f onyx-api_server-1 &

          start_time=$(date +%s)
          timeout=300  # 5 minutes in seconds
@@ -189,51 +308,44 @@ jobs:
            -p mock-it-services-stack up -d

      # NOTE: Use pre-ping/null to reduce flakiness due to dropped connections
-      - name: Run Standard Integration Tests
-        run: |
-          echo "Running integration tests..."
-          docker run --rm --network onyx-stack_default \
-            --name test-runner \
-            -e POSTGRES_HOST=relational_db \
-            -e POSTGRES_USER=postgres \
-            -e POSTGRES_PASSWORD=password \
-            -e POSTGRES_DB=postgres \
-            -e DB_READONLY_USER=db_readonly_user \
-            -e DB_READONLY_PASSWORD=password \
-            -e POSTGRES_POOL_PRE_PING=true \
-            -e POSTGRES_USE_NULL_POOL=true \
-            -e VESPA_HOST=index \
-            -e REDIS_HOST=cache \
-            -e API_SERVER_HOST=api_server \
-            -e OPENAI_API_KEY=${OPENAI_API_KEY} \
-            -e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
-            -e CONFLUENCE_TEST_SPACE_URL=${CONFLUENCE_TEST_SPACE_URL} \
-            -e CONFLUENCE_USER_NAME=${CONFLUENCE_USER_NAME} \
-            -e CONFLUENCE_ACCESS_TOKEN=${CONFLUENCE_ACCESS_TOKEN} \
-            -e JIRA_BASE_URL=${JIRA_BASE_URL} \
-            -e JIRA_USER_EMAIL=${JIRA_USER_EMAIL} \
-            -e JIRA_API_TOKEN=${JIRA_API_TOKEN} \
-            -e PERM_SYNC_SHAREPOINT_CLIENT_ID=${PERM_SYNC_SHAREPOINT_CLIENT_ID} \
-            -e PERM_SYNC_SHAREPOINT_PRIVATE_KEY="${PERM_SYNC_SHAREPOINT_PRIVATE_KEY}" \
-            -e PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD=${PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD} \
-            -e PERM_SYNC_SHAREPOINT_DIRECTORY_ID=${PERM_SYNC_SHAREPOINT_DIRECTORY_ID} \
-            -e TEST_WEB_HOSTNAME=test-runner \
-            -e MOCK_CONNECTOR_SERVER_HOST=mock_connector_server \
-            -e MOCK_CONNECTOR_SERVER_PORT=8001 \
-            onyxdotapp/onyx-integration:test \
-            /app/tests/integration/tests \
-            /app/tests/integration/connector_job_tests
-        continue-on-error: true
-        id: run_tests
-
-      - name: Check test results
-        run: |
-          if [ ${{ steps.run_tests.outcome }} == 'failure' ]; then
-            echo "Integration tests failed. Exiting with error."
-            exit 1
-          else
-            echo "All integration tests passed successfully."
-          fi
+      - name: Run Integration Tests for ${{ matrix.test-dir.name }}
+        uses: nick-fields/retry@v3
+        with:
+          timeout_minutes: 20
+          max_attempts: 3
+          retry_wait_seconds: 10
+          command: |
+            echo "Running integration tests for ${{ matrix.test-dir.path }}..."
+            docker run --rm --network onyx_default \
+              --name test-runner \
+              -e POSTGRES_HOST=relational_db \
+              -e POSTGRES_USER=postgres \
+              -e POSTGRES_PASSWORD=password \
+              -e POSTGRES_DB=postgres \
+              -e DB_READONLY_USER=db_readonly_user \
+              -e DB_READONLY_PASSWORD=password \
+              -e POSTGRES_POOL_PRE_PING=true \
+              -e POSTGRES_USE_NULL_POOL=true \
+              -e VESPA_HOST=index \
+              -e REDIS_HOST=cache \
+              -e API_SERVER_HOST=api_server \
+              -e OPENAI_API_KEY=${OPENAI_API_KEY} \
+              -e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
+              -e CONFLUENCE_TEST_SPACE_URL=${CONFLUENCE_TEST_SPACE_URL} \
+              -e CONFLUENCE_USER_NAME=${CONFLUENCE_USER_NAME} \
+              -e CONFLUENCE_ACCESS_TOKEN=${CONFLUENCE_ACCESS_TOKEN} \
+              -e JIRA_BASE_URL=${JIRA_BASE_URL} \
+              -e JIRA_USER_EMAIL=${JIRA_USER_EMAIL} \
+              -e JIRA_API_TOKEN=${JIRA_API_TOKEN} \
+              -e PERM_SYNC_SHAREPOINT_CLIENT_ID=${PERM_SYNC_SHAREPOINT_CLIENT_ID} \
+              -e PERM_SYNC_SHAREPOINT_PRIVATE_KEY="${PERM_SYNC_SHAREPOINT_PRIVATE_KEY}" \
+              -e PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD=${PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD} \
+              -e PERM_SYNC_SHAREPOINT_DIRECTORY_ID=${PERM_SYNC_SHAREPOINT_DIRECTORY_ID} \
+              -e TEST_WEB_HOSTNAME=test-runner \
+              -e MOCK_CONNECTOR_SERVER_HOST=mock_connector_server \
+              -e MOCK_CONNECTOR_SERVER_PORT=8001 \
+              onyxdotapp/onyx-integration:test \
+              /app/tests/integration/${{ matrix.test-dir.path }}

      # ------------------------------------------------------------
      # Always gather logs BEFORE "down":
@@ -241,19 +353,19 @@ jobs:
        if: always()
        run: |
          cd deployment/docker_compose
-          docker compose -f docker-compose.dev.yml -p onyx-stack logs --no-color api_server > $GITHUB_WORKSPACE/api_server.log || true
+          docker compose logs --no-color api_server > $GITHUB_WORKSPACE/api_server.log || true

      - name: Dump all-container logs (optional)
        if: always()
        run: |
          cd deployment/docker_compose
-          docker compose -f docker-compose.dev.yml -p onyx-stack logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
+          docker compose logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true

      - name: Upload logs
        if: always()
        uses: actions/upload-artifact@v4
        with:
-          name: docker-all-logs
+          name: docker-all-logs-${{ matrix.test-dir.name }}
          path: ${{ github.workspace }}/docker-compose.log
      # ------------------------------------------------------------

@@ -261,4 +373,21 @@ jobs:
        if: always()
        run: |
          cd deployment/docker_compose
-          docker compose -f docker-compose.dev.yml -p onyx-stack down -v
+          docker compose down -v
+
+  
+  required:  # gate job: fails unless every job listed in `needs` finished with result 'success'
+    runs-on: blacksmith-2vcpu-ubuntu-2404-arm
+    needs: [integration-tests-mit]
+    if: ${{ always() }}  # run even when integration-tests-mit failed or was cancelled, so the result is still reported
+    steps:
+      - uses: actions/github-script@v7
+        with:
+          script: |
+            const needs = ${{ toJSON(needs) }};  // the `needs` context, inlined as a JSON literal
+            const failed = Object.values(needs).some(n => n.result !== 'success');  // any other result (including skipped) counts as failed
+            if (failed) {
+              core.setFailed('One or more upstream jobs failed or were cancelled.');
+            } else {
+              core.notice('All required jobs succeeded.');
+            }
--- a/.github/workflows/pr-playwright-tests.yml
+++ b/.github/workflows/pr-playwright-tests.yml
@@ -6,43 +6,171 @@ concurrency:
 on: push

 env:
+  # AWS ECR Configuration
+  AWS_REGION: ${{ secrets.AWS_REGION || 'us-west-2' }}
+  ECR_REGISTRY: ${{ secrets.ECR_REGISTRY }}
+  AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID_ECR }}
+  AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY_ECR }}
+  BUILDX_NO_DEFAULT_ATTESTATIONS: 1
+  
+  # Test Environment Variables
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
  GEN_AI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+  EXA_API_KEY: ${{ secrets.EXA_API_KEY }}
+
+  # for federated slack tests
+  SLACK_CLIENT_ID: ${{ secrets.SLACK_CLIENT_ID }}
+  SLACK_CLIENT_SECRET: ${{ secrets.SLACK_CLIENT_SECRET }}
+
  MOCK_LLM_RESPONSE: true

 jobs:
-  playwright-tests:
-    name: Playwright Tests
+  build-web-image:  # builds the web image for linux/arm64 and pushes it to ECR under a tag unique to this run
+    runs-on: blacksmith-8vcpu-ubuntu-2404-arm
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4

-    # See https://runs-on.com/runners/linux/
-    runs-on:
-      [
-        runs-on,
-        runner=32cpu-linux-x64,
-        disk=large,
-        "run-id=${{ github.run_id }}",
-      ]
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          aws-access-key-id: ${{ env.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ env.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ env.AWS_REGION }}
+
+      - name: Login to Amazon ECR
+        id: login-ecr  # NOTE(review): this id is not referenced anywhere in this job; the registry comes from env.ECR_REGISTRY
+        uses: aws-actions/amazon-ecr-login@v2
+
+      - name: Set up Docker Buildx
+        uses: useblacksmith/setup-docker-builder@v1
+
+      - name: Build and push Web Docker image
+        uses: useblacksmith/build-push-action@v2
+        with:
+          context: ./web
+          file: ./web/Dockerfile
+          platforms: linux/arm64
+          tags: ${{ env.ECR_REGISTRY }}/integration-test-onyx-web-server:playwright-test-${{ github.run_id }}  # same tag the playwright-tests job pulls
+          provenance: false  # no provenance attestation
+          sbom: false  # no SBOM attestation
+          push: true
+          outputs: type=registry  # NOTE(review): presumably redundant with push: true - confirm against the action's docs
+          # no-cache: true
+
+  build-backend-image:  # builds the backend image for linux/arm64 and pushes it to ECR under a tag unique to this run
+    runs-on: blacksmith-8vcpu-ubuntu-2404-arm
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          aws-access-key-id: ${{ env.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ env.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ env.AWS_REGION }}
+
+      - name: Login to Amazon ECR
+        id: login-ecr  # NOTE(review): this id is not referenced anywhere in this job; the registry comes from env.ECR_REGISTRY
+        uses: aws-actions/amazon-ecr-login@v2
+
+      - name: Set up Docker Buildx
+        uses: useblacksmith/setup-docker-builder@v1
+
+      - name: Build and push Backend Docker image
+        uses: useblacksmith/build-push-action@v2
+        with:
+          context: ./backend
+          file: ./backend/Dockerfile
+          platforms: linux/arm64
+          tags: ${{ env.ECR_REGISTRY }}/integration-test-onyx-backend:playwright-test-${{ github.run_id }}  # same tag the playwright-tests job pulls
+          provenance: false  # no provenance attestation
+          sbom: false  # no SBOM attestation
+          push: true
+          outputs: type=registry  # NOTE(review): presumably redundant with push: true - confirm against the action's docs
+          # no-cache: true
+
+  build-model-server-image:  # builds the model server image for linux/arm64 and pushes it to ECR under a tag unique to this run
+    runs-on: blacksmith-8vcpu-ubuntu-2404-arm
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          aws-access-key-id: ${{ env.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ env.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ env.AWS_REGION }}
+
+      - name: Login to Amazon ECR
+        id: login-ecr  # NOTE(review): this id is not referenced anywhere in this job; the registry comes from env.ECR_REGISTRY
+        uses: aws-actions/amazon-ecr-login@v2
+
+      - name: Set up Docker Buildx
+        uses: useblacksmith/setup-docker-builder@v1
+
+      - name: Build and push Model Server Docker image
+        uses: useblacksmith/build-push-action@v2
+        with:
+          context: ./backend
+          file: ./backend/Dockerfile.model_server
+          platforms: linux/arm64
+          tags: ${{ env.ECR_REGISTRY }}/integration-test-onyx-model-server:playwright-test-${{ github.run_id }}  # same tag the playwright-tests job pulls
+          provenance: false  # no provenance attestation
+          sbom: false  # no SBOM attestation
+          push: true
+          outputs: type=registry  # NOTE(review): presumably redundant with push: true - confirm against the action's docs
+          # no-cache: true
+
+  playwright-tests:
+    needs: [build-web-image, build-backend-image, build-model-server-image]
+    name: Playwright Tests
+    runs-on: blacksmith-8vcpu-ubuntu-2404-arm
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

-      - name: Set up Python
-        uses: actions/setup-python@v5
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v4
        with:
-          python-version: "3.11"
-          cache: "pip"
-          cache-dependency-path: |
-            backend/requirements/default.txt
-            backend/requirements/dev.txt
-            backend/requirements/model_server.txt
-      - run: |
-          python -m pip install --upgrade pip
-          pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
-          pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
-          pip install --retries 5 --timeout 30 -r backend/requirements/model_server.txt
+          aws-access-key-id: ${{ env.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ env.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ env.AWS_REGION }}
+
+      - name: Login to Amazon ECR
+        id: login-ecr
+        uses: aws-actions/amazon-ecr-login@v2
+
+      # needed for pulling Vespa, Redis, Postgres, and Minio images
+      # otherwise, we hit the "Unauthenticated users" limit
+      # https://docs.docker.com/docker-hub/usage/
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      - name: Pull Docker images
+        run: |
+          # Pull all images from ECR in parallel, remembering each background PID
+          echo "Pulling Docker images in parallel..."
+          (docker pull ${{ env.ECR_REGISTRY }}/integration-test-onyx-web-server:playwright-test-${{ github.run_id }}) & pull_pids+=("$!")
+          (docker pull ${{ env.ECR_REGISTRY }}/integration-test-onyx-backend:playwright-test-${{ github.run_id }}) & pull_pids+=("$!")
+          (docker pull ${{ env.ECR_REGISTRY }}/integration-test-onyx-model-server:playwright-test-${{ github.run_id }}) & pull_pids+=("$!")
+
+          # Wait on each pull by PID: a bare `wait` always exits 0 and would hide a failed pull
+          for pid in "${pull_pids[@]}"; do wait "$pid" || { echo "Docker image pull failed (pid $pid)" >&2; exit 1; }; done
+          echo "All Docker images pulled successfully"
+
+          # Re-tag with expected names for docker-compose
+          docker tag ${{ env.ECR_REGISTRY }}/integration-test-onyx-web-server:playwright-test-${{ github.run_id }} onyxdotapp/onyx-web-server:test
+          docker tag ${{ env.ECR_REGISTRY }}/integration-test-onyx-backend:playwright-test-${{ github.run_id }} onyxdotapp/onyx-backend:test
+          docker tag ${{ env.ECR_REGISTRY }}/integration-test-onyx-model-server:playwright-test-${{ github.run_id }} onyxdotapp/onyx-model-server:test

      - name: Setup node
        uses: actions/setup-node@v4
@@ -57,79 +185,29 @@ jobs:
        working-directory: ./web
        run: npx playwright install --with-deps

-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKER_USERNAME }}
-          password: ${{ secrets.DOCKER_TOKEN }}
-
-      # tag every docker image with "test" so that we can spin up the correct set
-      # of images during testing
-
-      # we use the runs-on cache for docker builds
-      # in conjunction with runs-on runners, it has better speed and unlimited caching
-      # https://runs-on.com/caching/s3-cache-for-github-actions/
-      # https://runs-on.com/caching/docker/
-      # https://github.com/moby/buildkit#s3-cache-experimental
-
-      # images are built and run locally for testing purposes. Not pushed.
-
-      - name: Build Web Docker image
-        uses: ./.github/actions/custom-build-and-push
-        with:
-          context: ./web
-          file: ./web/Dockerfile
-          platforms: linux/amd64
-          tags: onyxdotapp/onyx-web-server:test
-          push: false
-          load: true
-          cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/web-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
-          cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/web-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
-
-      - name: Build Backend Docker image
-        uses: ./.github/actions/custom-build-and-push
-        with:
-          context: ./backend
-          file: ./backend/Dockerfile
-          platforms: linux/amd64
-          tags: onyxdotapp/onyx-backend:test
-          push: false
-          load: true
-          cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
-          cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
-
-      - name: Build Model Server Docker image
-        uses: ./.github/actions/custom-build-and-push
-        with:
-          context: ./backend
-          file: ./backend/Dockerfile.model_server
-          platforms: linux/amd64
-          tags: onyxdotapp/onyx-model-server:test
-          push: false
-          load: true
-          cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
-          cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
+      - name: Create .env file for Docker Compose
+        run: |
+          cat <<EOF > deployment/docker_compose/.env
+          ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
+          AUTH_TYPE=basic
+          GEN_AI_API_KEY=${{ env.OPENAI_API_KEY }}
+          EXA_API_KEY=${{ env.EXA_API_KEY }}
+          REQUIRE_EMAIL_VERIFICATION=false
+          DISABLE_TELEMETRY=true
+          IMAGE_TAG=test
+          EOF

      - name: Start Docker containers
        run: |
          cd deployment/docker_compose
-          ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
-          AUTH_TYPE=basic \
-          GEN_AI_API_KEY=${{ secrets.OPENAI_API_KEY }} \
-          REQUIRE_EMAIL_VERIFICATION=false \
-          DISABLE_TELEMETRY=true \
-          IMAGE_TAG=test \
-          docker compose -f docker-compose.dev.yml -p danswer-stack up -d
+          docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d
        id: start_docker

      - name: Wait for service to be ready
        run: |
          echo "Starting wait-for-service script..."

-          docker logs -f danswer-stack-api_server-1 &
+          docker logs -f onyx-api_server-1 &

          start_time=$(date +%s)
          timeout=300  # 5 minutes in seconds
@@ -161,14 +239,16 @@ jobs:

      - name: Run Playwright tests
        working-directory: ./web
-        run: npx playwright test
+        run: |
+          # Create test-results directory to ensure it exists for artifact upload
+          mkdir -p test-results
+          npx playwright test

      - uses: actions/upload-artifact@v4
        if: always()
        with:
-          # Chromatic automatically defaults to the test-results directory.
-          # Replace with the path to your custom directory and adjust the CHROMATIC_ARCHIVE_LOCATION environment variable accordingly.
-          name: test-results
+          # Includes test results and debug screenshots
+          name: playwright-test-results-${{ github.run_id }}
          path: ./web/test-results
          retention-days: 30

@@ -177,7 +257,7 @@ jobs:
        if: success() || failure()
        run: |
          cd deployment/docker_compose
-          docker compose -f docker-compose.dev.yml -p danswer-stack logs > docker-compose.log
+          docker compose logs > docker-compose.log
          mv docker-compose.log ${{ github.workspace }}/docker-compose.log

      - name: Upload logs
@@ -190,7 +270,7 @@ jobs:
      - name: Stop Docker containers
        run: |
          cd deployment/docker_compose
-          docker compose -f docker-compose.dev.yml -p danswer-stack down -v
+          docker compose down -v

 # NOTE: Chromatic UI diff testing is currently disabled.
 # We are using Playwright for local and CI testing without visual regression checks.
--- a/.github/workflows/pr-python-connector-tests.yml
+++ b/.github/workflows/pr-python-connector-tests.yml
@@ -96,6 +96,13 @@ env:
  TEAMS_DIRECTORY_ID: ${{ secrets.TEAMS_DIRECTORY_ID }}
  TEAMS_SECRET: ${{ secrets.TEAMS_SECRET }}

+  # Bitbucket
+  BITBUCKET_WORKSPACE: ${{ secrets.BITBUCKET_WORKSPACE }}
+  BITBUCKET_REPOSITORIES: ${{ secrets.BITBUCKET_REPOSITORIES }}
+  BITBUCKET_PROJECTS: ${{ secrets.BITBUCKET_PROJECTS }}
+  BITBUCKET_EMAIL: ${{ secrets.BITBUCKET_EMAIL }}
+  BITBUCKET_API_TOKEN: ${{ secrets.BITBUCKET_API_TOKEN }}
+
 jobs:
  connectors-check:
    # See https://runs-on.com/runners/linux/
--- a/.github/workflows/pr-python-model-tests.yml
+++ b/.github/workflows/pr-python-model-tests.yml
@@ -77,7 +77,7 @@ jobs:
          REQUIRE_EMAIL_VERIFICATION=false \
          DISABLE_TELEMETRY=true \
          IMAGE_TAG=test \
-          docker compose -f docker-compose.model-server-test.yml -p onyx-stack up -d indexing_model_server
+          docker compose -f docker-compose.model-server-test.yml up -d indexing_model_server
        id: start_docker

      - name: Wait for service to be ready
@@ -132,7 +132,7 @@ jobs:
        if: always()
        run: |
          cd deployment/docker_compose
-          docker compose -f docker-compose.model-server-test.yml -p onyx-stack logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
+          docker compose -f docker-compose.model-server-test.yml logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true

      - name: Upload logs
        if: always()
@@ -145,5 +145,5 @@ jobs:
        if: always()
        run: |
          cd deployment/docker_compose
-          docker compose -f docker-compose.model-server-test.yml -p onyx-stack down -v
+          docker compose -f docker-compose.model-server-test.yml down -v
          
--- a/.github/workflows/pr-python-tests.yml
+++ b/.github/workflows/pr-python-tests.yml
@@ -31,12 +31,14 @@ jobs:
        cache-dependency-path: |
          backend/requirements/default.txt
          backend/requirements/dev.txt
+          backend/requirements/model_server.txt

    - name: Install Dependencies
      run: |
        python -m pip install --upgrade pip
        pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
        pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
+        pip install --retries 5 --timeout 30 -r backend/requirements/model_server.txt

    - name: Run Tests
      shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
--- a/.gitignore
+++ b/.gitignore
@@ -17,6 +17,7 @@ backend/tests/regression/answer_quality/test_data.json
 backend/tests/regression/search_quality/eval-*
 backend/tests/regression/search_quality/search_eval_config.yaml
 backend/tests/regression/search_quality/*.json
+backend/onyx/evals/data/
 *.log

 # secret files
@@ -28,6 +29,7 @@ settings.json
 /deployment/data/nginx/app.conf
 *.sw?
 /backend/tests/regression/answer_quality/search_test_config.yaml
+*.egg-info

 # Local .terraform directories
 **/.terraform/*
--- a/.mcp.json.template
+++ b/.mcp.json.template
@@ -0,0 +1,8 @@
+{
+  "mcpServers": {
+    "onyx-mcp": {
+      "type": "http",
+      "url": "http://localhost:8000/mcp"
+    }
+  }
+}
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -37,6 +37,15 @@ repos:
      additional_dependencies:
      - prettier

+  - repo: local  # hook defined in this repository rather than fetched from a remote hook repo
+    hooks:
+      - id: check-lazy-imports
+        name: Check lazy imports are not directly imported
+        entry: python3 backend/scripts/check_lazy_imports.py
+        language: system  # uses whatever python3 is on PATH; pre-commit builds no environment for it
+        files: ^backend/.*\.py$  # only triggered when a .py file under backend/ is part of the commit
+        pass_filenames: false  # the script is invoked without file arguments
+
  # We would like to have a mypy pre-commit hook, but due to the fact that
  # pre-commit runs in it's own isolated environment, we would need to install
  # and keep in sync all dependencies so mypy has access to the appropriate type
--- a/.vscode/env_template.txt
+++ b/.vscode/env_template.txt
@@ -10,7 +10,7 @@ SKIP_WARM_UP=True

 # Always keep these on for Dev
 # Logs all model prompts to stdout
-LOG_DANSWER_MODEL_INTERACTIONS=True
+LOG_ONYX_MODEL_INTERACTIONS=True
 # More verbose logging
 LOG_LEVEL=debug

@@ -39,8 +39,8 @@ FAST_GEN_AI_MODEL_VERSION=gpt-4o

 # For Danswer Slack Bot, overrides the UI values so no need to set this up via UI every time
 # Only needed if using DanswerBot
-#DANSWER_BOT_SLACK_APP_TOKEN=<REPLACE THIS>
-#DANSWER_BOT_SLACK_BOT_TOKEN=<REPLACE THIS>
+#ONYX_BOT_SLACK_APP_TOKEN=<REPLACE THIS>
+#ONYX_BOT_SLACK_BOT_TOKEN=<REPLACE THIS>


 # Python stuff
--- a/.vscode/launch.template.jsonc
+++ b/.vscode/launch.template.jsonc
@@ -1,422 +1,468 @@
 /* Copy this file into '.vscode/launch.json' or merge its contents into your existing configurations. */

 {
-    // Use IntelliSense to learn about possible attributes.
-    // Hover to view descriptions of existing attributes.
-    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
-    "version": "0.2.0",
-    "compounds": [
-      {
-        // Dummy entry used to label the group
-        "name": "--- Compound ---",
-        "configurations": ["--- Individual ---"],
-        "presentation": {
-          "group": "1"
-        }
-      },
-      {
-        "name": "Run All Onyx Services",
-        "configurations": [
-          "Web Server",
-          "Model Server",
-          "API Server",
-          "Slack Bot",
-          "Celery primary",
-          "Celery light",
-          "Celery heavy",
-          "Celery docfetching",
-          "Celery docprocessing",
-          "Celery beat",
-          "Celery monitoring"
-        ],
-        "presentation": {
-          "group": "1"
-        },
-        "stopAll": true
-      },
-      {
-        "name": "Web / Model / API",
-        "configurations": ["Web Server", "Model Server", "API Server"],
-        "presentation": {
-          "group": "1"
-        },
-        "stopAll": true
-      },
-      {
-        "name": "Celery (all)",
-        "configurations": [
-          "Celery primary",
-          "Celery light",
-          "Celery heavy",
-          "Celery docfetching",
-          "Celery docprocessing",
-          "Celery beat",
-          "Celery monitoring"
-        ],
-        "presentation": {
-          "group": "1"
-        },
-        "stopAll": true
+  // Use IntelliSense to learn about possible attributes.
+  // Hover to view descriptions of existing attributes.
+  // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+  "version": "0.2.0",
+  "compounds": [
+    {
+      // Dummy entry used to label the group
+      "name": "--- Compound ---",
+      "configurations": ["--- Individual ---"],
+      "presentation": {
+        "group": "1"
      }
-    ],
-    "configurations": [
-      {
-        // Dummy entry used to label the group
-        "name": "--- Individual ---",
-        "type": "node",
-        "request": "launch",
-        "presentation": {
-          "group": "2",
-          "order": 0
-        }
-      },
-      {
-        "name": "Web Server",
-        "type": "node",
-        "request": "launch",
-        "cwd": "${workspaceRoot}/web",
-        "runtimeExecutable": "npm",
-        "envFile": "${workspaceFolder}/.vscode/.env",
-        "runtimeArgs": ["run", "dev"],
-        "presentation": {
-          "group": "2"
-        },
-        "console": "integratedTerminal",
-        "consoleTitle": "Web Server Console"
-      },
-      {
-        "name": "Model Server",
-        "consoleName": "Model Server",
-        "type": "debugpy",
-        "request": "launch",
-        "module": "uvicorn",
-        "cwd": "${workspaceFolder}/backend",
-        "envFile": "${workspaceFolder}/.vscode/.env",
-        "env": {
-          "LOG_LEVEL": "DEBUG",
-          "PYTHONUNBUFFERED": "1"
-        },
-        "args": ["model_server.main:app", "--reload", "--port", "9000"],
-        "presentation": {
-          "group": "2"
-        },
-        "consoleTitle": "Model Server Console"
-      },
-      {
-        "name": "API Server",
-        "consoleName": "API Server",
-        "type": "debugpy",
-        "request": "launch",
-        "module": "uvicorn",
-        "cwd": "${workspaceFolder}/backend",
-        "envFile": "${workspaceFolder}/.vscode/.env",
-        "env": {
-          "LOG_DANSWER_MODEL_INTERACTIONS": "True",
-          "LOG_LEVEL": "DEBUG",
-          "PYTHONUNBUFFERED": "1"
-        },
-        "args": ["onyx.main:app", "--reload", "--port", "8080"],
-        "presentation": {
-          "group": "2"
-        },
-        "consoleTitle": "API Server Console"
-      },
-      // For the listener to access the Slack API,
-      // DANSWER_BOT_SLACK_APP_TOKEN & DANSWER_BOT_SLACK_BOT_TOKEN need to be set in .env file located in the root of the project
-      {
-        "name": "Slack Bot",
-        "consoleName": "Slack Bot",
-        "type": "debugpy",
-        "request": "launch",
-        "program": "onyx/onyxbot/slack/listener.py",
-        "cwd": "${workspaceFolder}/backend",
-        "envFile": "${workspaceFolder}/.vscode/.env",
-        "env": {
-          "LOG_LEVEL": "DEBUG",
-          "PYTHONUNBUFFERED": "1",
-          "PYTHONPATH": "."
-        },
-        "presentation": {
-          "group": "2"
-        },
-        "consoleTitle": "Slack Bot Console"
-      },
-      {
-        "name": "Celery primary",
-        "type": "debugpy",
-        "request": "launch",
-        "module": "celery",
-        "cwd": "${workspaceFolder}/backend",
-        "envFile": "${workspaceFolder}/.vscode/.env",
-        "env": {
-          "LOG_LEVEL": "INFO",
-          "PYTHONUNBUFFERED": "1",
-          "PYTHONPATH": "."
-        },
-        "args": [
-          "-A",
-          "onyx.background.celery.versioned_apps.primary",
-          "worker",
-          "--pool=threads",
-          "--concurrency=4",
-          "--prefetch-multiplier=1",
-          "--loglevel=INFO",
-          "--hostname=primary@%n",
-          "-Q",
-          "celery"
-        ],
-        "presentation": {
-          "group": "2"
-        },
-        "consoleTitle": "Celery primary Console"
-      },
-      {
-        "name": "Celery light",
-        "type": "debugpy",
-        "request": "launch",
-        "module": "celery",
-        "cwd": "${workspaceFolder}/backend",
-        "envFile": "${workspaceFolder}/.vscode/.env",
-        "env": {
-          "LOG_LEVEL": "INFO",
-          "PYTHONUNBUFFERED": "1",
-          "PYTHONPATH": "."
-        },
-        "args": [
-          "-A",
-          "onyx.background.celery.versioned_apps.light",
-          "worker",
-          "--pool=threads",
-          "--concurrency=64",
-          "--prefetch-multiplier=8",
-          "--loglevel=INFO",
-          "--hostname=light@%n",
-          "-Q",
-          "vespa_metadata_sync,connector_deletion,doc_permissions_upsert,index_attempt_cleanup"
-        ],
-        "presentation": {
-          "group": "2"
-        },
-        "consoleTitle": "Celery light Console"
-      },
-      {
-        "name": "Celery heavy",
-        "type": "debugpy",
-        "request": "launch",
-        "module": "celery",
-        "cwd": "${workspaceFolder}/backend",
-        "envFile": "${workspaceFolder}/.vscode/.env",
-        "env": {
-          "LOG_LEVEL": "INFO",
-          "PYTHONUNBUFFERED": "1",
-          "PYTHONPATH": "."
-        },
-        "args": [
-          "-A",
-          "onyx.background.celery.versioned_apps.heavy",
-          "worker",
-          "--pool=threads",
-          "--concurrency=4",
-          "--prefetch-multiplier=1",
-          "--loglevel=INFO",
-          "--hostname=heavy@%n",
-          "-Q",
-          "connector_pruning,connector_doc_permissions_sync,connector_external_group_sync"
-        ],
-        "presentation": {
-          "group": "2"
-        },
-        "consoleTitle": "Celery heavy Console"
-      },
-      {
-        "name": "Celery docfetching",
-        "type": "debugpy",
-        "request": "launch",
-        "module": "celery",
-        "cwd": "${workspaceFolder}/backend",
-        "envFile": "${workspaceFolder}/.vscode/.env",
-        "env": {
-            "LOG_LEVEL": "DEBUG",
-            "PYTHONUNBUFFERED": "1",
-            "PYTHONPATH": "."
-        },
-        "args": [
-            "-A",
-            "onyx.background.celery.versioned_apps.docfetching",
-            "worker",
-            "--pool=threads",
-            "--concurrency=1",
-            "--prefetch-multiplier=1",
-            "--loglevel=INFO",
-            "--hostname=docfetching@%n",
-            "-Q",
-            "connector_doc_fetching,user_files_indexing"
-        ],
-        "presentation": {
-            "group": "2"
-        },
-        "consoleTitle": "Celery docfetching Console",
-        "justMyCode": false
    },
    {
-        "name": "Celery docprocessing",
-        "type": "debugpy",
-        "request": "launch",
-        "module": "celery",
-        "cwd": "${workspaceFolder}/backend",
-        "envFile": "${workspaceFolder}/.vscode/.env",
-        "env": {
-            "ENABLE_MULTIPASS_INDEXING": "false",
-            "LOG_LEVEL": "DEBUG",
-            "PYTHONUNBUFFERED": "1",
-            "PYTHONPATH": "."
-        },
-        "args": [
-            "-A",
-            "onyx.background.celery.versioned_apps.docprocessing",
-            "worker",
-            "--pool=threads",
-            "--concurrency=6",
-            "--prefetch-multiplier=1",
-            "--loglevel=INFO",
-            "--hostname=docprocessing@%n",
-            "-Q",
-            "docprocessing"
-        ],
-        "presentation": {
-            "group": "2"
-        },
-        "consoleTitle": "Celery docprocessing Console",
-        "justMyCode": false
+      "name": "Run All Onyx Services",
+      "configurations": [
+        "Web Server",
+        "Model Server",
+        "API Server",
+        "Slack Bot",
+        "Celery primary",
+        "Celery light",
+        "Celery heavy",
+        "Celery docfetching",
+        "Celery docprocessing",
+        "Celery beat",
+        "Celery monitoring",
+        "Celery user file processing"
+      ],
+      "presentation": {
+        "group": "1"
+      }
    },
-      {
-        "name": "Celery monitoring",
-        "type": "debugpy",
-        "request": "launch",
-        "module": "celery",
-        "cwd": "${workspaceFolder}/backend",
-        "envFile": "${workspaceFolder}/.vscode/.env",
-        "env": {},
-        "args": [
-          "-A",
-          "onyx.background.celery.versioned_apps.monitoring",
-          "worker",
-          "--pool=solo",
-          "--concurrency=1",
-          "--prefetch-multiplier=1",
-          "--loglevel=INFO",
-          "--hostname=monitoring@%n",
-          "-Q",
-          "monitoring"
-        ],
-        "presentation": {
-          "group": "2"
-        },
-        "consoleTitle": "Celery monitoring Console"
+    {
+      "name": "Web / Model / API",
+      "configurations": ["Web Server", "Model Server", "API Server"],
+      "presentation": {
+        "group": "1"
+      }
+    },
+    {
+      "name": "Celery (all)",
+      "configurations": [
+        "Celery primary",
+        "Celery light",
+        "Celery heavy",
+        "Celery docfetching",
+        "Celery docprocessing",
+        "Celery beat",
+        "Celery monitoring",
+        "Celery user file processing"
+      ],
+      "presentation": {
+        "group": "1"
      },
-      {
-        "name": "Celery beat",
-        "type": "debugpy",
-        "request": "launch",
-        "module": "celery",
-        "cwd": "${workspaceFolder}/backend",
-        "envFile": "${workspaceFolder}/.vscode/.env",
-        "env": {
-          "LOG_LEVEL": "DEBUG",
-          "PYTHONUNBUFFERED": "1",
-          "PYTHONPATH": "."
-        },
-        "args": [
-          "-A",
-          "onyx.background.celery.versioned_apps.beat",
-          "beat",
-          "--loglevel=INFO"
-        ],
-        "presentation": {
-          "group": "2"
-        },
-        "consoleTitle": "Celery beat Console"
+      "stopAll": true
+    }
+  ],
+  "configurations": [
+    {
+      // Dummy entry used to label the group
+      "name": "--- Individual ---",
+      "type": "node",
+      "request": "launch",
+      "presentation": {
+        "group": "2",
+        "order": 0
+      }
+    },
+    {
+      "name": "Web Server",
+      "type": "node",
+      "request": "launch",
+      "cwd": "${workspaceRoot}/web",
+      "runtimeExecutable": "npm",
+      "envFile": "${workspaceFolder}/.vscode/.env",
+      "runtimeArgs": ["run", "dev"],
+      "presentation": {
+        "group": "2"
      },
-      {
-        "name": "Pytest",
-        "consoleName": "Pytest",
-        "type": "debugpy",
-        "request": "launch",
-        "module": "pytest",
-        "cwd": "${workspaceFolder}/backend",
-        "envFile": "${workspaceFolder}/.vscode/.env",
-        "env": {
-          "LOG_LEVEL": "DEBUG",
-          "PYTHONUNBUFFERED": "1",
-          "PYTHONPATH": "."
-        },
-        "args": [
-          "-v"
-          // Specify a sepcific module/test to run or provide nothing to run all tests
-          //"tests/unit/onyx/llm/answering/test_prune_and_merge.py"
-        ],
-        "presentation": {
-          "group": "2"
-        },
-        "consoleTitle": "Pytest Console"
+      "console": "integratedTerminal",
+      "consoleTitle": "Web Server Console"
+    },
+    {
+      "name": "Model Server",
+      "consoleName": "Model Server",
+      "type": "debugpy",
+      "request": "launch",
+      "module": "uvicorn",
+      "cwd": "${workspaceFolder}/backend",
+      "envFile": "${workspaceFolder}/.vscode/.env",
+      "env": {
+        "LOG_LEVEL": "DEBUG",
+        "PYTHONUNBUFFERED": "1"
      },
-      {
-        // Dummy entry used to label the group
-        "name": "--- Tasks ---",
-        "type": "node",
-        "request": "launch",
-        "presentation": {
-          "group": "3",
-          "order": 0
-        }
+      "args": ["model_server.main:app", "--reload", "--port", "9000"],
+      "presentation": {
+        "group": "2"
      },
-      {
-        "name": "Clear and Restart External Volumes and Containers",
-        "type": "node",
-        "request": "launch",
-        "runtimeExecutable": "bash",
-        "runtimeArgs": [
-          "${workspaceFolder}/backend/scripts/restart_containers.sh"
-        ],
-        "cwd": "${workspaceFolder}",
-        "console": "integratedTerminal",
-        "stopOnEntry": true,
-        "presentation": {
-          "group": "3"
-        }
+      "consoleTitle": "Model Server Console"
+    },
+    {
+      "name": "API Server",
+      "consoleName": "API Server",
+      "type": "debugpy",
+      "request": "launch",
+      "module": "uvicorn",
+      "cwd": "${workspaceFolder}/backend",
+      "envFile": "${workspaceFolder}/.vscode/.env",
+      "env": {
+        "LOG_ONYX_MODEL_INTERACTIONS": "True",
+        "LOG_LEVEL": "DEBUG",
+        "PYTHONUNBUFFERED": "1"
      },
-      {
-        // Celery jobs launched through a single background script (legacy)
-        // Recommend using the "Celery (all)" compound launch instead.
-        "name": "Background Jobs",
-        "consoleName": "Background Jobs",
-        "type": "debugpy",
-        "request": "launch",
-        "program": "scripts/dev_run_background_jobs.py",
-        "cwd": "${workspaceFolder}/backend",
-        "envFile": "${workspaceFolder}/.vscode/.env",
-        "env": {
-          "LOG_DANSWER_MODEL_INTERACTIONS": "True",
-          "LOG_LEVEL": "DEBUG",
-          "PYTHONUNBUFFERED": "1",
-          "PYTHONPATH": "."
-        }
+      "args": ["onyx.main:app", "--reload", "--port", "8080"],
+      "presentation": {
+        "group": "2"
      },
-      {
-        "name": "Install Python Requirements",
-        "type": "node",
-        "request": "launch",
-        "runtimeExecutable": "bash",
-        "runtimeArgs": [
-          "-c",
-          "pip install -r backend/requirements/default.txt && pip install -r backend/requirements/dev.txt && pip install -r backend/requirements/ee.txt && pip install -r backend/requirements/model_server.txt"
-        ],
-        "cwd": "${workspaceFolder}",
-        "console": "integratedTerminal",
-        "presentation": {
-          "group": "3"
-        }
+      "consoleTitle": "API Server Console"
+    },
+    // For the listener to access the Slack API,
+    // ONYX_BOT_SLACK_APP_TOKEN & ONYX_BOT_SLACK_BOT_TOKEN need to be set in .env file located in the root of the project
+    {
+      "name": "Slack Bot",
+      "consoleName": "Slack Bot",
+      "type": "debugpy",
+      "request": "launch",
+      "program": "onyx/onyxbot/slack/listener.py",
+      "cwd": "${workspaceFolder}/backend",
+      "envFile": "${workspaceFolder}/.vscode/.env",
+      "env": {
+        "LOG_LEVEL": "DEBUG",
+        "PYTHONUNBUFFERED": "1",
+        "PYTHONPATH": "."
      },
+      "presentation": {
+        "group": "2"
+      },
+      "consoleTitle": "Slack Bot Console"
+    },
+    {
+      "name": "Celery primary",
+      "type": "debugpy",
+      "request": "launch",
+      "module": "celery",
+      "cwd": "${workspaceFolder}/backend",
+      "envFile": "${workspaceFolder}/.vscode/.env",
+      "env": {
+        "LOG_LEVEL": "INFO",
+        "PYTHONUNBUFFERED": "1",
+        "PYTHONPATH": "."
+      },
+      "args": [
+        "-A",
+        "onyx.background.celery.versioned_apps.primary",
+        "worker",
+        "--pool=threads",
+        "--concurrency=4",
+        "--prefetch-multiplier=1",
+        "--loglevel=INFO",
+        "--hostname=primary@%n",
+        "-Q",
+        "celery"
+      ],
+      "presentation": {
+        "group": "2"
+      },
+      "consoleTitle": "Celery primary Console"
+    },
+    {
+      "name": "Celery light",
+      "type": "debugpy",
+      "request": "launch",
+      "module": "celery",
+      "cwd": "${workspaceFolder}/backend",
+      "envFile": "${workspaceFolder}/.vscode/.env",
+      "env": {
+        "LOG_LEVEL": "INFO",
+        "PYTHONUNBUFFERED": "1",
+        "PYTHONPATH": "."
+      },
+      "args": [
+        "-A",
+        "onyx.background.celery.versioned_apps.light",
+        "worker",
+        "--pool=threads",
+        "--concurrency=64",
+        "--prefetch-multiplier=8",
+        "--loglevel=INFO",
+        "--hostname=light@%n",
+        "-Q",
+        "vespa_metadata_sync,connector_deletion,doc_permissions_upsert,index_attempt_cleanup"
+      ],
+      "presentation": {
+        "group": "2"
+      },
+      "consoleTitle": "Celery light Console"
+    },
+    {
+      "name": "Celery heavy",
+      "type": "debugpy",
+      "request": "launch",
+      "module": "celery",
+      "cwd": "${workspaceFolder}/backend",
+      "envFile": "${workspaceFolder}/.vscode/.env",
+      "env": {
+        "LOG_LEVEL": "INFO",
+        "PYTHONUNBUFFERED": "1",
+        "PYTHONPATH": "."
+      },
+      "args": [
+        "-A",
+        "onyx.background.celery.versioned_apps.heavy",
+        "worker",
+        "--pool=threads",
+        "--concurrency=4",
+        "--prefetch-multiplier=1",
+        "--loglevel=INFO",
+        "--hostname=heavy@%n",
+        "-Q",
+        "connector_pruning,connector_doc_permissions_sync,connector_external_group_sync"
+      ],
+      "presentation": {
+        "group": "2"
+      },
+      "consoleTitle": "Celery heavy Console"
+    },
+    {
+      "name": "Celery docfetching",
+      "type": "debugpy",
+      "request": "launch",
+      "module": "celery",
+      "cwd": "${workspaceFolder}/backend",
+      "envFile": "${workspaceFolder}/.vscode/.env",
+      "env": {
+        "LOG_LEVEL": "DEBUG",
+        "PYTHONUNBUFFERED": "1",
+        "PYTHONPATH": "."
+      },
+      "args": [
+        "-A",
+        "onyx.background.celery.versioned_apps.docfetching",
+        "worker",
+        "--pool=threads",
+        "--concurrency=1",
+        "--prefetch-multiplier=1",
+        "--loglevel=INFO",
+        "--hostname=docfetching@%n",
+        "-Q",
+        "connector_doc_fetching,user_files_indexing"
+      ],
+      "presentation": {
+        "group": "2"
+      },
+      "consoleTitle": "Celery docfetching Console",
+      "justMyCode": false
+    },
+    {
+      "name": "Celery docprocessing",
+      "type": "debugpy",
+      "request": "launch",
+      "module": "celery",
+      "cwd": "${workspaceFolder}/backend",
+      "envFile": "${workspaceFolder}/.vscode/.env",
+      "env": {
+        "ENABLE_MULTIPASS_INDEXING": "false",
+        "LOG_LEVEL": "DEBUG",
+        "PYTHONUNBUFFERED": "1",
+        "PYTHONPATH": "."
+      },
+      "args": [
+        "-A",
+        "onyx.background.celery.versioned_apps.docprocessing",
+        "worker",
+        "--pool=threads",
+        "--concurrency=6",
+        "--prefetch-multiplier=1",
+        "--loglevel=INFO",
+        "--hostname=docprocessing@%n",
+        "-Q",
+        "docprocessing"
+      ],
+      "presentation": {
+        "group": "2"
+      },
+      "consoleTitle": "Celery docprocessing Console"
+    },
+    {
+      "name": "Celery beat",
+      "type": "debugpy",
+      "request": "launch",
+      "module": "celery",
+      "cwd": "${workspaceFolder}/backend",
+      "envFile": "${workspaceFolder}/.vscode/.env",
+      "env": {
+        "LOG_LEVEL": "DEBUG",
+        "PYTHONUNBUFFERED": "1",
+        "PYTHONPATH": "."
+      },
+      "args": [
+        "-A",
+        "onyx.background.celery.versioned_apps.beat",
+        "beat",
+        "--loglevel=INFO"
+      ],
+      "presentation": {
+        "group": "2"
+      },
+      "consoleTitle": "Celery beat Console"
+    },
+    {
+      "name": "Celery monitoring",
+      "type": "debugpy",
+      "request": "launch",
+      "module": "celery",
+      "cwd": "${workspaceFolder}/backend",
+      "envFile": "${workspaceFolder}/.vscode/.env",
+      "env": {},
+      "args": [
+        "-A",
+        "onyx.background.celery.versioned_apps.monitoring",
+        "worker",
+        "--pool=solo",
+        "--concurrency=1",
+        "--prefetch-multiplier=1",
+        "--loglevel=INFO",
+        "--hostname=monitoring@%n",
+        "-Q",
+        "monitoring"
+      ],
+      "presentation": {
+        "group": "2"
+      },
+      "consoleTitle": "Celery monitoring Console"
+    },
+    {
+      "name": "Celery user file processing",
+      "type": "debugpy",
+      "request": "launch",
+      "module": "celery",
+      "args": [
+        "-A",
+        "onyx.background.celery.versioned_apps.user_file_processing",
+        "worker",
+        "--loglevel=INFO",
+        "--hostname=user_file_processing@%n",
+        "--pool=threads",
+        "-Q",
+        "user_file_processing,user_file_project_sync"
+      ],
+      "cwd": "${workspaceFolder}/backend",
+      "envFile": "${workspaceFolder}/.vscode/.env",
+      "env": {
+        "LOG_LEVEL": "DEBUG",
+        "PYTHONUNBUFFERED": "1",
+        "PYTHONPATH": "."
+      },
+      "presentation": {
+        "group": "2"
+      },
+      "consoleTitle": "Celery user file processing Console"
+    },
+    {
+      "name": "Pytest",
+      "consoleName": "Pytest",
+      "type": "debugpy",
+      "request": "launch",
+      "module": "pytest",
+      "cwd": "${workspaceFolder}/backend",
+      "envFile": "${workspaceFolder}/.vscode/.env",
+      "env": {
+        "LOG_LEVEL": "DEBUG",
+        "PYTHONUNBUFFERED": "1",
+        "PYTHONPATH": "."
+      },
+      "args": [
+        "-v"
+        // Specify a specific module/test to run or provide nothing to run all tests
+        // "tests/unit/onyx/llm/answering/test_prune_and_merge.py"
+      ],
+      "presentation": {
+        "group": "2"
+      },
+      "consoleTitle": "Pytest Console"
+    },
+    {
+      // Dummy entry used to label the group
+      "name": "--- Tasks ---",
+      "type": "node",
+      "request": "launch",
+      "presentation": {
+        "group": "3",
+        "order": 0
+      }
+    },
+    {
+      "name": "Clear and Restart External Volumes and Containers",
+      "type": "node",
+      "request": "launch",
+      "runtimeExecutable": "bash",
+      "runtimeArgs": [
+        "${workspaceFolder}/backend/scripts/restart_containers.sh"
+      ],
+      "cwd": "${workspaceFolder}",
+      "console": "integratedTerminal",
+      "stopOnEntry": true,
+      "presentation": {
+        "group": "3"
+      }
+    },
+    {
+      "name": "Eval CLI",
+      "type": "debugpy",
+      "request": "launch",
+      "program": "${workspaceFolder}/backend/onyx/evals/eval_cli.py",
+      "cwd": "${workspaceFolder}/backend",
+      "console": "integratedTerminal",
+      "justMyCode": false,
+      "envFile": "${workspaceFolder}/.vscode/.env",
+      "presentation": {
+        "group": "3"
+      },
+      "env": {
+        "LOG_LEVEL": "INFO",
+        "PYTHONUNBUFFERED": "1",
+        "PYTHONPATH": "."
+      },
+      "args": ["--verbose"],
+      "consoleTitle": "Eval CLI Console"
+    },
+    {
+      // Celery jobs launched through a single background script (legacy)
+      // Recommend using the "Celery (all)" compound launch instead.
+      "name": "Background Jobs",
+      "consoleName": "Background Jobs",
+      "type": "debugpy",
+      "request": "launch",
+      "program": "scripts/dev_run_background_jobs.py",
+      "cwd": "${workspaceFolder}/backend",
+      "envFile": "${workspaceFolder}/.vscode/.env",
+      "env": {
+        "LOG_ONYX_MODEL_INTERACTIONS": "True",
+        "LOG_LEVEL": "DEBUG",
+        "PYTHONUNBUFFERED": "1",
+        "PYTHONPATH": "."
+      }
+    },
+    {
+      "name": "Install Python Requirements",
+      "type": "node",
+      "request": "launch",
+      "runtimeExecutable": "bash",
+      "runtimeArgs": [
+        "-c",
+        "pip install -r backend/requirements/default.txt && pip install -r backend/requirements/dev.txt && pip install -r backend/requirements/ee.txt && pip install -r backend/requirements/model_server.txt"
+      ],
+      "cwd": "${workspaceFolder}",
+      "console": "integratedTerminal",
+      "presentation": {
+        "group": "3"
+      }
+    },
    {
      // script to generate the openapi schema
      "name": "Onyx OpenAPI Schema Generator",
@@ -429,10 +475,7 @@
        "PYTHONUNBUFFERED": "1",
        "PYTHONPATH": "."
      },
-      "args": [
-        "--filename",
-        "generated/openapi.json"
-      ]
+      "args": ["--filename", "generated/openapi.json"]
    },
    {
      // script to debug multi tenant db issues
@@ -457,13 +500,12 @@
        "generated/tenants_by_num_docs.csv"
      ]
    },
-      {
-        "name": "Debug React Web App in Chrome",
-        "type": "chrome",
-        "request": "launch",
-        "url": "http://localhost:3000",
-        "webRoot": "${workspaceFolder}/web"
-      }
-    ]
-  }
-  
+    {
+      "name": "Debug React Web App in Chrome",
+      "type": "chrome",
+      "request": "launch",
+      "url": "http://localhost:3000",
+      "webRoot": "${workspaceFolder}/web"
+    }
+  ]
+}
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -0,0 +1,295 @@
+# AGENTS.md
+
+This file provides guidance to Codex when working with code in this repository.
+
+## KEY NOTES
+
+- If you run into any missing python dependency errors, try running your command with `source backend/.venv/bin/activate` \
+to assume the python venv.
+- To make tests work, check the `.env` file at the root of the project to find an OpenAI key.
+- If using `playwright` to explore the frontend, you can usually log in with username `a@test.com` and password
+`a`. The app can be accessed at `http://localhost:3000`.
+- You should assume that all Onyx services are running. To verify, you can check the `backend/log` directory to
+make sure we see logs coming out from the relevant service.
+- To connect to the Postgres database, use: `docker exec -it onyx-relational_db-1 psql -U postgres -c "<SQL>"`
+- When making calls to the backend, always go through the frontend. E.g. make a call to `http://localhost:3000/api/persona` not `http://localhost:8080/api/persona`
+- Put ALL db operations under the `backend/onyx/db` / `backend/ee/onyx/db` directories. Don't run queries
+outside of those directories.
+
+## Project Overview
+
+**Onyx** (formerly Danswer) is an open-source Gen-AI and Enterprise Search platform that connects to company documents, apps, and people. It features a modular architecture with both Community Edition (MIT licensed) and Enterprise Edition offerings.
+
+
+### Background Workers (Celery)
+
+Onyx uses Celery for asynchronous task processing with multiple specialized workers:
+
+#### Worker Types
+
+1. **Primary Worker** (`celery_app.py`)
+   - Coordinates core background tasks and system-wide operations
+   - Handles connector management, document sync, pruning, and periodic checks
+   - Runs with 4 threads concurrency
+   - Tasks: connector deletion, vespa sync, pruning, LLM model updates, user file sync
+
+2. **Docfetching Worker** (`docfetching`)
+   - Fetches documents from external data sources (connectors)
+   - Spawns docprocessing tasks for each document batch
+   - Implements watchdog monitoring for stuck connectors
+   - Configurable concurrency (default from env)
+
+3. **Docprocessing Worker** (`docprocessing`)
+   - Processes fetched documents through the indexing pipeline:
+     - Upserts documents to PostgreSQL
+     - Chunks documents and adds contextual information
+     - Embeds chunks via model server
+     - Writes chunks to Vespa vector database
+     - Updates document metadata
+   - Configurable concurrency (default from env)
+
+4. **Light Worker** (`light`)
+   - Handles lightweight, fast operations
+   - Tasks: vespa operations, document permissions sync, external group sync
+   - Higher concurrency for quick tasks
+
+5. **Heavy Worker** (`heavy`)
+   - Handles resource-intensive operations
+   - Primary task: document pruning operations
+   - Runs with 4 threads concurrency
+
+6. **KG Processing Worker** (`kg_processing`)
+   - Handles Knowledge Graph processing and clustering
+   - Builds relationships between documents
+   - Runs clustering algorithms
+   - Configurable concurrency
+
+7. **Monitoring Worker** (`monitoring`)
+   - System health monitoring and metrics collection
+   - Monitors Celery queues, process memory, and system status
+   - Single thread (monitoring doesn't need parallelism)
+   - Cloud-specific monitoring tasks
+
+8. **Beat Worker** (`beat`)
+   - Celery's scheduler for periodic tasks
+   - Uses DynamicTenantScheduler for multi-tenant support
+   - Schedules tasks like:
+     - Indexing checks (every 15 seconds)
+     - Connector deletion checks (every 20 seconds)
+     - Vespa sync checks (every 20 seconds)
+     - Pruning checks (every 20 seconds)
+     - KG processing (every 60 seconds)
+     - Monitoring tasks (every 5 minutes)
+     - Cleanup tasks (hourly)
+
+#### Key Features
+
+- **Thread-based Workers**: All workers use thread pools (not processes) for stability
+- **Tenant Awareness**: Multi-tenant support with per-tenant task isolation. There is a 
+middleware layer that automatically finds the appropriate tenant ID when sending tasks 
+via Celery Beat.
+- **Task Prioritization**: High, Medium, Low priority queues
+- **Monitoring**: Built-in heartbeat and liveness checking
+- **Failure Handling**: Automatic retry and failure recovery mechanisms
+- **Redis Coordination**: Inter-process communication via Redis
+- **PostgreSQL State**: Task state and metadata stored in PostgreSQL
+
+
+#### Important Notes
+
+**Defining Tasks**: 
+- Always use `@shared_task` rather than `@celery_app`
+- Put tasks under `background/celery/tasks/` or `ee/background/celery/tasks`
+
+**Defining APIs**:
+When creating new FastAPI APIs, do NOT use the `response_model` field. Instead, just type the
+function.
+
+**Testing Updates**:
+If you make any updates to a celery worker and you want to test these changes, you will need
+to ask me to restart the celery worker. There is no auto-restart on code-change mechanism.
+
+### Code Quality
+```bash
+# Install and run pre-commit hooks
+pre-commit install
+pre-commit run --all-files
+```
+
+NOTE: Always make sure everything is strictly typed (both in Python and TypeScript).
+
+## Architecture Overview
+
+### Technology Stack
+- **Backend**: Python 3.11, FastAPI, SQLAlchemy, Alembic, Celery
+- **Frontend**: Next.js 15+, React 18, TypeScript, Tailwind CSS
+- **Database**: PostgreSQL with Redis caching
+- **Search**: Vespa vector database
+- **Auth**: OAuth2, SAML, multi-provider support
+- **AI/ML**: LangChain, LiteLLM, multiple embedding models
+
+### Directory Structure
+
+```
+backend/
+├── onyx/
+│   ├── auth/                    # Authentication & authorization
+│   ├── chat/                    # Chat functionality & LLM interactions
+│   ├── connectors/              # Data source connectors
+│   ├── db/                      # Database models & operations
+│   ├── document_index/          # Vespa integration
+│   ├── federated_connectors/    # External search connectors
+│   ├── llm/                     # LLM provider integrations
+│   └── server/                  # API endpoints & routers
+├── ee/                          # Enterprise Edition features
+├── alembic/                     # Database migrations
+└── tests/                       # Test suites
+
+web/
+├── src/app/                     # Next.js app router pages
+├── src/components/              # Reusable React components
+└── src/lib/                     # Utilities & business logic
+```
+
+## Database & Migrations
+
+### Running Migrations
+```bash
+# Standard migrations
+alembic upgrade head
+
+# Multi-tenant (Enterprise)
+alembic -n schema_private upgrade head
+```
+
+### Creating Migrations
+```bash
+# Auto-generate migration
+alembic revision --autogenerate -m "description"
+
+# Multi-tenant migration
+alembic -n schema_private revision --autogenerate -m "description"
+```
+
+## Testing Strategy
+
+There are 4 main types of tests within Onyx:
+
+### Unit Tests
+These should not assume any Onyx/external services are available to be called.
+Interactions with the outside world should be mocked using `unittest.mock`. Generally, only 
+write these for complex, isolated modules e.g. `citation_processing.py`.
+
+To run them:
+
+```bash
+python -m dotenv -f .vscode/.env run -- pytest -xv backend/tests/unit
+```
+
+### External Dependency Unit Tests
+These tests assume that all external dependencies of Onyx are available and callable (e.g. Postgres, Redis, 
+MinIO/S3, Vespa are running + OpenAI can be called + any request to the internet is fine + etc.).
+
+However, the actual Onyx containers are not running and with these tests we call the function to test directly.
+We can also mock components/calls at will. 
+
+The goal with these tests is to minimize mocking while giving some flexibility to mock things that are flaky,
+need strictly controlled behavior, or need to have their internal behavior validated (e.g. verify a function is called
+with certain args, something that would be impossible with proper integration tests).
+
+A great example of this type of test is `backend/tests/external_dependency_unit/connectors/confluence/test_confluence_group_sync.py`.
+
+To run them:
+
+```bash
+python -m dotenv -f .vscode/.env run -- pytest backend/tests/external_dependency_unit
+```
+
+### Integration Tests
+Standard integration tests. Every test in `backend/tests/integration` runs against a real Onyx deployment. We cannot 
+mock anything in these tests. Prefer writing integration tests (or External Dependency Unit Tests if mocking/internal 
+verification is necessary) over any other type of test.
+
+Tests are parallelized at a directory level.
+
+When writing integration tests, make sure to check the root `conftest.py` for useful fixtures + the `backend/tests/integration/common_utils` directory for utilities. If an appropriate Manager class exists in the utils, prefer calling it
+over directly calling the APIs with a library like `requests`. Prefer using fixtures rather than
+calling the utilities directly (e.g. do NOT create admin users with 
+`admin_user = UserManager.create(name="admin_user")`, instead use the `admin_user` fixture).
+
+A great example of this type of test is `backend/tests/integration/dev_apis/test_simple_chat_api.py`.
+
+To run them:
+
+```bash
+python -m dotenv -f .vscode/.env run -- pytest backend/tests/integration
+```
+
+### Playwright (E2E) Tests
+These tests are an even more complete version of the Integration Tests mentioned above. They run with all Onyx
+services up, *including* the Web Server.
+
+Use these tests for anything that requires significant frontend <-> backend coordination.
+
+Tests are located at `web/tests/e2e`. Tests are written in TypeScript.
+
+To run them:
+
+```bash
+npx playwright test <TEST_NAME>
+```
+
+
+## Logs
+
+When (1) writing integration tests or (2) doing live tests (e.g. curl / playwright) you can get access
+to logs via the `backend/log/<service_name>_debug.log` file. All Onyx services (api_server, web_server, celery_X)
+will be tailing their logs to this file. 
+
+
+## Security Considerations
+
+- Never commit API keys or secrets to repository
+- Use encrypted credential storage for connector credentials
+- Follow RBAC patterns for new features
+- Implement proper input validation with Pydantic models
+- Use parameterized queries to prevent SQL injection
+
+## AI/LLM Integration
+
+- Multiple LLM providers supported via LiteLLM
+- Configurable models per feature (chat, search, embeddings)
+- Streaming support for real-time responses
+- Token management and rate limiting
+- Custom prompts and agent actions
+
+## UI/UX Patterns
+
+- Tailwind CSS with design system in `web/src/components/ui/`
+- Radix UI and Headless UI for accessible components
+- SWR for data fetching and caching
+- Form validation with react-hook-form
+- Error handling with popup notifications
+
+## Creating a Plan
+When creating a plan in the `plans` directory, make sure to include at least these elements:
+
+**Issues to Address**
+What the change is meant to do.
+
+**Important Notes**
+Things you come across in your research that are important to the implementation.
+
+**Implementation strategy**
+How you are going to make the changes happen. High level approach.
+
+**Tests**
+What unit (use rarely), external dependency unit, integration, and playwright tests you plan to write to 
+verify the correct behavior. Don't overtest. Usually, a given change only needs one type of test.
+
+Do NOT include these: *Timeline*, *Rollback plan*
+
+This is a minimal list - feel free to include more. Do NOT write code as part of your plan.
+Keep it high level. You can reference certain files or functions though.
+
+Before writing your plan, make sure to do research. Explore the relevant sections in the codebase.
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -0,0 +1,295 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## KEY NOTES
+
+- If you run into any missing python dependency errors, try running your command with `source backend/.venv/bin/activate` \
+to assume the python venv.
+- To make tests work, check the `.env` file at the root of the project to find an OpenAI key.
+- If using `playwright` to explore the frontend, you can usually log in with username `a@test.com` and password
+`a`. The app can be accessed at `http://localhost:3000`.
+- You should assume that all Onyx services are running. To verify, you can check the `backend/log` directory to
+make sure we see logs coming out from the relevant service.
+- To connect to the Postgres database, use: `docker exec -it onyx-relational_db-1 psql -U postgres -c "<SQL>"`
+- When making calls to the backend, always go through the frontend. E.g. make a call to `http://localhost:3000/api/persona` not `http://localhost:8080/api/persona`
+- Put ALL db operations under the `backend/onyx/db` / `backend/ee/onyx/db` directories. Don't run queries
+outside of those directories.
+
+## Project Overview
+
+**Onyx** (formerly Danswer) is an open-source Gen-AI and Enterprise Search platform that connects to company documents, apps, and people. It features a modular architecture with both Community Edition (MIT licensed) and Enterprise Edition offerings.
+
+
+### Background Workers (Celery)
+
+Onyx uses Celery for asynchronous task processing with multiple specialized workers:
+
+#### Worker Types
+
+1. **Primary Worker** (`celery_app.py`)
+   - Coordinates core background tasks and system-wide operations
+   - Handles connector management, document sync, pruning, and periodic checks
+   - Runs with 4 threads concurrency
+   - Tasks: connector deletion, vespa sync, pruning, LLM model updates, user file sync
+
+2. **Docfetching Worker** (`docfetching`)
+   - Fetches documents from external data sources (connectors)
+   - Spawns docprocessing tasks for each document batch
+   - Implements watchdog monitoring for stuck connectors
+   - Configurable concurrency (default from env)
+
+3. **Docprocessing Worker** (`docprocessing`)
+   - Processes fetched documents through the indexing pipeline:
+     - Upserts documents to PostgreSQL
+     - Chunks documents and adds contextual information
+     - Embeds chunks via model server
+     - Writes chunks to Vespa vector database
+     - Updates document metadata
+   - Configurable concurrency (default from env)
+
+4. **Light Worker** (`light`)
+   - Handles lightweight, fast operations
+   - Tasks: vespa operations, document permissions sync, external group sync
+   - Higher concurrency for quick tasks
+
+5. **Heavy Worker** (`heavy`)
+   - Handles resource-intensive operations
+   - Primary task: document pruning operations
+   - Runs with 4 threads concurrency
+
+6. **KG Processing Worker** (`kg_processing`)
+   - Handles Knowledge Graph processing and clustering
+   - Builds relationships between documents
+   - Runs clustering algorithms
+   - Configurable concurrency
+
+7. **Monitoring Worker** (`monitoring`)
+   - System health monitoring and metrics collection
+   - Monitors Celery queues, process memory, and system status
+   - Single thread (monitoring doesn't need parallelism)
+   - Cloud-specific monitoring tasks
+
+8. **Beat Worker** (`beat`)
+   - Celery's scheduler for periodic tasks
+   - Uses DynamicTenantScheduler for multi-tenant support
+   - Schedules tasks like:
+     - Indexing checks (every 15 seconds)
+     - Connector deletion checks (every 20 seconds)
+     - Vespa sync checks (every 20 seconds)
+     - Pruning checks (every 20 seconds)
+     - KG processing (every 60 seconds)
+     - Monitoring tasks (every 5 minutes)
+     - Cleanup tasks (hourly)
+
+#### Key Features
+
+- **Thread-based Workers**: All workers use thread pools (not processes) for stability
+- **Tenant Awareness**: Multi-tenant support with per-tenant task isolation. There is a 
+middleware layer that automatically finds the appropriate tenant ID when sending tasks 
+via Celery Beat.
+- **Task Prioritization**: High, Medium, Low priority queues
+- **Monitoring**: Built-in heartbeat and liveness checking
+- **Failure Handling**: Automatic retry and failure recovery mechanisms
+- **Redis Coordination**: Inter-process communication via Redis
+- **PostgreSQL State**: Task state and metadata stored in PostgreSQL
+
+
+#### Important Notes
+
+**Defining Tasks**: 
+- Always use `@shared_task` rather than `@celery_app`
+- Put tasks under `background/celery/tasks/` or `ee/background/celery/tasks`
+
+**Defining APIs**:
+When creating new FastAPI APIs, do NOT use the `response_model` field. Instead, just type the
+function.
+
+**Testing Updates**:
+If you make any updates to a celery worker and you want to test these changes, you will need
+to ask me to restart the celery worker. There is no auto-restart on code-change mechanism.
+
+### Code Quality
+```bash
+# Install and run pre-commit hooks
+pre-commit install
+pre-commit run --all-files
+```
+
+NOTE: Always make sure everything is strictly typed (both in Python and TypeScript).
+
+## Architecture Overview
+
+### Technology Stack
+- **Backend**: Python 3.11, FastAPI, SQLAlchemy, Alembic, Celery
+- **Frontend**: Next.js 15+, React 18, TypeScript, Tailwind CSS
+- **Database**: PostgreSQL with Redis caching
+- **Search**: Vespa vector database
+- **Auth**: OAuth2, SAML, multi-provider support
+- **AI/ML**: LangChain, LiteLLM, multiple embedding models
+
+### Directory Structure
+
+```
+backend/
+├── onyx/
+│   ├── auth/                    # Authentication & authorization
+│   ├── chat/                    # Chat functionality & LLM interactions
+│   ├── connectors/              # Data source connectors
+│   ├── db/                      # Database models & operations
+│   ├── document_index/          # Vespa integration
+│   ├── federated_connectors/    # External search connectors
+│   ├── llm/                     # LLM provider integrations
+│   └── server/                  # API endpoints & routers
+├── ee/                          # Enterprise Edition features
+├── alembic/                     # Database migrations
+└── tests/                       # Test suites
+
+web/
+├── src/app/                     # Next.js app router pages
+├── src/components/              # Reusable React components
+└── src/lib/                     # Utilities & business logic
+```
+
+## Database & Migrations
+
+### Running Migrations
+```bash
+# Standard migrations
+alembic upgrade head
+
+# Multi-tenant (Enterprise)
+alembic -n schema_private upgrade head
+```
+
+### Creating Migrations
+```bash
+# Auto-generate migration
+alembic revision --autogenerate -m "description"
+
+# Multi-tenant migration
+alembic -n schema_private revision --autogenerate -m "description"
+```
+
+## Testing Strategy
+
+There are 4 main types of tests within Onyx:
+
+### Unit Tests
+These should not assume any Onyx/external services are available to be called.
+Interactions with the outside world should be mocked using `unittest.mock`. Generally, only 
+write these for complex, isolated modules e.g. `citation_processing.py`.
+
+To run them:
+
+```bash
+python -m dotenv -f .vscode/.env run -- pytest -xv backend/tests/unit
+```
+
+### External Dependency Unit Tests
+These tests assume that all external dependencies of Onyx are available and callable (e.g. Postgres, Redis, 
+MinIO/S3, Vespa are running + OpenAI can be called + any request to the internet is fine + etc.).
+
+However, the actual Onyx containers are not running and with these tests we call the function to test directly.
+We can also mock components/calls at will. 
+
+The goal with these tests is to minimize mocking while giving some flexibility to mock things that are flaky,
+need strictly controlled behavior, or need to have their internal behavior validated (e.g. verify a function is called
+with certain args, something that would be impossible with proper integration tests).
+
+A great example of this type of test is `backend/tests/external_dependency_unit/connectors/confluence/test_confluence_group_sync.py`.
+
+To run them:
+
+```bash
+python -m dotenv -f .vscode/.env run -- pytest backend/tests/external_dependency_unit
+```
+
+### Integration Tests
+Standard integration tests. Every test in `backend/tests/integration` runs against a real Onyx deployment. We cannot 
+mock anything in these tests. Prefer writing integration tests (or External Dependency Unit Tests if mocking/internal 
+verification is necessary) over any other type of test.
+
+Tests are parallelized at a directory level.
+
+When writing integration tests, make sure to check the root `conftest.py` for useful fixtures + the `backend/tests/integration/common_utils` directory for utilities. If an appropriate Manager class exists in the utils, prefer calling it
+over directly calling the APIs with a library like `requests`. Prefer using fixtures rather than
+calling the utilities directly (e.g. do NOT create admin users with 
+`admin_user = UserManager.create(name="admin_user")`, instead use the `admin_user` fixture).
+
+A great example of this type of test is `backend/tests/integration/dev_apis/test_simple_chat_api.py`.
+
+To run them:
+
+```bash
+python -m dotenv -f .vscode/.env run -- pytest backend/tests/integration
+```
+
+### Playwright (E2E) Tests
+These tests are an even more complete version of the Integration Tests mentioned above. They run with all Onyx
+services up, *including* the Web Server.
+
+Use these tests for anything that requires significant frontend <-> backend coordination.
+
+Tests are located at `web/tests/e2e`. Tests are written in TypeScript.
+
+To run them:
+
+```bash
+npx playwright test <TEST_NAME>
+```
+
+
+## Logs
+
+When (1) writing integration tests or (2) doing live tests (e.g. curl / playwright) you can get access
+to logs via the `backend/log/<service_name>_debug.log` file. All Onyx services (api_server, web_server, celery_X)
+will be tailing their logs to this file. 
+
+
+## Security Considerations
+
+- Never commit API keys or secrets to repository
+- Use encrypted credential storage for connector credentials
+- Follow RBAC patterns for new features
+- Implement proper input validation with Pydantic models
+- Use parameterized queries to prevent SQL injection
+
+## AI/LLM Integration
+
+- Multiple LLM providers supported via LiteLLM
+- Configurable models per feature (chat, search, embeddings)
+- Streaming support for real-time responses
+- Token management and rate limiting
+- Custom prompts and agent actions
+
+## UI/UX Patterns
+
+- Tailwind CSS with design system in `web/src/components/ui/`
+- Radix UI and Headless UI for accessible components
+- SWR for data fetching and caching
+- Form validation with react-hook-form
+- Error handling with popup notifications
+
+## Creating a Plan
+When creating a plan in the `plans` directory, make sure to include at least these elements:
+
+**Issues to Address**
+What the change is meant to do.
+
+**Important Notes**
+Things you come across in your research that are important to the implementation.
+
+**Implementation strategy**
+How you are going to make the changes happen. High level approach.
+
+**Tests**
+What unit (use rarely), external dependency unit, integration, and playwright tests you plan to write to 
+verify the correct behavior. Don't overtest. Usually, a given change only needs one type of test.
+
+Do NOT include these: *Timeline*, *Rollback plan*
+
+This is a minimal list - feel free to include more. Do NOT write code as part of your plan.
+Keep it high level. You can reference certain files or functions though.
+
+Before writing your plan, make sure to do research. Explore the relevant sections in the codebase.
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -84,10 +84,6 @@ python -m venv .venv
 source .venv/bin/activate
 ```

-> **Note:**
-> This virtual environment MUST NOT be set up WITHIN the onyx directory if you plan on using mypy within certain IDEs.
-> For simplicity, we recommend setting up the virtual environment outside of the onyx directory.
-
 _For Windows, activate the virtual environment using Command Prompt:_

 ```bash
@@ -175,7 +171,7 @@ You will need Docker installed to run these containers.
 First navigate to `onyx/deployment/docker_compose`, then start up Postgres/Vespa/Redis/MinIO with:

 ```bash
-docker compose -f docker-compose.dev.yml -p onyx-stack up -d index relational_db cache minio
+docker compose up -d index relational_db cache minio
 ```

 (index refers to Vespa, relational_db refers to Postgres, and cache refers to Redis)
@@ -257,7 +253,7 @@ You can run the full Onyx application stack from pre-built images including all
 Navigate to `onyx/deployment/docker_compose` and run:

 ```bash
-docker compose -f docker-compose.dev.yml -p onyx-stack up -d
+docker compose up -d
 ```

 After Docker pulls and starts these containers, navigate to `http://localhost:3000` to use Onyx.
@@ -265,7 +261,7 @@ After Docker pulls and starts these containers, navigate to `http://localhost:30
 If you want to make changes to Onyx and run those changes in Docker, you can also build a local version of the Onyx container images that incorporates your changes like so:

 ```bash
-docker compose -f docker-compose.dev.yml -p onyx-stack up -d --build
+docker compose up -d --build
 ```


--- a/README.md
+++ b/README.md
@@ -1,117 +1,103 @@
-<!-- ONYX_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/README.md"} -->
-
 <a name="readme-top"></a>

 <h2 align="center">
-<a href="https://www.onyx.app/"> <img width="50%" src="https://github.com/onyx-dot-app/onyx/blob/logo/OnyxLogoCropped.jpg?raw=true)" /></a>
+    <a href="https://www.onyx.app/"> <img width="50%" src="https://github.com/onyx-dot-app/onyx/blob/logo/OnyxLogoCropped.jpg?raw=true" /></a>
 </h2>

-<p align="center">
-<p align="center">Open Source Gen-AI + Enterprise Search.</p>
+<p align="center">Open Source AI Platform</p>

 <p align="center">
-<a href="https://docs.onyx.app/" target="_blank">
-    <img src="https://img.shields.io/badge/docs-view-blue" alt="Documentation">
-</a>
-<a href="https://join.slack.com/t/onyx-dot-app/shared_invite/zt-34lu4m7xg-TsKGO6h8PDvR5W27zTdyhA" target="_blank">
-    <img src="https://img.shields.io/badge/slack-join-blue.svg?logo=slack" alt="Slack">
-</a>
-<a href="https://discord.gg/TDJ59cGV2X" target="_blank">
-    <img src="https://img.shields.io/badge/discord-join-blue.svg?logo=discord&logoColor=white" alt="Discord">
-</a>
-<a href="https://github.com/onyx-dot-app/onyx/blob/main/README.md" target="_blank">
-    <img src="https://img.shields.io/static/v1?label=license&message=MIT&color=blue" alt="License">
-</a>
+    <a href="https://discord.gg/TDJ59cGV2X" target="_blank">
+        <img src="https://img.shields.io/badge/discord-join-blue.svg?logo=discord&logoColor=white" alt="Discord">
+    </a>
+    <a href="https://docs.onyx.app/" target="_blank">
+        <img src="https://img.shields.io/badge/docs-view-blue" alt="Documentation">
+    </a>
+    <a href="https://www.onyx.app/" target="_blank">
+        <img src="https://img.shields.io/website?url=https://www.onyx.app&up_message=visit&up_color=blue" alt="Website">
+    </a>
+    <a href="https://github.com/onyx-dot-app/onyx/blob/main/LICENSE" target="_blank">
+        <img src="https://img.shields.io/static/v1?label=license&message=MIT&color=blue" alt="License">
+    </a>
 </p>

-<strong>[Onyx](https://www.onyx.app/)</strong> (formerly Danswer) is the AI platform connected to your company's docs, apps, and people.
-Onyx provides a feature rich Chat interface and plugs into any LLM of your choice.
-Keep knowledge and access controls sync-ed across over 40 connectors like Google Drive, Slack, Confluence, Salesforce, etc.
-Create custom AI agents with unique prompts, knowledge, and actions that the agents can take.
-Onyx can be deployed securely anywhere and for any scale - on a laptop, on-premise, or to cloud.


-<h3>Feature Highlights</h3>
+**[Onyx](https://www.onyx.app/)** is a feature-rich, self-hostable Chat UI that works with any LLM. It is easy to deploy and can run in a completely airgapped environment.

-**Deep research over your team's knowledge:**
+Onyx comes loaded with advanced features like Agents, Web Search, RAG, MCP, Deep Research, Connectors to 40+ knowledge sources, and more.

-https://private-user-images.githubusercontent.com/32520769/414509312-48392e83-95d0-4fb5-8650-a396e05e0a32.mp4?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3Mzk5Mjg2MzYsIm5iZiI6MTczOTkyODMzNiwicGF0aCI6Ii8zMjUyMDc2OS80MTQ1MDkzMTItNDgzOTJlODMtOTVkMC00ZmI1LTg2NTAtYTM5NmUwNWUwYTMyLm1wND9YLUFtei1BbGdvcml0aG09QVdTNC1ITUFDLVNIQTI1NiZYLUFtei1DcmVkZW50aWFsPUFLSUFWQ09EWUxTQTUzUFFLNFpBJTJGMjAyNTAyMTklMkZ1cy1lYXN0LTElMkZzMyUyRmF3czRfcmVxdWVzdCZYLUFtei1EYXRlPTIwMjUwMjE5VDAxMjUzNlomWC1BbXotRXhwaXJlcz0zMDAmWC1BbXotU2lnbmF0dXJlPWFhMzk5Njg2Y2Y5YjFmNDNiYTQ2YzM5ZTg5YWJiYTU2NWMyY2YwNmUyODE2NWUxMDRiMWQxZWJmODI4YTA0MTUmWC1BbXotU2lnbmVkSGVhZGVycz1ob3N0In0.a9D8A0sgKE9AoaoE-mfFbJ6_OKYeqaf7TZ4Han2JfW8
+> [!TIP]
+> Run Onyx with one command (or see deployment section below):
+> ```
+> curl -fsSL https://raw.githubusercontent.com/onyx-dot-app/onyx/main/deployment/docker_compose/install.sh > install.sh && chmod +x install.sh && ./install.sh
+> ```

-
-**Use Onyx as a secure AI Chat with any LLM:**
+****

 ![Onyx Chat Silent Demo](https://github.com/onyx-dot-app/onyx/releases/download/v0.21.1/OnyxChatSilentDemo.gif)


-**Easily set up connectors to your apps:**

-![Onyx Connector Silent Demo](https://github.com/onyx-dot-app/onyx/releases/download/v0.21.1/OnyxConnectorSilentDemo.gif)
+## ⭐ Features
+- **🤖 Custom Agents:** Build AI Agents with unique instructions, knowledge and actions.
+- **🌍 Web Search:** Browse the web with Google PSE, Exa, and Serper as well as an in-house scraper or Firecrawl.
+- **🔍 RAG:** Best-in-class hybrid search + knowledge graph for uploaded files and ingested documents from connectors.
+- **🔄 Connectors:** Pull knowledge, metadata, and access information from over 40 applications.
+- **🔬 Deep Research:** Get in-depth answers with an agentic multi-step search.
+- **▶️ Actions & MCP:** Give AI Agents the ability to interact with external systems.
+- **💻 Code Interpreter:** Execute code to analyze data, render graphs and create files.
+- **🎨 Image Generation:** Generate images based on user prompts.
+- **👥 Collaboration:** Chat sharing, feedback gathering, user management, usage analytics, and more.
+
+Onyx works with all LLMs (like OpenAI, Anthropic, Gemini, etc.) and self-hosted LLMs (like Ollama, vLLM, etc.)
+
+To learn more about the features, check out our [documentation](https://docs.onyx.app/welcome)!


-**Access Onyx where your team already works:**

-![Onyx Bot Demo](https://github.com/onyx-dot-app/onyx/releases/download/v0.21.1/OnyxBot.png)
+## 🚀 Deployment
+Onyx supports deployments in Docker, Kubernetes, and Terraform, along with guides for major cloud providers.
+
+See guides below:
+- [Docker](https://docs.onyx.app/deployment/local/docker) or [Quickstart](https://docs.onyx.app/deployment/getting_started/quickstart) (best for most users)
+- [Kubernetes](https://docs.onyx.app/deployment/local/kubernetes) (best for large teams)
+- [Terraform](https://docs.onyx.app/deployment/local/terraform) (best for teams already using Terraform)
+- Cloud specific guides (best if specifically using [AWS EKS](https://docs.onyx.app/deployment/cloud/aws/eks), [Azure VMs](https://docs.onyx.app/deployment/cloud/azure), etc.)
+
+> [!TIP]  
+> **To try Onyx for free without deploying, check out [Onyx Cloud](https://cloud.onyx.app/signup)**.


-## Deployment
-**To try it out for free and get started in seconds, check out [Onyx Cloud](https://cloud.onyx.app/signup)**.

-Onyx can also be run locally (even on a laptop) or deployed on a virtual machine with a single
-`docker compose` command. Checkout our [docs](https://docs.onyx.app/quickstart) to learn more.
+## 🔍 Other Notable Benefits
+Onyx is built for teams of all sizes, from individual users to the largest global enterprises.

-We also have built-in support for high-availability/scalable deployment on Kubernetes.
-References [here](https://github.com/onyx-dot-app/onyx/tree/main/deployment).
+- **Enterprise Search**: far more than simple RAG, Onyx has custom indexing and retrieval that remains performant and accurate for scales of up to tens of millions of documents.
+- **Security**: SSO (OIDC/SAML/OAuth2), RBAC, encryption of credentials, etc.
+- **Management UI**: different user roles such as basic, curator, and admin.
+- **Document Permissioning**: mirrors user access from external apps for RAG use cases.


-## 🔍 Other Notable Benefits of Onyx
- Custom deep learning models for indexing and inference time, only through Onyx + learning from user feedback.
- Flexible security features like SSO (OIDC/SAML/OAuth2), RBAC, encryption of credentials, etc.
- Knowledge curation features like document-sets, query history, usage analytics, etc.
- Scalable deployment options tested up to many tens of thousands users and hundreds of millions of documents.
-

 ## 🚧 Roadmap
- New methods in information retrieval (StructRAG, LightGraphRAG, etc.)
- Personalized Search
- Organizational understanding and ability to locate and suggest experts from your team.
- Code Search
- SQL and Structured Query Language
+To see ongoing and upcoming projects, check out our [roadmap](https://github.com/orgs/onyx-dot-app/projects/2)!


-## 🔌 Connectors
-Keep knowledge and access up to sync across 40+ connectors:
-
- Google Drive
- Confluence
- Slack
- Gmail
- Salesforce
- Microsoft Sharepoint
- Github
- Jira
- Zendesk
- Gong
- Microsoft Teams
- Dropbox
- Local Files
- Websites
- And more ...
-
-See the full list [here](https://docs.onyx.app/connectors).
-

 ## 📚 Licensing
 There are two editions of Onyx:

- Onyx Community Edition (CE) is available freely under the MIT Expat license. Simply follow the Deployment guide above.
+- Onyx Community Edition (CE) is available freely under the MIT license.
 - Onyx Enterprise Edition (EE) includes extra features that are primarily useful for larger organizations.
 For feature details, check out [our website](https://www.onyx.app/pricing).

-To try the Onyx Enterprise Edition:
-1. Checkout [Onyx Cloud](https://cloud.onyx.app/signup).
-2. For self-hosting the Enterprise Edition, contact us at [founders@onyx.app](mailto:founders@onyx.app) or book a call with us on our [Cal](https://cal.com/team/onyx/founders).
+
+
+## 👪 Community
+Join our open source community on **[Discord](https://discord.gg/TDJ59cGV2X)**!
+


 ## 💡 Contributing
 Looking to contribute? Please check out the [Contribution Guide](CONTRIBUTING.md) for more details.
-
--- a/backend/Dockerfile.model_server
+++ b/backend/Dockerfile.model_server
@@ -23,6 +23,22 @@ RUN mkdir -p /app && \
    chmod 755 /var/log/onyx && \
    chown onyx:onyx /var/log/onyx

+# --- add toolchain needed for Rust/Python builds (fastuuid) ---
+ENV RUSTUP_HOME=/usr/local/rustup \
+    CARGO_HOME=/usr/local/cargo \
+    PATH=/usr/local/cargo/bin:$PATH
+
+RUN set -eux; \
+    apt-get update && apt-get install -y --no-install-recommends \
+        build-essential \
+        pkg-config \
+        curl \
+        ca-certificates \
+    && rm -rf /var/lib/apt/lists/* \
+    # Install latest stable Rust (supports Cargo.lock v4); force HTTPS + TLS >= 1.2 since the script is piped to sh
+    && curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal --default-toolchain stable \
+    && rustc --version && cargo --version
+
 COPY ./requirements/model_server.txt /tmp/requirements.txt
 RUN pip install --no-cache-dir --upgrade \
        --retries 5 \
--- a/backend/alembic/versions/0cd424f32b1d_user_file_data_preparation_and_backfill.py
+++ b/backend/alembic/versions/0cd424f32b1d_user_file_data_preparation_and_backfill.py
@@ -0,0 +1,389 @@
+"""Migration 2: User file data preparation and backfill
+
+Revision ID: 0cd424f32b1d
+Revises: 9b66d3156fc6
+Create Date: 2025-09-22 09:44:42.727034
+
+This migration populates the new columns added in migration 1.
+It prepares data for the UUID transition and relationship migration.
+"""
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy import text
+import logging
+
+logger = logging.getLogger("alembic.runtime.migration")
+
+# revision identifiers, used by Alembic.
+revision = "0cd424f32b1d"
+down_revision = "9b66d3156fc6"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Populate the transitional columns added by the previous migration.
+
+    Steps, in order:
+      1. Fill user_file.new_id with generated UUIDs, then make it NOT NULL
+         and drop its server default.
+      2. Copy user_file.new_id into persona__user_file.user_file_id_uuid,
+         then make that column NOT NULL.
+      3. Insert a user_project row for every chat_folder (user_id, name)
+         pair that does not already exist.
+      4. Point chat_session.project_id at the project matching its folder.
+      5. Rename plaintext-cache file_record rows to the UUID-based id.
+      6. Mark user_file rows that have a document_id as not yet migrated.
+      7. Derive user_file.status from the latest index_attempt.
+
+    Raises:
+        Exception: if any user_file.new_id or
+            persona__user_file.user_file_id_uuid is still NULL after the
+            corresponding backfill.
+    """
+
+    bind = op.get_bind()
+    inspector = sa.inspect(bind)
+
+    # === Step 1: Populate user_file.new_id ===
+    user_file_columns = [col["name"] for col in inspector.get_columns("user_file")]
+    has_new_id = "new_id" in user_file_columns
+
+    if has_new_id:
+        logger.info("Populating user_file.new_id with UUIDs...")
+
+        # Count rows needing UUIDs
+        null_count = bind.execute(
+            text("SELECT COUNT(*) FROM user_file WHERE new_id IS NULL")
+        ).scalar_one()
+
+        if null_count > 0:
+            logger.info(f"Generating UUIDs for {null_count} user_file records...")
+
+            # Populate in batches to avoid long locks
+            batch_size = 10000
+            total_updated = 0
+
+            # Each pass fills at most batch_size rows; a short batch means
+            # nothing is left to fill, so the loop stops.
+            while True:
+                result = bind.execute(
+                    text(
+                        """
+                    UPDATE user_file
+                    SET new_id = gen_random_uuid()
+                    WHERE new_id IS NULL
+                    AND id IN (
+                        SELECT id FROM user_file
+                        WHERE new_id IS NULL
+                        LIMIT :batch_size
+                    )
+                """
+                    ),
+                    {"batch_size": batch_size},
+                )
+
+                updated = result.rowcount
+                total_updated += updated
+
+                if updated < batch_size:
+                    break
+
+                logger.info(f"  Updated {total_updated}/{null_count} records...")
+
+            logger.info(f"Generated UUIDs for {total_updated} user_file records")
+
+        # Verify all records have UUIDs
+        remaining_null = bind.execute(
+            text("SELECT COUNT(*) FROM user_file WHERE new_id IS NULL")
+        ).scalar_one()
+
+        if remaining_null > 0:
+            raise Exception(
+                f"Failed to populate all user_file.new_id values ({remaining_null} NULL)"
+            )
+
+        # Lock down the column: NOT NULL, and no server-side default anymore
+        op.alter_column("user_file", "new_id", nullable=False)
+        op.alter_column("user_file", "new_id", server_default=None)
+        logger.info("Locked down user_file.new_id column")
+
+    # === Step 2: Populate persona__user_file.user_file_id_uuid ===
+    persona_user_file_columns = [
+        col["name"] for col in inspector.get_columns("persona__user_file")
+    ]
+
+    if has_new_id and "user_file_id_uuid" in persona_user_file_columns:
+        logger.info("Populating persona__user_file.user_file_id_uuid...")
+
+        # Count rows needing update
+        null_count = bind.execute(
+            text(
+                """
+            SELECT COUNT(*) FROM persona__user_file
+            WHERE user_file_id IS NOT NULL AND user_file_id_uuid IS NULL
+        """
+            )
+        ).scalar_one()
+
+        if null_count > 0:
+            logger.info(f"Updating {null_count} persona__user_file records...")
+
+            # Update in batches
+            batch_size = 10000
+            total_updated = 0
+
+            # The batch is chosen by persona_id, so a single pass updates every
+            # pending row of the selected personas and may exceed batch_size.
+            # NOTE(review): rows whose user_file_id has no matching user_file
+            # are never updated and can end the loop early; the verification
+            # below would then raise. Confirm a foreign key rules this out.
+            while True:
+                result = bind.execute(
+                    text(
+                        """
+                    UPDATE persona__user_file p
+                    SET user_file_id_uuid = uf.new_id
+                    FROM user_file uf
+                    WHERE p.user_file_id = uf.id
+                    AND p.user_file_id_uuid IS NULL
+                    AND p.persona_id IN (
+                        SELECT persona_id
+                        FROM persona__user_file
+                        WHERE user_file_id_uuid IS NULL
+                        LIMIT :batch_size
+                    )
+                """
+                    ),
+                    {"batch_size": batch_size},
+                )
+
+                updated = result.rowcount
+                total_updated += updated
+
+                if updated < batch_size:
+                    break
+
+                logger.info(f"  Updated {total_updated}/{null_count} records...")
+
+            logger.info(f"Updated {total_updated} persona__user_file records")
+
+        # Verify all records are populated
+        remaining_null = bind.execute(
+            text(
+                """
+            SELECT COUNT(*) FROM persona__user_file
+            WHERE user_file_id IS NOT NULL AND user_file_id_uuid IS NULL
+        """
+            )
+        ).scalar_one()
+
+        if remaining_null > 0:
+            raise Exception(
+                f"Failed to populate all persona__user_file.user_file_id_uuid values ({remaining_null} NULL)"
+            )
+
+        op.alter_column("persona__user_file", "user_file_id_uuid", nullable=False)
+        logger.info("Locked down persona__user_file.user_file_id_uuid column")
+
+    # === Step 3: Create user_project records from chat_folder ===
+    # The NOT EXISTS guard skips (user_id, name) pairs that already have a
+    # project, which also makes a re-run insert nothing new.
+    if "chat_folder" in inspector.get_table_names():
+        logger.info("Creating user_project records from chat_folder...")
+
+        result = bind.execute(
+            text(
+                """
+            INSERT INTO user_project (user_id, name)
+            SELECT cf.user_id, cf.name
+            FROM chat_folder cf
+            WHERE NOT EXISTS (
+                SELECT 1
+                FROM user_project up
+                WHERE up.user_id = cf.user_id AND up.name = cf.name
+            )
+        """
+            )
+        )
+
+        logger.info(f"Created {result.rowcount} user_project records from chat_folder")
+
+    # === Step 4: Populate chat_session.project_id ===
+    chat_session_columns = [
+        col["name"] for col in inspector.get_columns("chat_session")
+    ]
+
+    if "folder_id" in chat_session_columns and "project_id" in chat_session_columns:
+        logger.info("Populating chat_session.project_id...")
+
+        # Count sessions needing update
+        null_count = bind.execute(
+            text(
+                """
+            SELECT COUNT(*) FROM chat_session
+            WHERE project_id IS NULL AND folder_id IS NOT NULL
+        """
+            )
+        ).scalar_one()
+
+        if null_count > 0:
+            logger.info(f"Updating {null_count} chat_session records...")
+
+            # Folder -> project mapping is by (user_id, name), matching the
+            # rows created in Step 3. This runs as one statement, not batched.
+            result = bind.execute(
+                text(
+                    """
+                UPDATE chat_session cs
+                SET project_id = up.id
+                FROM chat_folder cf
+                JOIN user_project up ON up.user_id = cf.user_id AND up.name = cf.name
+                WHERE cs.folder_id = cf.id AND cs.project_id IS NULL
+            """
+                )
+            )
+
+            logger.info(f"Updated {result.rowcount} chat_session records")
+
+        # Verify all records are populated (unmapped sessions only warn,
+        # they do not abort the migration)
+        remaining_null = bind.execute(
+            text(
+                """
+            SELECT COUNT(*) FROM chat_session
+            WHERE project_id IS NULL AND folder_id IS NOT NULL
+        """
+            )
+        ).scalar_one()
+
+        if remaining_null > 0:
+            logger.warning(
+                f"Warning: {remaining_null} chat_session records could not be mapped to projects"
+            )
+
+    # === Step 5: Update plaintext FileRecord IDs/display names to UUID scheme ===
+    # The queries below match legacy plaintext cache rows whose file_id is
+    # 'plaintext_<int_id>' and rewrite them to 'plaintext_<uuid>'.
+    # Only file_id and display_name are updated; other columns are untouched.
+    # NOTE(review): this step is not guarded by has_new_id although it reads
+    # user_file.new_id - confirm the column always exists at this point.
+    logger.info("Updating plaintext FileRecord ids and display names to UUID scheme...")
+
+    # Count legacy plaintext records that can be mapped to UUID user_file ids
+    count_query = text(
+        """
+        SELECT COUNT(*)
+        FROM file_record fr
+        JOIN user_file uf ON fr.file_id = CONCAT('plaintext_', uf.id::text)
+        WHERE LOWER(fr.file_origin::text) = 'plaintext_cache'
+        """
+    )
+    legacy_count = bind.execute(count_query).scalar_one()
+
+    if legacy_count and legacy_count > 0:
+        logger.info(f"Found {legacy_count} legacy plaintext file records to update")
+
+        # Update display_name first: the join below relies on the legacy
+        # file_id, which the following statement overwrites.
+        bind.execute(
+            text(
+                """
+                UPDATE file_record fr
+                SET display_name = CONCAT('Plaintext for user file ', uf.new_id::text)
+                FROM user_file uf
+                WHERE LOWER(fr.file_origin::text) = 'plaintext_cache'
+                    AND fr.file_id = CONCAT('plaintext_', uf.id::text)
+                """
+            )
+        )
+
+        # Remap file_id from 'plaintext_<int>' -> 'plaintext_<uuid>' using transitional new_id
+        # A single UPDATE joined on the exact legacy id of an existing user_file,
+        # so unrelated file_record rows are never renamed
+        result = bind.execute(
+            text(
+                """
+                UPDATE file_record fr
+                SET file_id = CONCAT('plaintext_', uf.new_id::text)
+                FROM user_file uf
+                WHERE LOWER(fr.file_origin::text) = 'plaintext_cache'
+                    AND fr.file_id = CONCAT('plaintext_', uf.id::text)
+                """
+            )
+        )
+        logger.info(
+            f"Updated {result.rowcount} plaintext file_record ids to UUID scheme"
+        )
+
+    # === Step 6: Backfill document_id_migrated = FALSE for legacy rows ===
+    # Existing rows that had a legacy document_id are marked as not migrated so
+    # they get processed later.
+    # NOTE(review): only the backfill happens here; the TRUE default for new
+    # records is presumably defined with the column elsewhere - confirm.
+
+    # Backfill existing records: if document_id is not null, set to FALSE
+    bind.execute(
+        text(
+            """
+            UPDATE user_file
+            SET document_id_migrated = FALSE
+            WHERE document_id IS NOT NULL
+            """
+        )
+    )
+
+    # === Step 7: Backfill user_file.status from index_attempt ===
+    logger.info("Backfilling user_file.status from index_attempt...")
+
+    # Update user_file status based on latest index attempt
+    # Using CTEs instead of temp tables for asyncpg compatibility
+    # Only rows currently in 'PROCESSING' are touched.
+    # NOTE(review): because of the LEFT JOIN, a file whose cc pair has no
+    # index_attempt gets la.status = NULL and falls into ELSE 'FAILED' -
+    # confirm that this is the intended outcome.
+    result = bind.execute(
+        text(
+            """
+        WITH latest_attempt AS (
+            SELECT DISTINCT ON (ia.connector_credential_pair_id)
+                ia.connector_credential_pair_id,
+                ia.status
+            FROM index_attempt ia
+            ORDER BY ia.connector_credential_pair_id, ia.time_updated DESC
+        ),
+        uf_to_ccp AS (
+            SELECT DISTINCT uf.id AS uf_id, ccp.id AS cc_pair_id
+            FROM user_file uf
+            JOIN document_by_connector_credential_pair dcc
+                ON dcc.id = REPLACE(uf.document_id, 'USER_FILE_CONNECTOR__', 'FILE_CONNECTOR__')
+            JOIN connector_credential_pair ccp
+                ON ccp.connector_id = dcc.connector_id
+                AND ccp.credential_id = dcc.credential_id
+        )
+        UPDATE user_file uf
+        SET status = CASE
+            WHEN la.status IN ('NOT_STARTED', 'IN_PROGRESS') THEN 'PROCESSING'
+            WHEN la.status = 'SUCCESS' THEN 'COMPLETED'
+            ELSE 'FAILED'
+        END
+        FROM uf_to_ccp ufc
+        LEFT JOIN latest_attempt la
+            ON la.connector_credential_pair_id = ufc.cc_pair_id
+        WHERE uf.id = ufc.uf_id
+        AND uf.status = 'PROCESSING'
+    """
+        )
+    )
+
+    logger.info(f"Updated status for {result.rowcount} user_file records")
+
+    logger.info("Migration 2 (data preparation) completed successfully")
+
+
+def downgrade() -> None:
+    """Relax the constraints set by upgrade() and reset user_file.status.
+
+    The backfilled UUID values are kept; only nullability and the server
+    default are restored so that the schema downgrade can run afterwards.
+    Every action is skipped when its table or column does not exist.
+    """
+
+    connection = op.get_bind()
+    inspector = sa.inspect(connection)
+
+    def column_names(table_name: str) -> set[str]:
+        # A missing table simply yields no columns.
+        if table_name not in inspector.get_table_names():
+            return set()
+        return {column["name"] for column in inspector.get_columns(table_name)}
+
+    logger.info("Starting downgrade of data preparation...")
+
+    # user_file.new_id: nullable again, with the UUID server default restored.
+    # The generated values are intentionally left in place.
+    if "new_id" in column_names("user_file"):
+        op.alter_column(
+            "user_file",
+            "new_id",
+            nullable=True,
+            server_default=sa.text("gen_random_uuid()"),
+        )
+        logger.info("Reset user_file.new_id to nullable")
+
+    # persona__user_file.user_file_id_uuid: nullable again, values kept.
+    if "user_file_id_uuid" in column_names("persona__user_file"):
+        op.alter_column("persona__user_file", "user_file_id_uuid", nullable=True)
+        logger.info("Reset persona__user_file.user_file_id_uuid to nullable")
+
+    # user_project rows and chat_session.project_id are deliberately left
+    # alone: they might be in use, and the schema downgrade handles them.
+
+    # Every user_file goes back to the default status.
+    if "status" in column_names("user_file"):
+        connection.execute(text("UPDATE user_file SET status = 'PROCESSING'"))
+        logger.info("Reset user_file.status to default")
+
+    logger.info("Downgrade completed successfully")
--- a/backend/alembic/versions/16c37a30adf2_user_file_relationship_migration.py
+++ b/backend/alembic/versions/16c37a30adf2_user_file_relationship_migration.py
@@ -0,0 +1,261 @@
+"""Migration 3: User file relationship migration
+
+Revision ID: 16c37a30adf2
+Revises: 0cd424f32b1d
+Create Date: 2025-09-22 09:47:34.175596
+
+This migration converts folder-based relationships to project-based relationships.
+It migrates persona__user_folder to persona__user_file and populates project__user_file.
+"""
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy import text
+import logging
+
+logger = logging.getLogger("alembic.runtime.migration")
+
+# revision identifiers, used by Alembic.
+revision = "16c37a30adf2"
+down_revision = "0cd424f32b1d"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Migrate folder-based relationships to project-based relationships.
+
+    Steps, in order:
+      1. For every persona linked to a folder, link it to each file in that
+         folder (persona__user_folder -> persona__user_file).
+      2. Add the chat_session.project_id -> user_project.id foreign key.
+      3. Create project__user_file rows from user_file.folder_id.
+      4. Create an index on chat_session.project_id.
+
+    Each step first checks for the tables, columns, constraint or index it
+    needs and is skipped when its target already exists or is missing.
+    """
+
+    bind = op.get_bind()
+    inspector = sa.inspect(bind)
+
+    # === Step 1: Migrate persona__user_folder to persona__user_file ===
+    table_names = inspector.get_table_names()
+
+    if "persona__user_folder" in table_names and "user_file" in table_names:
+        user_file_columns = [col["name"] for col in inspector.get_columns("user_file")]
+        has_new_id = "new_id" in user_file_columns
+
+        if has_new_id and "folder_id" in user_file_columns:
+            logger.info(
+                "Migrating persona__user_folder relationships to persona__user_file..."
+            )
+
+            # Count relationships to migrate (asyncpg-compatible)
+            count_query = text(
+                """
+                SELECT COUNT(*)
+                FROM (
+                    SELECT DISTINCT puf.persona_id, uf.id
+                    FROM persona__user_folder puf
+                    JOIN user_file uf ON uf.folder_id = puf.user_folder_id
+                    WHERE NOT EXISTS (
+                        SELECT 1
+                        FROM persona__user_file p2
+                        WHERE p2.persona_id = puf.persona_id
+                        AND p2.user_file_id = uf.id
+                    )
+                ) AS distinct_pairs
+            """
+            )
+            to_migrate = bind.execute(count_query).scalar_one()
+
+            if to_migrate > 0:
+                logger.info(f"Creating {to_migrate} persona-file relationships...")
+
+                # Migrate in batches to avoid memory issues
+                batch_size = 10000
+                total_inserted = 0
+
+                # The same INSERT is re-run on every pass: NOT EXISTS filters
+                # out pairs inserted by earlier passes, so each pass picks up
+                # the next batch. A short batch ends the loop.
+                while True:
+                    # Insert batch directly using subquery (asyncpg compatible)
+                    result = bind.execute(
+                        text(
+                            """
+                        INSERT INTO persona__user_file (persona_id, user_file_id, user_file_id_uuid)
+                        SELECT DISTINCT puf.persona_id, uf.id as file_id, uf.new_id
+                        FROM persona__user_folder puf
+                        JOIN user_file uf ON uf.folder_id = puf.user_folder_id
+                        WHERE NOT EXISTS (
+                            SELECT 1
+                            FROM persona__user_file p2
+                            WHERE p2.persona_id = puf.persona_id
+                            AND p2.user_file_id = uf.id
+                        )
+                        LIMIT :batch_size
+                    """
+                        ),
+                        {"batch_size": batch_size},
+                    )
+
+                    inserted = result.rowcount
+                    total_inserted += inserted
+
+                    if inserted < batch_size:
+                        break
+
+                    logger.info(
+                        f"  Migrated {total_inserted}/{to_migrate} relationships..."
+                    )
+
+                logger.info(
+                    f"Created {total_inserted} persona__user_file relationships"
+                )
+
+    # === Step 2: Add foreign key for chat_session.project_id ===
+    # Existence is detected by constraint name only.
+    chat_session_fks = inspector.get_foreign_keys("chat_session")
+    fk_exists = any(
+        fk["name"] == "fk_chat_session_project_id" for fk in chat_session_fks
+    )
+
+    if not fk_exists:
+        logger.info("Adding foreign key constraint for chat_session.project_id...")
+        op.create_foreign_key(
+            "fk_chat_session_project_id",
+            "chat_session",
+            "user_project",
+            ["project_id"],
+            ["id"],
+        )
+        logger.info("Added foreign key constraint")
+
+    # === Step 3: Populate project__user_file from user_file.folder_id ===
+    # folder_id is written directly as project_id.
+    # NOTE(review): assumes user_project ids coincide with the legacy folder
+    # ids stored in user_file.folder_id - TODO confirm.
+    user_file_columns = [col["name"] for col in inspector.get_columns("user_file")]
+    has_new_id = "new_id" in user_file_columns
+
+    if has_new_id and "folder_id" in user_file_columns:
+        logger.info("Populating project__user_file from folder relationships...")
+
+        # Count relationships to create
+        count_query = text(
+            """
+            SELECT COUNT(*)
+            FROM user_file uf
+            WHERE uf.folder_id IS NOT NULL
+            AND NOT EXISTS (
+                SELECT 1
+                FROM project__user_file puf
+                WHERE puf.project_id = uf.folder_id
+                AND puf.user_file_id = uf.new_id
+            )
+        """
+        )
+        to_create = bind.execute(count_query).scalar_one()
+
+        if to_create > 0:
+            logger.info(f"Creating {to_create} project-file relationships...")
+
+            # Insert in batches
+            batch_size = 10000
+            total_inserted = 0
+
+            # Same re-run pattern as Step 1; ON CONFLICT additionally ignores
+            # pairs that already exist.
+            while True:
+                result = bind.execute(
+                    text(
+                        """
+                    INSERT INTO project__user_file (project_id, user_file_id)
+                    SELECT uf.folder_id, uf.new_id
+                    FROM user_file uf
+                    WHERE uf.folder_id IS NOT NULL
+                    AND NOT EXISTS (
+                        SELECT 1
+                        FROM project__user_file puf
+                        WHERE puf.project_id = uf.folder_id
+                        AND puf.user_file_id = uf.new_id
+                    )
+                    LIMIT :batch_size
+                    ON CONFLICT (project_id, user_file_id) DO NOTHING
+                """
+                    ),
+                    {"batch_size": batch_size},
+                )
+
+                inserted = result.rowcount
+                total_inserted += inserted
+
+                if inserted < batch_size:
+                    break
+
+                logger.info(f"  Created {total_inserted}/{to_create} relationships...")
+
+            logger.info(f"Created {total_inserted} project__user_file relationships")
+
+    # === Step 4: Create index on chat_session.project_id ===
+    # If the index list cannot be read it is treated as empty, so the index
+    # creation below is still attempted.
+    try:
+        indexes = [ix.get("name") for ix in inspector.get_indexes("chat_session")]
+    except Exception:
+        indexes = []
+
+    if "ix_chat_session_project_id" not in indexes:
+        logger.info("Creating index on chat_session.project_id...")
+        op.create_index(
+            "ix_chat_session_project_id", "chat_session", ["project_id"], unique=False
+        )
+        logger.info("Created index")
+
+    logger.info("Migration 3 (relationship migration) completed successfully")
+
+
+def downgrade() -> None:
+    """Remove the relationships, foreign key and index created by upgrade().
+
+    Dropping the index and the foreign key is best-effort: a failure is
+    logged as a warning and the downgrade continues, instead of being
+    silently discarded.
+    All rows of project__user_file are deleted; persona__user_file rows are
+    deleted only when they correspond to a persona/folder link.
+    """
+
+    bind = op.get_bind()
+    inspector = sa.inspect(bind)
+    table_names = inspector.get_table_names()
+
+    logger.info("Starting downgrade of relationship migration...")
+
+    # Drop index on chat_session.project_id
+    try:
+        indexes = [ix.get("name") for ix in inspector.get_indexes("chat_session")]
+        if "ix_chat_session_project_id" in indexes:
+            op.drop_index("ix_chat_session_project_id", "chat_session")
+            logger.info("Dropped index on chat_session.project_id")
+    except Exception as e:
+        # Best-effort: keep going, but leave a trace of what went wrong
+        logger.warning(f"Could not drop index ix_chat_session_project_id: {e}")
+
+    # Drop foreign key constraint
+    try:
+        chat_session_fks = inspector.get_foreign_keys("chat_session")
+        fk_exists = any(
+            fk["name"] == "fk_chat_session_project_id" for fk in chat_session_fks
+        )
+        if fk_exists:
+            op.drop_constraint(
+                "fk_chat_session_project_id", "chat_session", type_="foreignkey"
+            )
+            logger.info("Dropped foreign key constraint on chat_session.project_id")
+    except Exception as e:
+        # Best-effort: keep going, but leave a trace of what went wrong
+        logger.warning(
+            f"Could not drop foreign key fk_chat_session_project_id: {e}"
+        )
+
+    # Clear project__user_file relationships (but keep the table for migration 1 to handle)
+    if "project__user_file" in table_names:
+        result = bind.execute(text("DELETE FROM project__user_file"))
+        logger.info(f"Cleared {result.rowcount} records from project__user_file")
+
+    # Remove migrated persona__user_file relationships
+    # Only remove those that came from folder relationships
+    # NOTE(review): a persona/file pair that existed before the upgrade AND is
+    # also reachable through a folder link is deleted as well - confirm this
+    # is acceptable.
+    if all(
+        table in table_names
+        for table in ["persona__user_file", "persona__user_folder", "user_file"]
+    ):
+        user_file_columns = [col["name"] for col in inspector.get_columns("user_file")]
+        if "folder_id" in user_file_columns:
+            result = bind.execute(
+                text(
+                    """
+                DELETE FROM persona__user_file puf
+                WHERE EXISTS (
+                    SELECT 1
+                    FROM user_file uf
+                    JOIN persona__user_folder puf2
+                        ON puf2.user_folder_id = uf.folder_id
+                    WHERE puf.persona_id = puf2.persona_id
+                    AND puf.user_file_id = uf.id
+                )
+            """
+                )
+            )
+            logger.info(
+                f"Removed {result.rowcount} migrated persona__user_file relationships"
+            )
+
+    logger.info("Downgrade completed successfully")
--- a/backend/alembic/versions/2b75d0a8ffcb_user_file_schema_cleanup.py
+++ b/backend/alembic/versions/2b75d0a8ffcb_user_file_schema_cleanup.py
@@ -0,0 +1,218 @@
+"""Migration 6: User file schema cleanup
+
+Revision ID: 2b75d0a8ffcb
+Revises: 3a78dba1080a
+Create Date: 2025-09-22 10:09:26.375377
+
+This migration removes legacy columns and tables after data migration is complete.
+It should only be run after verifying all data has been successfully migrated.
+"""
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy import text
+import logging
+
+logger = logging.getLogger("alembic.runtime.migration")
+
+# revision identifiers, used by Alembic.
+revision = "2b75d0a8ffcb"
+down_revision = "3a78dba1080a"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Remove legacy columns and tables.
+
+    Steps, in order:
+      1. Warn if chat sessions still reference a folder without a project.
+      2. Drop chat_session.folder_id (and its foreign key).
+      3. Drop the persona__user_folder table.
+      4. Drop the chat_folder table.
+      5. Drop user_file.folder_id and user_file.cc_pair_id.
+      6. Drop the leftover unique constraint on user_file.cc_pair_id.
+
+    Remaining data in the dropped tables only produces a warning; it does
+    not stop the migration.
+    """
+
+    bind = op.get_bind()
+    inspector = sa.inspect(bind)
+
+    logger.info("Starting schema cleanup...")
+
+    # === Step 1: Verify data migration is complete ===
+    logger.info("Verifying data migration completion...")
+
+    # Check if any chat sessions still have folder_id references
+    chat_session_columns = [
+        col["name"] for col in inspector.get_columns("chat_session")
+    ]
+    if "folder_id" in chat_session_columns:
+        orphaned_count = bind.execute(
+            text(
+                """
+            SELECT COUNT(*) FROM chat_session
+            WHERE folder_id IS NOT NULL AND project_id IS NULL
+        """
+            )
+        ).scalar_one()
+
+        if orphaned_count > 0:
+            logger.warning(
+                f"WARNING: {orphaned_count} chat_session records still have "
+                f"folder_id without project_id. Proceeding anyway."
+            )
+
+    # === Step 2: Drop chat_session.folder_id ===
+    if "folder_id" in chat_session_columns:
+        logger.info("Dropping chat_session.folder_id...")
+
+        # Drop foreign key constraint first
+        op.execute(
+            "ALTER TABLE chat_session DROP CONSTRAINT IF EXISTS chat_session_folder_fk"
+        )
+
+        # Drop the column
+        op.drop_column("chat_session", "folder_id")
+        logger.info("Dropped chat_session.folder_id")
+
+    # === Step 3: Drop persona__user_folder table ===
+    if "persona__user_folder" in inspector.get_table_names():
+        logger.info("Dropping persona__user_folder table...")
+
+        # Check for any remaining data
+        remaining = bind.execute(
+            text("SELECT COUNT(*) FROM persona__user_folder")
+        ).scalar_one()
+
+        if remaining > 0:
+            logger.warning(
+                f"WARNING: Dropping persona__user_folder with {remaining} records"
+            )
+
+        op.drop_table("persona__user_folder")
+        logger.info("Dropped persona__user_folder table")
+
+    # === Step 4: Drop chat_folder table ===
+    if "chat_folder" in inspector.get_table_names():
+        logger.info("Dropping chat_folder table...")
+
+        # Check for any remaining data
+        remaining = bind.execute(text("SELECT COUNT(*) FROM chat_folder")).scalar_one()
+
+        if remaining > 0:
+            logger.warning(f"WARNING: Dropping chat_folder with {remaining} records")
+
+        op.drop_table("chat_folder")
+        logger.info("Dropped chat_folder table")
+
+    # === Step 5: Drop user_file legacy columns ===
+    user_file_columns = [col["name"] for col in inspector.get_columns("user_file")]
+
+    # Drop folder_id
+    if "folder_id" in user_file_columns:
+        logger.info("Dropping user_file.folder_id...")
+        op.drop_column("user_file", "folder_id")
+        logger.info("Dropped user_file.folder_id")
+
+    # Drop cc_pair_id (already handled in migration 5, but be sure)
+    if "cc_pair_id" in user_file_columns:
+        logger.info("Dropping user_file.cc_pair_id...")
+
+        # Drop any remaining foreign key constraints on cc_pair_id.
+        # The attnum = ANY(conkey) condition restricts the loop to constraints
+        # that actually cover the cc_pair_id column; without it every foreign
+        # key on user_file would match and be dropped.
+        bind.execute(
+            text(
+                """
+            DO $$
+            DECLARE r RECORD;
+            BEGIN
+              FOR r IN (
+                SELECT conname
+                FROM pg_constraint c
+                JOIN pg_class t ON c.conrelid = t.oid
+                WHERE c.contype = 'f'
+                  AND t.relname = 'user_file'
+                  AND EXISTS (
+                    SELECT 1 FROM pg_attribute a
+                    WHERE a.attrelid = t.oid
+                    AND a.attname = 'cc_pair_id'
+                    AND a.attnum = ANY (c.conkey)
+                  )
+              ) LOOP
+                EXECUTE format('ALTER TABLE user_file DROP CONSTRAINT IF EXISTS %I', r.conname);
+              END LOOP;
+            END$$;
+        """
+            )
+        )
+
+        op.drop_column("user_file", "cc_pair_id")
+        logger.info("Dropped user_file.cc_pair_id")
+
+    # === Step 6: Clean up any remaining constraints ===
+    logger.info("Cleaning up remaining constraints...")
+
+    # Drop any unique constraints on removed columns
+    op.execute(
+        "ALTER TABLE user_file DROP CONSTRAINT IF EXISTS user_file_cc_pair_id_key"
+    )
+
+    logger.info("Migration 6 (schema cleanup) completed successfully")
+    logger.info("Legacy schema has been fully removed")
+
+
+def downgrade() -> None:
+    """Recreate dropped columns and tables (structure only, no data).
+
+    Restores user_file.cc_pair_id, user_file.folder_id, the chat_folder and
+    persona__user_folder tables, and chat_session.folder_id with its foreign
+    key to chat_folder. Anything that already exists is left untouched.
+    """
+
+    bind = op.get_bind()
+    inspector = sa.inspect(bind)
+
+    # Snapshot the table list once and keep it current by hand: the
+    # Inspector caches reflection results, so asking it again after
+    # create_table() would still report the table as missing.
+    table_names = set(inspector.get_table_names())
+
+    logger.warning("Downgrading schema cleanup - recreating structure only, no data!")
+
+    # Recreate user_file columns
+    if "user_file" in table_names:
+        columns = [col["name"] for col in inspector.get_columns("user_file")]
+
+        if "cc_pair_id" not in columns:
+            op.add_column(
+                "user_file", sa.Column("cc_pair_id", sa.Integer(), nullable=True)
+            )
+
+        if "folder_id" not in columns:
+            op.add_column(
+                "user_file", sa.Column("folder_id", sa.Integer(), nullable=True)
+            )
+
+    # Recreate chat_folder table
+    if "chat_folder" not in table_names:
+        op.create_table(
+            "chat_folder",
+            sa.Column("id", sa.Integer(), nullable=False),
+            sa.Column("user_id", sa.UUID(), nullable=False),
+            sa.Column("name", sa.String(), nullable=False),
+            sa.Column("created_at", sa.DateTime(timezone=True), nullable=False),
+            sa.PrimaryKeyConstraint("id"),
+            sa.ForeignKeyConstraint(
+                ["user_id"], ["user.id"], name="chat_folder_user_fk"
+            ),
+        )
+        table_names.add("chat_folder")
+
+    # Recreate persona__user_folder table
+    # NOTE(review): user_folder_id references user_project.id here - confirm
+    # this matches the table the legacy foreign key pointed at.
+    if "persona__user_folder" not in table_names:
+        op.create_table(
+            "persona__user_folder",
+            sa.Column("persona_id", sa.Integer(), nullable=False),
+            sa.Column("user_folder_id", sa.Integer(), nullable=False),
+            sa.PrimaryKeyConstraint("persona_id", "user_folder_id"),
+            sa.ForeignKeyConstraint(["persona_id"], ["persona.id"]),
+            sa.ForeignKeyConstraint(["user_folder_id"], ["user_project.id"]),
+        )
+        table_names.add("persona__user_folder")
+
+    # Add folder_id back to chat_session
+    if "chat_session" in table_names:
+        columns = [col["name"] for col in inspector.get_columns("chat_session")]
+        if "folder_id" not in columns:
+            op.add_column(
+                "chat_session", sa.Column("folder_id", sa.Integer(), nullable=True)
+            )
+
+            # Add foreign key if chat_folder exists (including the case where
+            # it was recreated just above)
+            if "chat_folder" in table_names:
+                op.create_foreign_key(
+                    "chat_session_folder_fk",
+                    "chat_session",
+                    "chat_folder",
+                    ["folder_id"],
+                    ["id"],
+                )
+
+    logger.info("Downgrade completed - structure recreated but data is lost")
--- a/backend/alembic/versions/3a78dba1080a_user_file_legacy_data_cleanup.py
+++ b/backend/alembic/versions/3a78dba1080a_user_file_legacy_data_cleanup.py
@@ -0,0 +1,298 @@
+"""Migration 5: User file legacy data cleanup
+
+Revision ID: 3a78dba1080a
+Revises: 7cc3fcc116c1
+Create Date: 2025-09-22 10:04:27.986294
+
+This migration removes legacy user-file documents and connector_credential_pairs.
+It performs bulk deletions of obsolete data after the UUID migration.
+"""
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql as psql
+from sqlalchemy import text
+import logging
+from typing import List
+import uuid
+
+logger = logging.getLogger("alembic.runtime.migration")
+
+# revision identifiers, used by Alembic.
+revision = "3a78dba1080a"
+down_revision = "7cc3fcc116c1"
+branch_labels = None
+depends_on = None
+
+
+def batch_delete(
+    bind: sa.engine.Connection,
+    table_name: str,
+    id_column: str,
+    ids: List[str | int | uuid.UUID],
+    batch_size: int = 1000,
+    id_type: str = "int",
+) -> int:
+    """Delete records in batches to avoid memory issues and timeouts."""
+    total_count = len(ids)
+    if total_count == 0:
+        return 0
+
+    logger.info(
+        f"Starting batch deletion of {total_count} records from {table_name}..."
+    )
+
+    # Determine appropriate ARRAY type
+    if id_type == "uuid":
+        array_type = psql.ARRAY(psql.UUID(as_uuid=True))
+    elif id_type == "int":
+        array_type = psql.ARRAY(sa.Integer())
+    else:
+        array_type = psql.ARRAY(sa.String())
+
+    total_deleted = 0
+    failed_batches = []
+
+    for i in range(0, total_count, batch_size):
+        batch_ids = ids[i : i + batch_size]
+        try:
+            stmt = text(
+                f"DELETE FROM {table_name} WHERE {id_column} = ANY(:ids)"
+            ).bindparams(sa.bindparam("ids", value=batch_ids, type_=array_type))
+            result = bind.execute(stmt)
+            total_deleted += result.rowcount
+
+            # Log progress every 10 batches or at completion
+            batch_num = (i // batch_size) + 1
+            if batch_num % 10 == 0 or i + batch_size >= total_count:
+                logger.info(
+                    f"  Deleted {min(i + batch_size, total_count)}/{total_count} records "
+                    f"({total_deleted} actual) from {table_name}"
+                )
+        except Exception as e:
+            logger.error(f"Failed to delete batch {(i // batch_size) + 1}: {e}")
+            failed_batches.append((i, min(i + batch_size, total_count)))
+
+    if failed_batches:
+        logger.warning(
+            f"Failed to delete {len(failed_batches)} batches from {table_name}. "
+            f"Total deleted: {total_deleted}/{total_count}"
+        )
+        # Fail the migration to avoid silently succeeding on partial cleanup
+        raise RuntimeError(
+            f"Batch deletion failed for {table_name}: "
+            f"{len(failed_batches)} failed batches out of "
+            f"{(total_count + batch_size - 1) // batch_size}."
+        )
+
+    return total_deleted
+
+
+def upgrade() -> None:
+    """Remove legacy user-file documents and connector_credential_pairs.
+
+    The cleanup runs in dependency order: rows that reference a document or a
+    connector_credential_pair are deleted before the row they reference.
+    All deletions go through ``batch_delete``, which raises ``RuntimeError``
+    if a batch fails.
+    """
+
+    bind = op.get_bind()
+    inspector = sa.inspect(bind)
+
+    logger.info("Starting legacy data cleanup...")
+
+    # === Step 1: Identify and delete user-file documents ===
+    logger.info("Identifying user-file documents to delete...")
+
+    # Get document IDs to delete: every document linked (through
+    # document_by_connector_credential_pair) to a cc_pair flagged is_user_file.
+    doc_rows = bind.execute(
+        text(
+            """
+        SELECT DISTINCT dcc.id AS document_id
+        FROM document_by_connector_credential_pair dcc
+        JOIN connector_credential_pair u
+          ON u.connector_id = dcc.connector_id
+         AND u.credential_id = dcc.credential_id
+        WHERE u.is_user_file IS TRUE
+    """
+        )
+    ).fetchall()
+
+    doc_ids = [r[0] for r in doc_rows]
+
+    if doc_ids:
+        logger.info(f"Found {len(doc_ids)} user-file documents to delete")
+
+        # Delete dependent rows first
+        tables_to_clean = [
+            ("document_retrieval_feedback", "document_id"),
+            ("document__tag", "document_id"),
+            ("chunk_stats", "document_id"),
+        ]
+
+        for table_name, column_name in tables_to_clean:
+            # Skip tables that are absent from this database.
+            if table_name in inspector.get_table_names():
+                # document_id is a string in these tables
+                deleted = batch_delete(
+                    bind, table_name, column_name, doc_ids, id_type="str"
+                )
+                logger.info(f"Deleted {deleted} records from {table_name}")
+
+        # Delete document_by_connector_credential_pair entries
+        # (matched on the document id, so every link row of these documents goes)
+        deleted = batch_delete(
+            bind, "document_by_connector_credential_pair", "id", doc_ids, id_type="str"
+        )
+        logger.info(f"Deleted {deleted} document_by_connector_credential_pair records")
+
+        # Delete documents themselves
+        deleted = batch_delete(bind, "document", "id", doc_ids, id_type="str")
+        logger.info(f"Deleted {deleted} document records")
+    else:
+        logger.info("No user-file documents found to delete")
+
+    # === Step 2: Clean up user-file connector_credential_pairs ===
+    logger.info("Cleaning up user-file connector_credential_pairs...")
+
+    # Get cc_pair IDs
+    cc_pair_rows = bind.execute(
+        text(
+            """
+        SELECT id AS cc_pair_id
+        FROM connector_credential_pair
+        WHERE is_user_file IS TRUE
+    """
+        )
+    ).fetchall()
+
+    cc_pair_ids = [r[0] for r in cc_pair_rows]
+
+    if cc_pair_ids:
+        logger.info(
+            f"Found {len(cc_pair_ids)} user-file connector_credential_pairs to clean up"
+        )
+
+        # Delete related records
+        # Clean child tables first to satisfy foreign key constraints,
+        # then the parent tables
+        tables_to_clean = [
+            ("index_attempt_errors", "connector_credential_pair_id"),
+            ("index_attempt", "connector_credential_pair_id"),
+            ("background_error", "cc_pair_id"),
+            ("document_set__connector_credential_pair", "connector_credential_pair_id"),
+            ("user_group__connector_credential_pair", "cc_pair_id"),
+        ]
+
+        for table_name, column_name in tables_to_clean:
+            # Skip tables that are absent from this database.
+            if table_name in inspector.get_table_names():
+                deleted = batch_delete(
+                    bind, table_name, column_name, cc_pair_ids, id_type="int"
+                )
+                logger.info(f"Deleted {deleted} records from {table_name}")
+
+    # === Step 3: Identify connectors and credentials to delete ===
+    # This must run before Step 4: both queries read the is_user_file rows of
+    # connector_credential_pair, which Step 4 deletes.
+    logger.info("Identifying orphaned connectors and credentials...")
+
+    # Get connectors used only by user-file cc_pairs
+    # (NOT EXISTS drops any connector that also appears in a non-user-file pair)
+    connector_rows = bind.execute(
+        text(
+            """
+        SELECT DISTINCT ccp.connector_id
+        FROM connector_credential_pair ccp
+        WHERE ccp.is_user_file IS TRUE
+          AND ccp.connector_id != 0  -- Exclude system default
+          AND NOT EXISTS (
+            SELECT 1
+            FROM connector_credential_pair c2
+            WHERE c2.connector_id = ccp.connector_id
+              AND c2.is_user_file IS NOT TRUE
+          )
+    """
+        )
+    ).fetchall()
+
+    userfile_only_connector_ids = [r[0] for r in connector_rows]
+
+    # Get credentials used only by user-file cc_pairs
+    # (same NOT EXISTS filter as for connectors, keyed on credential_id)
+    credential_rows = bind.execute(
+        text(
+            """
+        SELECT DISTINCT ccp.credential_id
+        FROM connector_credential_pair ccp
+        WHERE ccp.is_user_file IS TRUE
+          AND ccp.credential_id != 0  -- Exclude public/default
+          AND NOT EXISTS (
+            SELECT 1
+            FROM connector_credential_pair c2
+            WHERE c2.credential_id = ccp.credential_id
+              AND c2.is_user_file IS NOT TRUE
+          )
+    """
+        )
+    ).fetchall()
+
+    userfile_only_credential_ids = [r[0] for r in credential_rows]
+
+    # === Step 4: Delete the cc_pairs themselves ===
+    if cc_pair_ids:
+        # Remove FK dependency from user_file first
+        # The DO block looks up every foreign-key constraint on user_file that
+        # references connector_credential_pair and drops each one by name.
+        bind.execute(
+            text(
+                """
+            DO $$
+            DECLARE r RECORD;
+            BEGIN
+              FOR r IN (
+                SELECT conname
+                FROM pg_constraint c
+                JOIN pg_class t ON c.conrelid = t.oid
+                JOIN pg_class ft ON c.confrelid = ft.oid
+                WHERE c.contype = 'f'
+                  AND t.relname = 'user_file'
+                  AND ft.relname = 'connector_credential_pair'
+              ) LOOP
+                EXECUTE format('ALTER TABLE user_file DROP CONSTRAINT IF EXISTS %I', r.conname);
+              END LOOP;
+            END$$;
+        """
+            )
+        )
+
+        # Delete cc_pairs
+        deleted = batch_delete(
+            bind, "connector_credential_pair", "id", cc_pair_ids, id_type="int"
+        )
+        logger.info(f"Deleted {deleted} connector_credential_pair records")
+
+    # === Step 5: Delete orphaned connectors ===
+    if userfile_only_connector_ids:
+        deleted = batch_delete(
+            bind, "connector", "id", userfile_only_connector_ids, id_type="int"
+        )
+        logger.info(f"Deleted {deleted} orphaned connector records")
+
+    # === Step 6: Delete orphaned credentials ===
+    if userfile_only_credential_ids:
+        # Clean up credential__user_group mappings first
+        # NOTE(review): unlike the loops above, this delete is not guarded by a
+        # table-existence check - confirm credential__user_group always exists.
+        deleted = batch_delete(
+            bind,
+            "credential__user_group",
+            "credential_id",
+            userfile_only_credential_ids,
+            id_type="int",
+        )
+        logger.info(f"Deleted {deleted} credential__user_group records")
+
+        # Delete credentials
+        deleted = batch_delete(
+            bind, "credential", "id", userfile_only_credential_ids, id_type="int"
+        )
+        logger.info(f"Deleted {deleted} orphaned credential records")
+
+    logger.info("Migration 5 (legacy data cleanup) completed successfully")
+
+
+def downgrade() -> None:
+    """Cannot restore deleted data - requires backup restoration."""
+
+    logger.error("CRITICAL: Downgrading data cleanup cannot restore deleted data!")
+    logger.error("Data restoration requires backup files or database backup.")
+
+    raise NotImplementedError(
+        "Downgrade of legacy data cleanup is not supported. "
+        "Deleted data must be restored from backups."
+    )
--- a/backend/alembic/versions/505c488f6662_merge_default_assistants_into_unified.py
+++ b/backend/alembic/versions/505c488f6662_merge_default_assistants_into_unified.py
@@ -0,0 +1,380 @@
+"""merge_default_assistants_into_unified
+
+Revision ID: 505c488f6662
+Revises: d09fc20a3c66
+Create Date: 2025-09-09 19:00:56.816626
+
+"""
+
+import json
+from typing import Any
+from typing import NamedTuple
+from uuid import UUID
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "505c488f6662"
+down_revision = "d09fc20a3c66"
+branch_labels = None
+depends_on = None
+
+# Constants for the unified assistant
+UNIFIED_ASSISTANT_NAME = "Assistant"
+UNIFIED_ASSISTANT_DESCRIPTION = (
+    "Your AI assistant with search, web browsing, and image generation capabilities."
+)
+UNIFIED_ASSISTANT_NUM_CHUNKS = 25
+UNIFIED_ASSISTANT_DISPLAY_PRIORITY = 0
+UNIFIED_ASSISTANT_LLM_FILTER_EXTRACTION = True
+UNIFIED_ASSISTANT_LLM_RELEVANCE_FILTER = False
+UNIFIED_ASSISTANT_RECENCY_BIAS = "AUTO"  # NOTE: needs to be capitalized
+UNIFIED_ASSISTANT_CHUNKS_ABOVE = 0
+UNIFIED_ASSISTANT_CHUNKS_BELOW = 0
+UNIFIED_ASSISTANT_DATETIME_AWARE = True
+
+# NOTE: tool specific prompts are handled on the fly and automatically injected
+# into the prompt before passing to the LLM.
+DEFAULT_SYSTEM_PROMPT = """
+You are a highly capable, thoughtful, and precise assistant. Your goal is to deeply understand the \
+user's intent, ask clarifying questions when needed, think step-by-step through complex problems, \
+provide clear and accurate answers, and proactively anticipate helpful follow-up information. Always \
+prioritize being truthful, nuanced, insightful, and efficient.
+The current date is [[CURRENT_DATETIME]]
+
+You use different text styles, bolding, emojis (sparingly), block quotes, and other formatting to make \
+your responses more readable and engaging.
+You use proper Markdown and LaTeX to format your responses for math, scientific, and chemical formulas, \
+symbols, etc.: '$$\\n[expression]\\n$$' for standalone cases and '\\( [expression] \\)' when inline.
+For code you prefer to use Markdown and specify the language.
+You can use Markdown horizontal rules (---) to separate sections of your responses.
+You can use Markdown tables to format your responses for data, lists, and other structured information.
+""".strip()
+
+
+INSERT_DICT: dict[str, Any] = {
+    "name": UNIFIED_ASSISTANT_NAME,
+    "description": UNIFIED_ASSISTANT_DESCRIPTION,
+    "system_prompt": DEFAULT_SYSTEM_PROMPT,
+    "num_chunks": UNIFIED_ASSISTANT_NUM_CHUNKS,
+    "display_priority": UNIFIED_ASSISTANT_DISPLAY_PRIORITY,
+    "llm_filter_extraction": UNIFIED_ASSISTANT_LLM_FILTER_EXTRACTION,
+    "llm_relevance_filter": UNIFIED_ASSISTANT_LLM_RELEVANCE_FILTER,
+    "recency_bias": UNIFIED_ASSISTANT_RECENCY_BIAS,
+    "chunks_above": UNIFIED_ASSISTANT_CHUNKS_ABOVE,
+    "chunks_below": UNIFIED_ASSISTANT_CHUNKS_BELOW,
+    "datetime_aware": UNIFIED_ASSISTANT_DATETIME_AWARE,
+}
+
+GENERAL_ASSISTANT_ID = -1
+ART_ASSISTANT_ID = -3
+
+
+class UserRow(NamedTuple):
+    """Typed representation of user row from database query.
+
+    Rows are unpacked positionally into this tuple (``UserRow(*row)``), so the
+    field order must match the column order of the SELECT that produces them.
+    """
+
+    # Primary key of the "user" row.
+    id: UUID
+    # Per-user assistant-ID lists; each is typed as optional and may be None.
+    chosen_assistants: list[int] | None
+    visible_assistants: list[int] | None
+    hidden_assistants: list[int] | None
+    pinned_assistants: list[int] | None
+
+
+def upgrade() -> None:
+    conn = op.get_bind()
+
+    # Start transaction
+    conn.execute(sa.text("BEGIN"))
+
+    try:
+        # Step 1: Create or update the unified assistant (ID 0)
+        search_assistant = conn.execute(
+            sa.text("SELECT * FROM persona WHERE id = 0")
+        ).fetchone()
+
+        if search_assistant:
+            # Update existing Search assistant to be the unified assistant
+            conn.execute(
+                sa.text(
+                    """
+                    UPDATE persona
+                    SET name = :name,
+                        description = :description,
+                        system_prompt = :system_prompt,
+                        num_chunks = :num_chunks,
+                        is_default_persona = true,
+                        is_visible = true,
+                        deleted = false,
+                        display_priority = :display_priority,
+                        llm_filter_extraction = :llm_filter_extraction,
+                        llm_relevance_filter = :llm_relevance_filter,
+                        recency_bias = :recency_bias,
+                        chunks_above = :chunks_above,
+                        chunks_below = :chunks_below,
+                        datetime_aware = :datetime_aware,
+                        starter_messages = null
+                    WHERE id = 0
+                """
+                ),
+                INSERT_DICT,
+            )
+        else:
+            # Create new unified assistant with ID 0
+            conn.execute(
+                sa.text(
+                    """
+                    INSERT INTO persona (
+                        id, name, description, system_prompt, num_chunks,
+                        is_default_persona, is_visible, deleted, display_priority,
+                        llm_filter_extraction, llm_relevance_filter, recency_bias,
+                        chunks_above, chunks_below, datetime_aware, starter_messages,
+                        builtin_persona
+                    ) VALUES (
+                        0, :name, :description, :system_prompt, :num_chunks,
+                        true, true, false, :display_priority, :llm_filter_extraction,
+                        :llm_relevance_filter, :recency_bias, :chunks_above, :chunks_below,
+                        :datetime_aware, null, true
+                    )
+                """
+                ),
+                INSERT_DICT,
+            )
+
+        # Step 2: Mark ALL builtin assistants as deleted (except the unified assistant ID 0)
+        conn.execute(
+            sa.text(
+                """
+                UPDATE persona
+                SET deleted = true, is_visible = false, is_default_persona = false
+                WHERE builtin_persona = true AND id != 0
+            """
+            )
+        )
+
+        # Step 3: Add all built-in tools to the unified assistant
+        # First, get the tool IDs for SearchTool, ImageGenerationTool, and WebSearchTool
+        search_tool = conn.execute(
+            sa.text("SELECT id FROM tool WHERE in_code_tool_id = 'SearchTool'")
+        ).fetchone()
+
+        if not search_tool:
+            raise ValueError(
+                "SearchTool not found in database. Ensure tools migration has run first."
+            )
+
+        image_gen_tool = conn.execute(
+            sa.text("SELECT id FROM tool WHERE in_code_tool_id = 'ImageGenerationTool'")
+        ).fetchone()
+
+        if not image_gen_tool:
+            raise ValueError(
+                "ImageGenerationTool not found in database. Ensure tools migration has run first."
+            )
+
+        # WebSearchTool is optional - may not be configured
+        web_search_tool = conn.execute(
+            sa.text("SELECT id FROM tool WHERE in_code_tool_id = 'WebSearchTool'")
+        ).fetchone()
+
+        # Clear existing tool associations for persona 0
+        conn.execute(sa.text("DELETE FROM persona__tool WHERE persona_id = 0"))
+
+        # Add tools to the unified assistant
+        conn.execute(
+            sa.text(
+                """
+                INSERT INTO persona__tool (persona_id, tool_id)
+                VALUES (0, :tool_id)
+                ON CONFLICT DO NOTHING
+            """
+            ),
+            {"tool_id": search_tool[0]},
+        )
+
+        conn.execute(
+            sa.text(
+                """
+                INSERT INTO persona__tool (persona_id, tool_id)
+                VALUES (0, :tool_id)
+                ON CONFLICT DO NOTHING
+            """
+            ),
+            {"tool_id": image_gen_tool[0]},
+        )
+
+        if web_search_tool:
+            conn.execute(
+                sa.text(
+                    """
+                    INSERT INTO persona__tool (persona_id, tool_id)
+                    VALUES (0, :tool_id)
+                    ON CONFLICT DO NOTHING
+                """
+                ),
+                {"tool_id": web_search_tool[0]},
+            )
+
+        # Step 4: Migrate existing chat sessions from all builtin assistants to unified assistant
+        conn.execute(
+            sa.text(
+                """
+                UPDATE chat_session
+                SET persona_id = 0
+                WHERE persona_id IN (
+                    SELECT id FROM persona WHERE builtin_persona = true AND id != 0
+                )
+            """
+            )
+        )
+
+        # Step 5: Migrate user preferences - remove references to all builtin assistants
+        # First, get all builtin assistant IDs (except 0)
+        builtin_assistants_result = conn.execute(
+            sa.text(
+                """
+                SELECT id FROM persona
+                WHERE builtin_persona = true AND id != 0
+            """
+            )
+        ).fetchall()
+        builtin_assistant_ids = [row[0] for row in builtin_assistants_result]
+
+        # Get all users with preferences
+        users_result = conn.execute(
+            sa.text(
+                """
+                SELECT id, chosen_assistants, visible_assistants,
+                       hidden_assistants, pinned_assistants
+                FROM "user"
+            """
+            )
+        ).fetchall()
+
+        for user_row in users_result:
+            user = UserRow(*user_row)
+            user_id: UUID = user.id
+            updates: dict[str, Any] = {}
+
+            # Remove all builtin assistants from chosen_assistants
+            if user.chosen_assistants:
+                new_chosen: list[int] = [
+                    assistant_id
+                    for assistant_id in user.chosen_assistants
+                    if assistant_id not in builtin_assistant_ids
+                ]
+                if new_chosen != user.chosen_assistants:
+                    updates["chosen_assistants"] = json.dumps(new_chosen)
+
+            # Remove all builtin assistants from visible_assistants
+            if user.visible_assistants:
+                new_visible: list[int] = [
+                    assistant_id
+                    for assistant_id in user.visible_assistants
+                    if assistant_id not in builtin_assistant_ids
+                ]
+                if new_visible != user.visible_assistants:
+                    updates["visible_assistants"] = json.dumps(new_visible)
+
+            # Add all builtin assistants to hidden_assistants
+            if user.hidden_assistants:
+                new_hidden: list[int] = list(user.hidden_assistants)
+                for old_id in builtin_assistant_ids:
+                    if old_id not in new_hidden:
+                        new_hidden.append(old_id)
+                if new_hidden != user.hidden_assistants:
+                    updates["hidden_assistants"] = json.dumps(new_hidden)
+            else:
+                updates["hidden_assistants"] = json.dumps(builtin_assistant_ids)
+
+            # Remove all builtin assistants from pinned_assistants
+            if user.pinned_assistants:
+                new_pinned: list[int] = [
+                    assistant_id
+                    for assistant_id in user.pinned_assistants
+                    if assistant_id not in builtin_assistant_ids
+                ]
+                if new_pinned != user.pinned_assistants:
+                    updates["pinned_assistants"] = json.dumps(new_pinned)
+
+            # Apply updates if any
+            if updates:
+                set_clause = ", ".join([f"{k} = :{k}" for k in updates.keys()])
+                updates["user_id"] = str(user_id)  # Convert UUID to string for SQL
+                conn.execute(
+                    sa.text(f'UPDATE "user" SET {set_clause} WHERE id = :user_id'),
+                    updates,
+                )
+
+        # Commit transaction
+        conn.execute(sa.text("COMMIT"))
+
+    except Exception as e:
+        # Rollback on error
+        conn.execute(sa.text("ROLLBACK"))
+        raise e
+
+
+def downgrade() -> None:
+    conn = op.get_bind()
+
+    # Start transaction
+    conn.execute(sa.text("BEGIN"))
+
+    try:
+        # Only restore General (ID -1) and Art (ID -3) assistants
+        # Step 1: Keep Search assistant (ID 0) as default but restore original state
+        conn.execute(
+            sa.text(
+                """
+                UPDATE persona
+                SET is_default_persona = true,
+                    is_visible = true,
+                    deleted = false
+                WHERE id = 0
+            """
+            )
+        )
+
+        # Step 2: Restore General assistant (ID -1)
+        conn.execute(
+            sa.text(
+                """
+                UPDATE persona
+                SET deleted = false,
+                    is_visible = true,
+                    is_default_persona = true
+                WHERE id = :general_assistant_id
+            """
+            ),
+            {"general_assistant_id": GENERAL_ASSISTANT_ID},
+        )
+
+        # Step 3: Restore Art assistant (ID -3)
+        conn.execute(
+            sa.text(
+                """
+                UPDATE persona
+                SET deleted = false,
+                    is_visible = true,
+                    is_default_persona = true
+                WHERE id = :art_assistant_id
+            """
+            ),
+            {"art_assistant_id": ART_ASSISTANT_ID},
+        )
+
+        # Note: We don't restore the original tool associations, names, or descriptions
+        # as those would require more complex logic to determine original state.
+        # We also cannot restore original chat session persona_ids as we don't
+        # have the original mappings.
+        # Other builtin assistants remain deleted as per the requirement.
+
+        # Commit transaction
+        conn.execute(sa.text("COMMIT"))
+
+    except Exception as e:
+        # Rollback on error
+        conn.execute(sa.text("ROLLBACK"))
+        raise e
--- a/backend/alembic/versions/7cc3fcc116c1_user_file_uuid_primary_key_swap.py
+++ b/backend/alembic/versions/7cc3fcc116c1_user_file_uuid_primary_key_swap.py
@@ -0,0 +1,193 @@
+"""Migration 4: User file UUID primary key swap
+
+Revision ID: 7cc3fcc116c1
+Revises: 16c37a30adf2
+Create Date: 2025-09-22 09:54:38.292952
+
+This migration performs the critical UUID primary key swap on user_file table.
+It updates all foreign key references to use UUIDs instead of integers.
+"""
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql as psql
+import logging
+
+logger = logging.getLogger("alembic.runtime.migration")
+
+# revision identifiers, used by Alembic.
+revision = "7cc3fcc116c1"
+down_revision = "16c37a30adf2"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Swap user_file primary key from integer to UUID.
+
+    Returns early, with a warning, when ``user_file.new_id`` is absent.
+    Otherwise the statements below depend on one another and must run in
+    this exact order.
+    """
+
+    bind = op.get_bind()
+    inspector = sa.inspect(bind)
+
+    # Verify we're in the expected state
+    user_file_columns = [col["name"] for col in inspector.get_columns("user_file")]
+    if "new_id" not in user_file_columns:
+        logger.warning(
+            "user_file.new_id not found - migration may have already been applied"
+        )
+        return
+
+    logger.info("Starting UUID primary key swap...")
+
+    # === Step 1: Update persona__user_file foreign key to UUID ===
+    logger.info("Updating persona__user_file foreign key...")
+
+    # Drop existing foreign key constraints
+    # (both possible names are tried; IF EXISTS makes a missing one a no-op)
+    op.execute(
+        "ALTER TABLE persona__user_file DROP CONSTRAINT IF EXISTS persona__user_file_user_file_id_uuid_fkey"
+    )
+    op.execute(
+        "ALTER TABLE persona__user_file DROP CONSTRAINT IF EXISTS persona__user_file_user_file_id_fkey"
+    )
+
+    # Create new foreign key to user_file.new_id
+    # (temporary target: new_id is only renamed to id in Step 2)
+    op.create_foreign_key(
+        "persona__user_file_user_file_id_fkey",
+        "persona__user_file",
+        "user_file",
+        local_cols=["user_file_id_uuid"],
+        remote_cols=["new_id"],
+    )
+
+    # Drop the old integer column and rename UUID column
+    op.execute("ALTER TABLE persona__user_file DROP COLUMN IF EXISTS user_file_id")
+    op.alter_column(
+        "persona__user_file",
+        "user_file_id_uuid",
+        new_column_name="user_file_id",
+        existing_type=psql.UUID(as_uuid=True),
+        nullable=False,
+    )
+
+    # Recreate composite primary key
+    op.execute(
+        "ALTER TABLE persona__user_file DROP CONSTRAINT IF EXISTS persona__user_file_pkey"
+    )
+    op.execute(
+        "ALTER TABLE persona__user_file ADD PRIMARY KEY (persona_id, user_file_id)"
+    )
+
+    logger.info("Updated persona__user_file to use UUID foreign key")
+
+    # === Step 2: Perform the primary key swap on user_file ===
+    logger.info("Swapping user_file primary key to UUID...")
+
+    # Drop the primary key constraint
+    op.execute("ALTER TABLE user_file DROP CONSTRAINT IF EXISTS user_file_pkey")
+
+    # Drop the old id column and rename new_id to id
+    op.execute("ALTER TABLE user_file DROP COLUMN IF EXISTS id")
+    op.alter_column(
+        "user_file",
+        "new_id",
+        new_column_name="id",
+        existing_type=psql.UUID(as_uuid=True),
+        nullable=False,
+    )
+
+    # Set default for new inserts
+    op.alter_column(
+        "user_file",
+        "id",
+        existing_type=psql.UUID(as_uuid=True),
+        server_default=sa.text("gen_random_uuid()"),
+    )
+
+    # Create new primary key
+    op.execute("ALTER TABLE user_file ADD PRIMARY KEY (id)")
+
+    logger.info("Swapped user_file primary key to UUID")
+
+    # === Step 3: Update foreign key constraints ===
+    logger.info("Updating foreign key constraints...")
+
+    # Recreate persona__user_file foreign key to point to user_file.id
+    # Drop existing FK first to break dependency on the unique constraint
+    op.execute(
+        "ALTER TABLE persona__user_file DROP CONSTRAINT IF EXISTS persona__user_file_user_file_id_fkey"
+    )
+    # Drop the unique constraint on (formerly) new_id BEFORE recreating the FK,
+    # so the FK will bind to the primary key instead of the unique index.
+    op.execute("ALTER TABLE user_file DROP CONSTRAINT IF EXISTS uq_user_file_new_id")
+    # Now recreate FK to the primary key column
+    op.create_foreign_key(
+        "persona__user_file_user_file_id_fkey",
+        "persona__user_file",
+        "user_file",
+        local_cols=["user_file_id"],
+        remote_cols=["id"],
+    )
+
+    # Add foreign keys for project__user_file
+    # The existing FKs are read once and reused for both checks below, so
+    # each FK is only created when no equivalent one is already present.
+    existing_fks = inspector.get_foreign_keys("project__user_file")
+
+    has_user_file_fk = any(
+        fk.get("referred_table") == "user_file"
+        and fk.get("constrained_columns") == ["user_file_id"]
+        for fk in existing_fks
+    )
+
+    if not has_user_file_fk:
+        op.create_foreign_key(
+            "fk_project__user_file_user_file_id",
+            "project__user_file",
+            "user_file",
+            ["user_file_id"],
+            ["id"],
+        )
+        logger.info("Added project__user_file -> user_file foreign key")
+
+    has_project_fk = any(
+        fk.get("referred_table") == "user_project"
+        and fk.get("constrained_columns") == ["project_id"]
+        for fk in existing_fks
+    )
+
+    if not has_project_fk:
+        op.create_foreign_key(
+            "fk_project__user_file_project_id",
+            "project__user_file",
+            "user_project",
+            ["project_id"],
+            ["id"],
+        )
+        logger.info("Added project__user_file -> user_project foreign key")
+
+    # === Step 4: Mark files for document_id migration ===
+    # NOTE(review): this step only logs; no statement in this function marks
+    # any file. Confirm the marking happens elsewhere or fix the log message.
+    logger.info("Marking files for background document_id migration...")
+
+    logger.info("Migration 4 (UUID primary key swap) completed successfully")
+    logger.info(
+        "NOTE: Background task will update document IDs in Vespa and search_doc"
+    )
+
+
+def downgrade() -> None:
+    """Revert UUID primary key back to integer (data destructive!)."""
+
+    logger.error("CRITICAL: Downgrading UUID primary key swap is data destructive!")
+    logger.error(
+        "This will break all UUID-based references created after the migration."
+    )
+    logger.error("Only proceed if absolutely necessary and have backups.")
+
+    # The downgrade would need to:
+    # 1. Add back integer columns
+    # 2. Generate new sequential IDs
+    # 3. Update all foreign key references
+    # 4. Swap primary keys back
+    # This is complex and risky, so we raise an error instead
+
+    raise NotImplementedError(
+        "Downgrade of UUID primary key swap is not supported due to data loss risk. "
+        "Manual intervention with data backup/restore is required."
+    )
--- a/backend/alembic/versions/9b66d3156fc6_user_file_schema_additions.py
+++ b/backend/alembic/versions/9b66d3156fc6_user_file_schema_additions.py
@@ -0,0 +1,257 @@
+"""Migration 1: User file schema additions
+
+Revision ID: 9b66d3156fc6
+Revises: b4ef3ae0bf6e
+Create Date: 2025-09-22 09:42:06.086732
+
+This migration adds new columns and tables without modifying existing data.
+It is safe to run and can be easily rolled back.
+"""
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql as psql
+import logging
+
+logger = logging.getLogger("alembic.runtime.migration")
+# revision identifiers, used by Alembic.
+revision = "9b66d3156fc6"
+down_revision = "b4ef3ae0bf6e"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Add new columns and tables without modifying existing data.
+
+    Each add-column / create-table step first inspects the live schema and is
+    skipped when the column or table is already present, so the migration can
+    be re-run after a partial application.
+    """
+
+    # Enable pgcrypto for UUID generation
+    op.execute("CREATE EXTENSION IF NOT EXISTS pgcrypto")
+
+    bind = op.get_bind()
+    inspector = sa.inspect(bind)
+
+    # === USER_FILE: Add new columns ===
+    logger.info("Adding new columns to user_file table...")
+
+    # Snapshot of the column names, taken once before any column is added below.
+    user_file_columns = [col["name"] for col in inspector.get_columns("user_file")]
+
+    # Check if ID is already UUID (in case of re-run after partial migration)
+    id_is_uuid = any(
+        col["name"] == "id" and "uuid" in str(col["type"]).lower()
+        for col in inspector.get_columns("user_file")
+    )
+
+    # Add transitional UUID column only if ID is not already UUID
+    if "new_id" not in user_file_columns and not id_is_uuid:
+        op.add_column(
+            "user_file",
+            sa.Column(
+                "new_id",
+                psql.UUID(as_uuid=True),
+                nullable=True,
+                # Value is generated server-side by gen_random_uuid().
+                server_default=sa.text("gen_random_uuid()"),
+            ),
+        )
+        op.create_unique_constraint("uq_user_file_new_id", "user_file", ["new_id"])
+        logger.info("Added new_id column to user_file")
+
+    # Add status column
+    if "status" not in user_file_columns:
+        op.add_column(
+            "user_file",
+            sa.Column(
+                "status",
+                sa.Enum(
+                    "PROCESSING",
+                    "COMPLETED",
+                    "FAILED",
+                    "CANCELED",
+                    name="userfilestatus",
+                    # Non-native enum: no database-level ENUM type is requested.
+                    native_enum=False,
+                ),
+                nullable=False,
+                server_default="PROCESSING",
+            ),
+        )
+        logger.info("Added status column to user_file")
+
+    # Add other tracking columns
+    if "chunk_count" not in user_file_columns:
+        op.add_column(
+            "user_file", sa.Column("chunk_count", sa.Integer(), nullable=True)
+        )
+        logger.info("Added chunk_count column to user_file")
+
+    if "last_accessed_at" not in user_file_columns:
+        op.add_column(
+            "user_file",
+            sa.Column("last_accessed_at", sa.DateTime(timezone=True), nullable=True),
+        )
+        logger.info("Added last_accessed_at column to user_file")
+
+    if "needs_project_sync" not in user_file_columns:
+        op.add_column(
+            "user_file",
+            sa.Column(
+                "needs_project_sync",
+                sa.Boolean(),
+                nullable=False,
+                server_default=sa.text("false"),
+            ),
+        )
+        logger.info("Added needs_project_sync column to user_file")
+
+    if "last_project_sync_at" not in user_file_columns:
+        op.add_column(
+            "user_file",
+            sa.Column(
+                "last_project_sync_at", sa.DateTime(timezone=True), nullable=True
+            ),
+        )
+        logger.info("Added last_project_sync_at column to user_file")
+
+    if "document_id_migrated" not in user_file_columns:
+        op.add_column(
+            "user_file",
+            sa.Column(
+                "document_id_migrated",
+                sa.Boolean(),
+                nullable=False,
+                server_default=sa.text("true"),
+            ),
+        )
+        logger.info("Added document_id_migrated column to user_file")
+
+    # === USER_FOLDER -> USER_PROJECT rename ===
+    # table_names is captured before the rename, so both membership checks
+    # below see the pre-rename state of the schema.
+    table_names = set(inspector.get_table_names())
+
+    if "user_folder" in table_names:
+        logger.info("Updating user_folder table...")
+        # Make description nullable first
+        op.alter_column("user_folder", "description", nullable=True)
+
+        # Rename table if user_project doesn't exist
+        if "user_project" not in table_names:
+            op.execute("ALTER TABLE user_folder RENAME TO user_project")
+            logger.info("Renamed user_folder to user_project")
+    elif "user_project" in table_names:
+        # If already renamed, ensure column nullability
+        project_cols = [col["name"] for col in inspector.get_columns("user_project")]
+        if "description" in project_cols:
+            op.alter_column("user_project", "description", nullable=True)
+
+    # Add instructions column to user_project
+    inspector = sa.inspect(bind)  # Refresh after rename
+    if "user_project" in inspector.get_table_names():
+        project_columns = [col["name"] for col in inspector.get_columns("user_project")]
+        if "instructions" not in project_columns:
+            op.add_column(
+                "user_project",
+                sa.Column("instructions", sa.String(), nullable=True),
+            )
+            logger.info("Added instructions column to user_project")
+
+    # === CHAT_SESSION: Add project_id ===
+    chat_session_columns = [
+        col["name"] for col in inspector.get_columns("chat_session")
+    ]
+    if "project_id" not in chat_session_columns:
+        # Plain nullable integer; no foreign key is created in this migration.
+        op.add_column(
+            "chat_session",
+            sa.Column("project_id", sa.Integer(), nullable=True),
+        )
+        logger.info("Added project_id column to chat_session")
+
+    # === PERSONA__USER_FILE: Add UUID column ===
+    persona_user_file_columns = [
+        col["name"] for col in inspector.get_columns("persona__user_file")
+    ]
+    if "user_file_id_uuid" not in persona_user_file_columns:
+        op.add_column(
+            "persona__user_file",
+            sa.Column("user_file_id_uuid", psql.UUID(as_uuid=True), nullable=True),
+        )
+        logger.info("Added user_file_id_uuid column to persona__user_file")
+
+    # === PROJECT__USER_FILE: Create new table ===
+    if "project__user_file" not in inspector.get_table_names():
+        # Association table with a composite primary key; no foreign keys are
+        # declared here.
+        op.create_table(
+            "project__user_file",
+            sa.Column("project_id", sa.Integer(), nullable=False),
+            sa.Column("user_file_id", psql.UUID(as_uuid=True), nullable=False),
+            sa.PrimaryKeyConstraint("project_id", "user_file_id"),
+        )
+        op.create_index(
+            "idx_project__user_file_user_file_id",
+            "project__user_file",
+            ["user_file_id"],
+        )
+        logger.info("Created project__user_file table")
+
+    logger.info("Migration 1 (schema additions) completed successfully")
+
+
+def downgrade() -> None:
+    """Remove the columns and tables added by ``upgrade``.
+
+    Each step checks the inspected schema first and is skipped when the table
+    or column is absent. ``user_project`` is renamed back to ``user_folder``
+    and its ``description`` column is made NOT NULL again; NULL descriptions
+    are replaced with an empty string first so the constraint can be applied.
+    """
+
+    bind = op.get_bind()
+    inspector = sa.inspect(bind)
+
+    logger.info("Starting downgrade of schema additions...")
+
+    # Drop project__user_file table
+    if "project__user_file" in inspector.get_table_names():
+        op.drop_index("idx_project__user_file_user_file_id", "project__user_file")
+        op.drop_table("project__user_file")
+        logger.info("Dropped project__user_file table")
+
+    # Remove columns from persona__user_file
+    if "persona__user_file" in inspector.get_table_names():
+        columns = [col["name"] for col in inspector.get_columns("persona__user_file")]
+        if "user_file_id_uuid" in columns:
+            op.drop_column("persona__user_file", "user_file_id_uuid")
+            logger.info("Dropped user_file_id_uuid from persona__user_file")
+
+    # Remove columns from chat_session
+    if "chat_session" in inspector.get_table_names():
+        columns = [col["name"] for col in inspector.get_columns("chat_session")]
+        if "project_id" in columns:
+            op.drop_column("chat_session", "project_id")
+            logger.info("Dropped project_id from chat_session")
+
+    # Rename user_project back to user_folder and remove instructions
+    if "user_project" in inspector.get_table_names():
+        columns = [col["name"] for col in inspector.get_columns("user_project")]
+        if "instructions" in columns:
+            op.drop_column("user_project", "instructions")
+        op.execute("ALTER TABLE user_project RENAME TO user_folder")
+        # upgrade() made description nullable, so rows written since then may
+        # hold NULL. Backfill them, otherwise restoring NOT NULL below fails.
+        op.execute(
+            "UPDATE user_folder SET description = '' WHERE description IS NULL"
+        )
+        op.alter_column("user_folder", "description", nullable=False)
+        logger.info("Renamed user_project back to user_folder")
+
+    # Remove columns from user_file
+    if "user_file" in inspector.get_table_names():
+        columns = [col["name"] for col in inspector.get_columns("user_file")]
+
+        columns_to_drop = [
+            "document_id_migrated",
+            "last_project_sync_at",
+            "needs_project_sync",
+            "last_accessed_at",
+            "chunk_count",
+            "status",
+        ]
+
+        for col in columns_to_drop:
+            if col in columns:
+                op.drop_column("user_file", col)
+                logger.info(f"Dropped {col} from user_file")
+
+        if "new_id" in columns:
+            # The unique constraint must go before the column it covers.
+            op.drop_constraint("uq_user_file_new_id", "user_file", type_="unique")
+            op.drop_column("user_file", "new_id")
+            logger.info("Dropped new_id from user_file")
+
+    # upgrade() declares the status enum with native_enum=False, so normally
+    # there is no such type and this is a no-op; IF EXISTS keeps it safe.
+    bind.execute(sa.text("DROP TYPE IF EXISTS userfilestatus"))
+
+    logger.info("Downgrade completed successfully")
--- a/backend/alembic/versions/abbfec3a5ac5_merge_prompt_into_persona.py
+++ b/backend/alembic/versions/abbfec3a5ac5_merge_prompt_into_persona.py
@@ -0,0 +1,225 @@
+"""merge prompt into persona
+
+Revision ID: abbfec3a5ac5
+Revises: 8818cf73fa1a
+Create Date: 2024-12-19 12:00:00.000000
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision = "abbfec3a5ac5"
+down_revision = "8818cf73fa1a"
+branch_labels = None
+depends_on = None
+
+
+# Length used for the system_prompt / task_prompt String columns that this
+# migration creates on persona (upgrade) and on prompt (downgrade).
+MAX_PROMPT_LENGTH = 5_000_000
+
+
+def upgrade() -> None:
+    """Fold the ``prompt`` table into ``persona``.
+
+    Adds ``system_prompt``, ``task_prompt`` and ``datetime_aware`` to
+    ``persona`` (when missing), copies the values from one associated prompt
+    per persona, drops ``chat_message.prompt_id`` together with the
+    ``persona__prompt`` and ``prompt`` tables, and finally makes the two
+    prompt columns NOT NULL.
+
+    NOTE: Prompts without any Personas will just be lost.
+    """
+    # Step 1: Add new columns to persona table (only if they don't exist)
+
+    # Check if columns exist before adding them
+    connection = op.get_bind()
+    inspector = sa.inspect(connection)
+    existing_columns = [col["name"] for col in inspector.get_columns("persona")]
+
+    if "system_prompt" not in existing_columns:
+        op.add_column(
+            "persona",
+            sa.Column(
+                "system_prompt", sa.String(length=MAX_PROMPT_LENGTH), nullable=True
+            ),
+        )
+
+    if "task_prompt" not in existing_columns:
+        op.add_column(
+            "persona",
+            sa.Column(
+                "task_prompt", sa.String(length=MAX_PROMPT_LENGTH), nullable=True
+            ),
+        )
+
+    if "datetime_aware" not in existing_columns:
+        op.add_column(
+            "persona",
+            sa.Column(
+                "datetime_aware", sa.Boolean(), nullable=False, server_default="true"
+            ),
+        )
+
+    # Step 2: Migrate data from prompt table to persona table (only if tables exist)
+    existing_tables = inspector.get_table_names()
+
+    if "prompt" in existing_tables and "persona__prompt" in existing_tables:
+        # For personas that have associated prompts, copy the prompt data.
+        # DISTINCT ON keeps one row per persona; the ORDER BY makes that row
+        # the prompt with the lowest id instead of an arbitrary one.
+        op.execute(
+            """
+            UPDATE persona
+            SET
+                system_prompt = p.system_prompt,
+                task_prompt = p.task_prompt,
+                datetime_aware = p.datetime_aware
+            FROM (
+                -- Get the lowest-id prompt for each persona (in case there are multiple)
+                SELECT DISTINCT ON (pp.persona_id)
+                    pp.persona_id,
+                    pr.system_prompt,
+                    pr.task_prompt,
+                    pr.datetime_aware
+                FROM persona__prompt pp
+                JOIN prompt pr ON pp.prompt_id = pr.id
+                ORDER BY pp.persona_id, pr.id
+            ) p
+            WHERE persona.id = p.persona_id
+        """
+        )
+
+        # Step 3: Remove chat_message's reference to prompt
+        # No prompt_id -> persona_id remapping is performed; the foreign key
+        # and the column are simply dropped when the column is present.
+        chat_message_columns = [
+            col["name"] for col in inspector.get_columns("chat_message")
+        ]
+        if "prompt_id" in chat_message_columns:
+            op.execute(
+                """
+                ALTER TABLE chat_message
+                DROP CONSTRAINT IF EXISTS chat_message__prompt_fk
+            """
+            )
+            op.drop_column("chat_message", "prompt_id")
+
+    # Step 4: Handle personas without prompts - set default values if needed (always run this)
+    op.execute(
+        """
+        UPDATE persona
+        SET
+            system_prompt = COALESCE(system_prompt, ''),
+            task_prompt = COALESCE(task_prompt, '')
+        WHERE system_prompt IS NULL OR task_prompt IS NULL
+    """
+    )
+
+    # Step 5: Drop the persona__prompt association table (if it exists)
+    if "persona__prompt" in existing_tables:
+        op.drop_table("persona__prompt")
+
+    # Step 6: Drop the prompt table (if it exists)
+    if "prompt" in existing_tables:
+        op.drop_table("prompt")
+
+    # Step 7: Make system_prompt and task_prompt non-nullable now that Step 4
+    # has replaced every NULL with an empty string
+    op.alter_column(
+        "persona",
+        "system_prompt",
+        existing_type=sa.String(length=MAX_PROMPT_LENGTH),
+        nullable=False,
+        server_default=None,
+    )
+
+    op.alter_column(
+        "persona",
+        "task_prompt",
+        existing_type=sa.String(length=MAX_PROMPT_LENGTH),
+        nullable=False,
+        server_default=None,
+    )
+
+
+def downgrade() -> None:
+    """Split the prompt fields back out of ``persona``.
+
+    Recreates ``prompt`` and ``persona__prompt``, inserts one prompt per
+    persona that has a non-empty ``system_prompt``, links the two by the
+    generated prompt name, re-adds ``chat_message.prompt_id`` (left NULL for
+    all rows) and drops the three columns from ``persona``.
+    """
+    # Step 1: Recreate the prompt table
+    op.create_table(
+        "prompt",
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=True),
+        sa.Column("name", sa.String(), nullable=False),
+        sa.Column("description", sa.String(), nullable=False),
+        sa.Column("system_prompt", sa.String(length=MAX_PROMPT_LENGTH), nullable=False),
+        sa.Column("task_prompt", sa.String(length=MAX_PROMPT_LENGTH), nullable=False),
+        sa.Column(
+            "datetime_aware", sa.Boolean(), nullable=False, server_default="true"
+        ),
+        sa.Column(
+            "default_prompt", sa.Boolean(), nullable=False, server_default="false"
+        ),
+        sa.Column("deleted", sa.Boolean(), nullable=False, server_default="false"),
+        sa.ForeignKeyConstraint(["user_id"], ["user.id"], ondelete="CASCADE"),
+        sa.PrimaryKeyConstraint("id"),
+    )
+
+    # Step 2: Recreate the persona__prompt association table
+    op.create_table(
+        "persona__prompt",
+        sa.Column("persona_id", sa.Integer(), nullable=False),
+        sa.Column("prompt_id", sa.Integer(), nullable=False),
+        sa.ForeignKeyConstraint(
+            ["persona_id"],
+            ["persona.id"],
+        ),
+        sa.ForeignKeyConstraint(
+            ["prompt_id"],
+            ["prompt.id"],
+        ),
+        sa.PrimaryKeyConstraint("persona_id", "prompt_id"),
+    )
+
+    # Step 3: Migrate data back from persona to prompt table
+    # Personas whose system_prompt is empty get no prompt row at all.
+    # NOTE(review): the RETURNING rows are never read by this function.
+    op.execute(
+        """
+        INSERT INTO prompt (
+            name,
+            description,
+            system_prompt,
+            task_prompt,
+            datetime_aware,
+            default_prompt,
+            deleted,
+            user_id
+        )
+        SELECT
+            CONCAT('Prompt for ', name),
+            description,
+            system_prompt,
+            task_prompt,
+            datetime_aware,
+            is_default_persona,
+            deleted,
+            user_id
+        FROM persona
+        WHERE system_prompt IS NOT NULL AND system_prompt != ''
+        RETURNING id, name
+        """
+    )
+
+    # Step 4: Re-establish persona__prompt relationships
+    # Rows are matched purely on the generated 'Prompt for <name>' string.
+    # NOTE(review): this assumes persona names are unique; personas sharing a
+    # name would each be linked to every prompt with that name - confirm.
+    op.execute(
+        """
+        INSERT INTO persona__prompt (persona_id, prompt_id)
+        SELECT
+            p.id as persona_id,
+            pr.id as prompt_id
+        FROM persona p
+        JOIN prompt pr ON pr.name = CONCAT('Prompt for ', p.name)
+        WHERE p.system_prompt IS NOT NULL AND p.system_prompt != ''
+    """
+    )
+
+    # Step 5: Add prompt_id column back to chat_message
+    # The column is nullable and is not backfilled here.
+    op.add_column("chat_message", sa.Column("prompt_id", sa.Integer(), nullable=True))
+
+    # Step 6: Re-establish foreign key constraint
+    op.create_foreign_key(
+        "chat_message__prompt_fk", "chat_message", "prompt", ["prompt_id"], ["id"]
+    )
+
+    # Step 7: Remove columns from persona table
+    op.drop_column("persona", "datetime_aware")
+    op.drop_column("persona", "task_prompt")
+    op.drop_column("persona", "system_prompt")
--- a/backend/alembic/versions/b30353be4eec_add_mcp_auth_performer.py
+++ b/backend/alembic/versions/b30353be4eec_add_mcp_auth_performer.py
@@ -0,0 +1,123 @@
+"""add_mcp_auth_performer
+
+Revision ID: b30353be4eec
+Revises: 2b75d0a8ffcb
+Create Date: 2025-09-13 14:58:08.413534
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+from onyx.db.enums import MCPAuthenticationPerformer, MCPTransport
+
+
+# revision identifiers, used by Alembic.
+revision = "b30353be4eec"
+down_revision = "2b75d0a8ffcb"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """moving to a better way of handling auth performer and transport
+
+    Adds ``auth_performer`` and ``transport`` to ``mcp_server`` as nullable
+    columns, backfills them for existing rows, then makes both NOT NULL.
+    The ``auth_performer`` backfill rules run in order and every rule after
+    the first only touches rows that are still NULL, so earlier rules win.
+    """
+    # Add nullable column first for backward compatibility
+    op.add_column(
+        "mcp_server",
+        sa.Column(
+            "auth_performer",
+            sa.Enum(MCPAuthenticationPerformer, native_enum=False),
+            nullable=True,
+        ),
+    )
+
+    op.add_column(
+        "mcp_server",
+        sa.Column(
+            "transport",
+            sa.Enum(MCPTransport, native_enum=False),
+            nullable=True,
+        ),
+    )
+
+    # Backfill values using existing data and inference rules
+    bind = op.get_bind()
+
+    # 1) OAUTH servers are always PER_USER
+    bind.execute(
+        sa.text(
+            """
+        UPDATE mcp_server
+        SET auth_performer = 'PER_USER'
+        WHERE auth_type = 'OAUTH'
+        """
+        )
+    )
+
+    # 2) If there is no admin connection config, mark as ADMIN (and not set yet)
+    # NOTE(review): because this runs before rule 3, a server without an admin
+    # config is marked ADMIN even when it has user-specific connection configs;
+    # rule 3 can then only affect servers that do have an admin config.
+    # Confirm this precedence is intended.
+    bind.execute(
+        sa.text(
+            """
+        UPDATE mcp_server
+        SET auth_performer = 'ADMIN'
+        WHERE admin_connection_config_id IS NULL
+          AND auth_performer IS NULL
+        """
+        )
+    )
+
+    # 3) If there exists any user-specific connection config (user_email != ''), mark as PER_USER
+    bind.execute(
+        sa.text(
+            """
+        UPDATE mcp_server AS ms
+        SET auth_performer = 'PER_USER'
+        FROM mcp_connection_config AS mcc
+        WHERE mcc.mcp_server_id = ms.id
+          AND COALESCE(mcc.user_email, '') <> ''
+          AND ms.auth_performer IS NULL
+        """
+        )
+    )
+
+    # 4) Default any remaining nulls to ADMIN (covers API_TOKEN admin-managed and NONE)
+    bind.execute(
+        sa.text(
+            """
+        UPDATE mcp_server
+        SET auth_performer = 'ADMIN'
+        WHERE auth_performer IS NULL
+        """
+        )
+    )
+
+    # Finally, make the column non-nullable
+    op.alter_column(
+        "mcp_server",
+        "auth_performer",
+        existing_type=sa.Enum(MCPAuthenticationPerformer, native_enum=False),
+        nullable=False,
+    )
+
+    # Backfill transport for existing rows to STREAMABLE_HTTP, then make non-nullable
+    bind.execute(
+        sa.text(
+            """
+        UPDATE mcp_server
+        SET transport = 'STREAMABLE_HTTP'
+        WHERE transport IS NULL
+        """
+        )
+    )
+
+    op.alter_column(
+        "mcp_server",
+        "transport",
+        existing_type=sa.Enum(MCPTransport, native_enum=False),
+        nullable=False,
+    )
+
+
+def downgrade() -> None:
+    """Drop the two mcp_server columns, in reverse order of their creation."""
+    for column_name in ("transport", "auth_performer"):
+        op.drop_column("mcp_server", column_name)
--- a/backend/alembic/versions/b4ef3ae0bf6e_add_user_oauth_token_to_slack_bot.py
+++ b/backend/alembic/versions/b4ef3ae0bf6e_add_user_oauth_token_to_slack_bot.py
@@ -0,0 +1,27 @@
+"""add_user_oauth_token_to_slack_bot
+
+Revision ID: b4ef3ae0bf6e
+Revises: 505c488f6662
+Create Date: 2025-08-26 17:47:41.788462
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "b4ef3ae0bf6e"
+down_revision = "505c488f6662"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Add the nullable binary ``user_token`` column to ``slack_bot``."""
+    user_token_column = sa.Column("user_token", sa.LargeBinary(), nullable=True)
+    op.add_column("slack_bot", user_token_column)
+
+
+def downgrade() -> None:
+    """Drop the ``user_token`` column from ``slack_bot`` again."""
+    table, column = "slack_bot", "user_token"
+    op.drop_column(table, column)
--- a/backend/alembic/versions/b7ec9b5b505f_adjust_prompt_length.py
+++ b/backend/alembic/versions/b7ec9b5b505f_adjust_prompt_length.py
@@ -0,0 +1,43 @@
+"""adjust prompt length
+
+Revision ID: b7ec9b5b505f
+Revises: abbfec3a5ac5
+Create Date: 2025-09-10 18:51:15.629197
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "b7ec9b5b505f"
+down_revision = "abbfec3a5ac5"
+branch_labels = None
+depends_on = None
+
+
+# New length applied to persona.system_prompt and persona.task_prompt below.
+MAX_PROMPT_LENGTH = 5_000_000
+
+
+def upgrade() -> None:
+    """Widen both persona prompt columns to ``MAX_PROMPT_LENGTH`` characters.
+
+    NOTE: needed because the previous migration PREVIOUSLY created these
+    columns with a length of 8000.
+    """
+    for prompt_column in ("system_prompt", "task_prompt"):
+        op.alter_column(
+            "persona",
+            prompt_column,
+            existing_type=sa.String(length=8000),
+            type_=sa.String(length=MAX_PROMPT_LENGTH),
+            existing_nullable=False,
+        )
+
+
+def downgrade() -> None:
+    """Intentionally a no-op: a downgrade is not necessary for this change."""
+    return None
--- a/backend/alembic/versions/d09fc20a3c66_seed_builtin_tools.py
+++ b/backend/alembic/versions/d09fc20a3c66_seed_builtin_tools.py
@@ -0,0 +1,152 @@
+"""seed_builtin_tools
+
+Revision ID: d09fc20a3c66
+Revises: b7ec9b5b505f
+Create Date: 2025-09-09 19:32:16.824373
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "d09fc20a3c66"
+down_revision = "b7ec9b5b505f"
+branch_labels = None
+depends_on = None
+
+
+# Tool definitions - core tools that should always be seeded
+# Names/in_code_tool_id are the same as the class names in the tool_implementations package
+# Each dict is passed as-is as the bind parameters of the SQL in upgrade(),
+# so its keys must match the :name, :display_name, :description and
+# :in_code_tool_id placeholders used there.
+BUILT_IN_TOOLS = [
+    {
+        "name": "SearchTool",
+        "display_name": "Internal Search",
+        "description": "The Search Action allows the Assistant to search through connected knowledge to help build an answer.",
+        "in_code_tool_id": "SearchTool",
+    },
+    {
+        "name": "ImageGenerationTool",
+        "display_name": "Image Generation",
+        "description": (
+            "The Image Generation Action allows the assistant to use DALL-E 3 or GPT-IMAGE-1 to generate images. "
+            "The action will be used when the user asks the assistant to generate an image."
+        ),
+        "in_code_tool_id": "ImageGenerationTool",
+    },
+    {
+        "name": "WebSearchTool",
+        "display_name": "Web Search",
+        "description": (
+            "The Web Search Action allows the assistant "
+            "to perform internet searches for up-to-date information."
+        ),
+        "in_code_tool_id": "WebSearchTool",
+    },
+    {
+        "name": "KnowledgeGraphTool",
+        "display_name": "Knowledge Graph Search",
+        "description": (
+            "The Knowledge Graph Search Action allows the assistant to search the "
+            "Knowledge Graph for information. This tool can (for now) only be active in the KG Beta Assistant, "
+            "and it requires the Knowledge Graph to be enabled."
+        ),
+        "in_code_tool_id": "KnowledgeGraphTool",
+    },
+    {
+        "name": "OktaProfileTool",
+        "display_name": "Okta Profile",
+        "description": (
+            "The Okta Profile Action allows the assistant to fetch the current user's information from Okta. "
+            "This may include the user's name, email, phone number, address, and other details such as their "
+            "manager and direct reports."
+        ),
+        "in_code_tool_id": "OktaProfileTool",
+    },
+]
+
+
+def upgrade() -> None:
+    """Seed (or refresh) the built-in rows of the ``tool`` table.
+
+    For every entry in ``BUILT_IN_TOOLS``:
+
+    * if only the legacy ``InternetSearchTool`` row exists, it is renamed in
+      place to ``WebSearchTool`` and its fields are updated;
+    * if a row with the same ``in_code_tool_id`` exists, its name, display
+      name and description are updated;
+    * otherwise a new row is inserted.
+
+    No BEGIN/COMMIT/ROLLBACK is issued here. The statements run on the
+    connection Alembic hands out, whose transaction is expected to be owned
+    by the migration runner (the standard env.py wraps migrations in
+    ``context.begin_transaction()``). A hand-written COMMIT would end that
+    transaction in the middle of the migration, and a failure already
+    propagates to the runner, which rolls back.
+    """
+    conn = op.get_bind()
+
+    # Get existing tools to check what already exists
+    existing_tools = conn.execute(
+        sa.text("SELECT in_code_tool_id FROM tool WHERE in_code_tool_id IS NOT NULL")
+    ).fetchall()
+    existing_tool_ids = {row[0] for row in existing_tools}
+
+    # Insert or update built-in tools
+    for tool in BUILT_IN_TOOLS:
+        in_code_id = tool["in_code_tool_id"]
+
+        # Handle historical rename: InternetSearchTool -> WebSearchTool
+        if (
+            in_code_id == "WebSearchTool"
+            and "WebSearchTool" not in existing_tool_ids
+            and "InternetSearchTool" in existing_tool_ids
+        ):
+            # Rename the existing InternetSearchTool row in place and update fields
+            conn.execute(
+                sa.text(
+                    """
+                        UPDATE tool
+                        SET name = :name,
+                            display_name = :display_name,
+                            description = :description,
+                            in_code_tool_id = :in_code_tool_id
+                        WHERE in_code_tool_id = 'InternetSearchTool'
+                        """
+                ),
+                tool,
+            )
+            # Keep the local view of existing ids in sync to avoid duplicate insert
+            existing_tool_ids.discard("InternetSearchTool")
+            existing_tool_ids.add("WebSearchTool")
+            continue
+
+        if in_code_id in existing_tool_ids:
+            # Update existing tool
+            conn.execute(
+                sa.text(
+                    """
+                        UPDATE tool
+                        SET name = :name,
+                            display_name = :display_name,
+                            description = :description
+                        WHERE in_code_tool_id = :in_code_tool_id
+                        """
+                ),
+                tool,
+            )
+        else:
+            # Insert new tool
+            conn.execute(
+                sa.text(
+                    """
+                        INSERT INTO tool (name, display_name, description, in_code_tool_id)
+                        VALUES (:name, :display_name, :description, :in_code_tool_id)
+                        """
+                ),
+                tool,
+            )
+
+
+def downgrade() -> None:
+    """Intentionally a no-op.
+
+    The seeded tools are not removed on downgrade, since it is fine to just
+    have them around; a later upgrade finds them by ``in_code_tool_id`` again.
+    """
+    return None
--- a/backend/ee/onyx/background/celery/tasks/external_group_syncing/tasks.py
+++ b/backend/ee/onyx/background/celery/tasks/external_group_syncing/tasks.py
@@ -93,7 +93,7 @@ def _is_external_group_sync_due(cc_pair: ConnectorCredentialPair) -> bool:

    if cc_pair.access_type != AccessType.SYNC:
        task_logger.error(
-            f"Recieved non-sync CC Pair {cc_pair.id} for external "
+            f"Received non-sync CC Pair {cc_pair.id} for external "
            f"group sync. Actual access type: {cc_pair.access_type}"
        )
        return False
--- a/backend/ee/onyx/main.py
+++ b/backend/ee/onyx/main.py
@@ -17,6 +17,7 @@ from ee.onyx.server.enterprise_settings.api import (
 from ee.onyx.server.enterprise_settings.api import (
    basic_router as enterprise_settings_router,
 )
+from ee.onyx.server.evals.api import router as evals_router
 from ee.onyx.server.manage.standard_answer import router as standard_answer_router
 from ee.onyx.server.middleware.tenant_tracking import (
    add_api_server_tenant_id_middleware,
@@ -170,6 +171,7 @@ def get_application() -> FastAPI:
    include_router_with_global_prefix_prepended(application, standard_answer_router)
    include_router_with_global_prefix_prepended(application, ee_oauth_router)
    include_router_with_global_prefix_prepended(application, ee_document_cc_pair_router)
+    include_router_with_global_prefix_prepended(application, evals_router)

    # Enterprise-only global settings
    include_router_with_global_prefix_prepended(
--- a/backend/ee/onyx/onyxbot/slack/handlers/handle_standard_answers.py
+++ b/backend/ee/onyx/onyxbot/slack/handlers/handle_standard_answers.py
@@ -8,13 +8,12 @@ from sqlalchemy.orm import Session
 from ee.onyx.db.standard_answer import fetch_standard_answer_categories_by_names
 from ee.onyx.db.standard_answer import find_matching_standard_answers
 from onyx.configs.constants import MessageType
-from onyx.configs.onyxbot_configs import DANSWER_REACT_EMOJI
+from onyx.configs.onyxbot_configs import ONYX_BOT_REACT_EMOJI
 from onyx.db.chat import create_chat_session
 from onyx.db.chat import create_new_chat_message
 from onyx.db.chat import get_chat_messages_by_sessions
 from onyx.db.chat import get_chat_sessions_by_slack_thread_id
 from onyx.db.chat import get_or_create_root_message
-from onyx.db.models import Prompt
 from onyx.db.models import SlackChannelConfig
 from onyx.db.models import StandardAnswer as StandardAnswerModel
 from onyx.onyxbot.slack.blocks import get_restate_blocks
@@ -81,7 +80,6 @@ def _handle_standard_answers(
    message_info: SlackMessageInfo,
    receiver_ids: list[str] | None,
    slack_channel_config: SlackChannelConfig,
-    prompt: Prompt | None,
    logger: OnyxLoggingAdapter,
    client: WebClient,
    db_session: Session,
@@ -161,7 +159,6 @@ def _handle_standard_answers(
        new_user_message = create_new_chat_message(
            chat_session_id=chat_session.id,
            parent_message=root_message,
-            prompt_id=prompt.id if prompt else None,
            message=query_msg.message,
            token_count=0,
            message_type=MessageType.USER,
@@ -182,7 +179,6 @@ def _handle_standard_answers(
        chat_message = create_new_chat_message(
            chat_session_id=chat_session.id,
            parent_message=new_user_message,
-            prompt_id=prompt.id if prompt else None,
            message=answer_message,
            token_count=0,
            message_type=MessageType.ASSISTANT,
@@ -197,7 +193,7 @@ def _handle_standard_answers(
        db_session.commit()

        update_emote_react(
-            emoji=DANSWER_REACT_EMOJI,
+            emoji=ONYX_BOT_REACT_EMOJI,
            channel=message_info.channel_to_respond,
            message_ts=message_info.msg_to_respond,
            remove=True,
--- a/backend/ee/onyx/server/auth_check.py
+++ b/backend/ee/onyx/server/auth_check.py
@@ -16,6 +16,7 @@ EE_PUBLIC_ENDPOINT_SPECS = PUBLIC_ENDPOINT_SPECS + [
    # saml
    ("/auth/saml/authorize", {"GET"}),
    ("/auth/saml/callback", {"POST"}),
+    ("/auth/saml/callback", {"GET"}),
    ("/auth/saml/logout", {"POST"}),
 ]

--- a/backend/onyx/server/features/folder/init.py
+++ b/backend/onyx/server/features/folder/init.py
--- a/backend/ee/onyx/server/evals/api.py
+++ b/backend/ee/onyx/server/evals/api.py
@@ -0,0 +1,32 @@
+from fastapi import APIRouter
+from fastapi import Depends
+
+from ee.onyx.auth.users import current_cloud_superuser
+from onyx.background.celery.apps.client import celery_app as client_app
+from onyx.configs.constants import OnyxCeleryTask
+from onyx.db.models import User
+from onyx.evals.models import EvalConfigurationOptions
+from onyx.server.evals.models import EvalRunAck
+from onyx.utils.logger import setup_logger
+
+logger = setup_logger()
+
+router = APIRouter(prefix="/evals")
+
+
+@router.post("/eval_run", response_model=EvalRunAck)
+def eval_run(
+    request: EvalConfigurationOptions,
+    user: User = Depends(current_cloud_superuser),
+) -> EvalRunAck:
+    """
+    Run an evaluation with the given message and optional dataset.
+    This endpoint requires a valid API key for authentication.
+    """
+    # `user` is never read in the body; it is declared only so that the
+    # `current_cloud_superuser` dependency is evaluated for every request.
+
+    # The request model is serialized to a plain dict and passed to the task
+    # under the single `configuration_dict` keyword.
+    task_kwargs = {"configuration_dict": request.model_dump()}
+
+    # The task is dispatched by name; the acknowledgement below is returned as
+    # soon as `send_task` returns and says nothing about the task's outcome.
+    client_app.send_task(OnyxCeleryTask.EVAL_RUN_TASK, kwargs=task_kwargs)
+    return EvalRunAck(success=True)
--- a/backend/ee/onyx/server/query_and_chat/chat_backend.py
+++ b/backend/ee/onyx/server/query_and_chat/chat_backend.py
@@ -93,7 +93,6 @@ def handle_simplified_chat_message(
        parent_message_id=parent_message.id,
        message=chat_message_req.message,
        file_descriptors=[],
-        prompt_id=None,
        search_doc_ids=chat_message_req.search_doc_ids,
        retrieval_options=retrieval_options,
        # Simple API does not support reranking, hide complexity from user
@@ -181,7 +180,6 @@ def handle_send_message_simple_with_history(
        chat_message = create_new_chat_message(
            chat_session_id=chat_session.id,
            parent_message=chat_message,
-            prompt_id=req.prompt_id,
            message=msg.message,
            token_count=len(llm_tokenizer.encode(msg.message)),
            message_type=msg.role,
@@ -214,7 +212,6 @@ def handle_send_message_simple_with_history(
        parent_message_id=chat_message.id,
        message=query,
        file_descriptors=[],
-        prompt_id=req.prompt_id,
        search_doc_ids=req.search_doc_ids,
        retrieval_options=retrieval_options,
        # Simple API does not support reranking, hide complexity from user
--- a/backend/ee/onyx/server/query_and_chat/models.py
+++ b/backend/ee/onyx/server/query_and_chat/models.py
@@ -73,7 +73,6 @@ class BasicCreateChatMessageRequest(ChunkContext):
 class BasicCreateChatMessageWithHistoryRequest(ChunkContext):
    # Last element is the new query. All previous elements are historical context
    messages: list[ThreadMessage]
-    prompt_id: int | None
    persona_id: int
    retrieval_options: RetrievalDetails | None = None
    query_override: str | None = None
@@ -162,7 +161,6 @@ class OneShotQARequest(ChunkContext):
    persona_id: int | None = None

    messages: list[ThreadMessage]
-    prompt_id: int | None = None
    retrieval_options: RetrievalDetails = Field(default_factory=RetrievalDetails)
    rerank_settings: RerankingDetails | None = None

@@ -181,11 +179,9 @@ class OneShotQARequest(ChunkContext):
    def check_persona_fields(self) -> "OneShotQARequest":
        if self.persona_override_config is None and self.persona_id is None:
            raise ValueError("Exactly one of persona_config or persona_id must be set")
-        elif self.persona_override_config is not None and (
-            self.persona_id is not None or self.prompt_id is not None
-        ):
+        elif self.persona_override_config is not None and (self.persona_id is not None):
            raise ValueError(
-                "If persona_override_config is set, persona_id and prompt_id cannot be set"
+                "If persona_override_config is set, persona_id cannot be set"
            )
        return self

--- a/backend/ee/onyx/server/query_and_chat/query_backend.py
+++ b/backend/ee/onyx/server/query_and_chat/query_backend.py
@@ -31,7 +31,6 @@ from onyx.context.search.pipeline import SearchPipeline
 from onyx.context.search.utils import dedupe_documents
 from onyx.context.search.utils import drop_llm_indices
 from onyx.context.search.utils import relevant_sections_to_indices
-from onyx.db.chat import get_prompt_by_id
 from onyx.db.engine.sql_engine import get_session
 from onyx.db.models import Persona
 from onyx.db.models import User
@@ -152,14 +151,6 @@ def get_answer_stream(
    ):
        raise KeyError("Must provide persona ID or Persona Config")

-    prompt = None
-    if query_request.prompt_id is not None:
-        prompt = get_prompt_by_id(
-            prompt_id=query_request.prompt_id,
-            user=user,
-            db_session=db_session,
-        )
-
    persona_info: Persona | PersonaOverrideConfig | None = None
    if query_request.persona_override_config is not None:
        persona_info = query_request.persona_override_config
@@ -194,7 +185,6 @@ def get_answer_stream(
        user=user,
        persona_id=query_request.persona_id,
        persona_override_config=query_request.persona_override_config,
-        prompt=prompt,
        message_ts_to_respond_to=None,
        retrieval_details=query_request.retrieval_options,
        rerank_settings=query_request.rerank_settings,
--- a/backend/ee/onyx/server/query_history/api.py
+++ b/backend/ee/onyx/server/query_history/api.py
@@ -182,7 +182,6 @@ def admin_get_chat_sessions(
                time_created=chat.time_created.isoformat(),
                time_updated=chat.time_updated.isoformat(),
                shared_status=chat.shared_status,
-                folder_id=chat.folder_id,
                current_alternate_model=chat.current_alternate_model,
            )
            for chat in chat_sessions
--- a/backend/ee/onyx/server/saml.py
+++ b/backend/ee/onyx/server/saml.py
@@ -110,7 +110,6 @@ async def upsert_saml_user(email: str) -> User:


 async def prepare_from_fastapi_request(request: Request) -> dict[str, Any]:
-    form_data = await request.form()
    if request.client is None:
        raise ValueError("Invalid request for SAML")

@@ -125,14 +124,27 @@ async def prepare_from_fastapi_request(request: Request) -> dict[str, Any]:
        "post_data": {},
        "get_data": {},
    }
+
+    # Handle query parameters (for GET requests)
    if request.query_params:
-        rv["get_data"] = (request.query_params,)
-    if "SAMLResponse" in form_data:
-        SAMLResponse = form_data["SAMLResponse"]
-        rv["post_data"]["SAMLResponse"] = SAMLResponse
-    if "RelayState" in form_data:
-        RelayState = form_data["RelayState"]
-        rv["post_data"]["RelayState"] = RelayState
+        rv["get_data"] = dict(request.query_params)
+
+    # Handle form data (for POST requests)
+    if request.method == "POST":
+        form_data = await request.form()
+        if "SAMLResponse" in form_data:
+            SAMLResponse = form_data["SAMLResponse"]
+            rv["post_data"]["SAMLResponse"] = SAMLResponse
+        if "RelayState" in form_data:
+            RelayState = form_data["RelayState"]
+            rv["post_data"]["RelayState"] = RelayState
+    else:
+        # For GET requests, check if SAMLResponse is in query params
+        if "SAMLResponse" in request.query_params:
+            rv["get_data"]["SAMLResponse"] = request.query_params["SAMLResponse"]
+        if "RelayState" in request.query_params:
+            rv["get_data"]["RelayState"] = request.query_params["RelayState"]
+
    return rv


@@ -148,10 +160,27 @@ async def saml_login(request: Request) -> SAMLAuthorizeResponse:
    return SAMLAuthorizeResponse(authorization_url=callback_url)


+@router.get("/callback")
+async def saml_login_callback_get(
+    request: Request,
+    db_session: Session = Depends(get_session),
+) -> Response:
+    """Handle SAML callback via HTTP-Redirect binding (GET request)"""
+    # Thin GET entry point: the incoming request and DB session are handed
+    # unchanged to the shared callback processor.
+    return await _process_saml_callback(request=request, db_session=db_session)
+
+
@router.post("/callback")
 async def saml_login_callback(
    request: Request,
    db_session: Session = Depends(get_session),
+) -> Response:
+    """Handle SAML callback via HTTP-POST binding (POST request)"""
+    return await _process_saml_callback(request, db_session)
+
+
+async def _process_saml_callback(
+    request: Request,
+    db_session: Session,
 ) -> Response:
    req = await prepare_from_fastapi_request(request)
    auth = OneLogin_Saml2_Auth(req, custom_base_path=SAML_CONF_DIR)
--- a/backend/ee/onyx/server/seeding.py
+++ b/backend/ee/onyx/server/seeding.py
@@ -131,32 +131,35 @@ def _seed_llms(
 def _seed_personas(db_session: Session, personas: list[PersonaUpsertRequest]) -> None:
    if personas:
        logger.notice("Seeding Personas")
-        for persona in personas:
-            if not persona.prompt_ids:
-                raise ValueError(
-                    f"Invalid Persona with name {persona.name}; no prompts exist"
+        try:
+            for persona in personas:
+                upsert_persona(
+                    user=None,  # Seeding is done as admin
+                    name=persona.name,
+                    description=persona.description,
+                    num_chunks=(
+                        persona.num_chunks if persona.num_chunks is not None else 0.0
+                    ),
+                    llm_relevance_filter=persona.llm_relevance_filter,
+                    llm_filter_extraction=persona.llm_filter_extraction,
+                    recency_bias=RecencyBiasSetting.AUTO,
+                    document_set_ids=persona.document_set_ids,
+                    llm_model_provider_override=persona.llm_model_provider_override,
+                    llm_model_version_override=persona.llm_model_version_override,
+                    starter_messages=persona.starter_messages,
+                    is_public=persona.is_public,
+                    db_session=db_session,
+                    tool_ids=persona.tool_ids,
+                    display_priority=persona.display_priority,
+                    system_prompt=persona.system_prompt,
+                    task_prompt=persona.task_prompt,
+                    datetime_aware=persona.datetime_aware,
+                    commit=False,
                )
-
-            upsert_persona(
-                user=None,  # Seeding is done as admin
-                name=persona.name,
-                description=persona.description,
-                num_chunks=(
-                    persona.num_chunks if persona.num_chunks is not None else 0.0
-                ),
-                llm_relevance_filter=persona.llm_relevance_filter,
-                llm_filter_extraction=persona.llm_filter_extraction,
-                recency_bias=RecencyBiasSetting.AUTO,
-                prompt_ids=persona.prompt_ids,
-                document_set_ids=persona.document_set_ids,
-                llm_model_provider_override=persona.llm_model_provider_override,
-                llm_model_version_override=persona.llm_model_version_override,
-                starter_messages=persona.starter_messages,
-                is_public=persona.is_public,
-                db_session=db_session,
-                tool_ids=persona.tool_ids,
-                display_priority=persona.display_priority,
-            )
+            db_session.commit()
+        except Exception:
+            logger.exception("Failed to seed personas.")
+            raise


 def _seed_settings(settings: Settings) -> None:
--- a/backend/model_server/encoders.py
+++ b/backend/model_server/encoders.py
@@ -37,24 +37,51 @@ def get_embedding_model(
    model_name: str,
    max_context_length: int,
 ) -> "SentenceTransformer":
+    """
+    Loads or returns a cached SentenceTransformer, sets max_seq_length, and
+    pre-warms rotary (RoPE) caches on first load and after max_seq_length changes.
+    """
    from sentence_transformers import SentenceTransformer  # type: ignore

-    global _GLOBAL_MODELS_DICT  # A dictionary to store models
+    def _prewarm_rope(st_model: "SentenceTransformer", target_len: int) -> None:
+        """
+        Best-effort warm-up: run one throwaway encode() on a deliberately
+        over-long input. The intent is that length-dependent caches (RoPE
+        cos/sin) get built once so later forwards only read them.
+
+        On success the warmed length is recorded on the model as
+        `_rope_prewarmed_to`, which the caller consults before warming again.
+        Any failure is logged as a warning and otherwise ignored.
+        """
+        try:
+            # ensure > max seq after tokenization
+            # Ideally we would use the saved tokenizer, but whatever it's ok
+            # we'll make an assumption about tokenization here
+            long_text = "x " * (target_len * 2)
+            _ = st_model.encode(
+                [long_text],
+                batch_size=1,
+                convert_to_tensor=True,
+                show_progress_bar=False,
+                normalize_embeddings=False,
+            )
+            # Record how far we have warmed. Without this the caller's
+            # getattr(model, "_rope_prewarmed_to", 0) always sees 0 and
+            # re-warms on every max_seq_length change.
+            setattr(st_model, "_rope_prewarmed_to", target_len)
+            logger.info("RoPE pre-warm successful")
+        except Exception as e:
+            logger.warning(f"RoPE pre-warm skipped/failed: {e}")
+
+    global _GLOBAL_MODELS_DICT

    if model_name not in _GLOBAL_MODELS_DICT:
        logger.notice(f"Loading {model_name}")
-        # Some model architectures that aren't built into the Transformers or Sentence
-        # Transformer need to be downloaded to be loaded locally. This does not mean
-        # data is sent to remote servers for inference, however the remote code can
-        # be fairly arbitrary so only use trusted models
        model = SentenceTransformer(
            model_name_or_path=model_name,
            trust_remote_code=True,
        )
        model.max_seq_length = max_context_length
+        _prewarm_rope(model, max_context_length)
        _GLOBAL_MODELS_DICT[model_name] = model
-    elif max_context_length != _GLOBAL_MODELS_DICT[model_name].max_seq_length:
-        _GLOBAL_MODELS_DICT[model_name].max_seq_length = max_context_length
+    else:
+        model = _GLOBAL_MODELS_DICT[model_name]
+        if max_context_length != model.max_seq_length:
+            model.max_seq_length = max_context_length
+            prev = getattr(model, "_rope_prewarmed_to", 0)
+            if max_context_length > int(prev or 0):
+                _prewarm_rope(model, max_context_length)

    return _GLOBAL_MODELS_DICT[model_name]

--- a/backend/onyx/access/access.py
+++ b/backend/onyx/access/access.py
@@ -1,6 +1,7 @@
 from collections.abc import Callable
 from typing import cast

+from sqlalchemy.orm import joinedload
 from sqlalchemy.orm import Session

 from onyx.access.models import DocumentAccess
@@ -10,6 +11,7 @@ from onyx.configs.constants import PUBLIC_DOC_PAT
 from onyx.db.document import get_access_info_for_document
 from onyx.db.document import get_access_info_for_documents
 from onyx.db.models import User
+from onyx.db.models import UserFile
 from onyx.utils.variable_functionality import fetch_ee_implementation_or_noop
 from onyx.utils.variable_functionality import fetch_versioned_implementation

@@ -124,3 +126,25 @@ def source_should_fetch_permissions_during_indexing(source: DocumentSource) -> b
        ),
    )
    return _source_should_fetch_permissions_during_indexing_func(source)
+
+
+def get_access_for_user_files(
+    user_file_ids: list[str],
+    db_session: Session,
+) -> dict[str, DocumentAccess]:
+    """Build a DocumentAccess entry for each requested user file.
+
+    Args:
+        user_file_ids: IDs of the UserFile rows to look up.
+        db_session: Session used to run the lookup query.
+
+    Returns:
+        A dict keyed by the stringified UserFile id. IDs with no matching row
+        are simply absent. A file that has an owner lists only that owner's
+        email; a file without an owner is marked public.
+    """
+    # Nothing requested: skip the database round trip entirely.
+    if not user_file_ids:
+        return {}
+
+    user_files = (
+        db_session.query(UserFile)
+        .options(joinedload(UserFile.user))  # Eager load the user relationship
+        .filter(UserFile.id.in_(user_file_ids))
+        .all()
+    )
+
+    access_by_file_id: dict[str, DocumentAccess] = {}
+    for user_file in user_files:
+        owner = user_file.user
+        access_by_file_id[str(user_file.id)] = DocumentAccess.build(
+            user_emails=[owner.email] if owner else [],
+            user_groups=[],
+            is_public=owner is None,
+            external_user_emails=[],
+            external_user_group_ids=[],
+        )
+    return access_by_file_id
--- a/backend/onyx/agents/agent_search/dc_search_analysis/nodes/a1_search_objects.py
+++ b/backend/onyx/agents/agent_search/dc_search_analysis/nodes/a1_search_objects.py
@@ -39,7 +39,7 @@ def search_objects(
        raise ValueError("Search tool and persona must be provided for DivCon search")

    try:
-        instructions = graph_config.inputs.persona.prompts[0].system_prompt
+        instructions = graph_config.inputs.persona.system_prompt or ""

        agent_1_instructions = extract_section(
            instructions, "Agent Step 1:", "Agent Step 2:"
--- a/backend/onyx/agents/agent_search/dc_search_analysis/nodes/a2_research_object_source.py
+++ b/backend/onyx/agents/agent_search/dc_search_analysis/nodes/a2_research_object_source.py
@@ -43,7 +43,7 @@ def research_object_source(
        raise ValueError("Search tool and persona must be provided for DivCon search")

    try:
-        instructions = graph_config.inputs.persona.prompts[0].system_prompt
+        instructions = graph_config.inputs.persona.system_prompt or ""

        agent_2_instructions = extract_section(
            instructions, "Agent Step 2:", "Agent Step 3:"
--- a/backend/onyx/agents/agent_search/dc_search_analysis/nodes/a4_consolidate_object_research.py
+++ b/backend/onyx/agents/agent_search/dc_search_analysis/nodes/a4_consolidate_object_research.py
@@ -33,7 +33,7 @@ def consolidate_object_research(
    if search_tool is None or graph_config.inputs.persona is None:
        raise ValueError("Search tool and persona must be provided for DivCon search")

-    instructions = graph_config.inputs.persona.prompts[0].system_prompt
+    instructions = graph_config.inputs.persona.system_prompt or ""

    agent_4_instructions = extract_section(
        instructions, "Agent Step 4:", "Agent Step 5:"
--- a/backend/onyx/agents/agent_search/dc_search_analysis/nodes/a5_consolidate_research.py
+++ b/backend/onyx/agents/agent_search/dc_search_analysis/nodes/a5_consolidate_research.py
@@ -35,7 +35,7 @@ def consolidate_research(
        raise ValueError("Search tool and persona must be provided for DivCon search")

    # Populate prompt
-    instructions = graph_config.inputs.persona.prompts[0].system_prompt
+    instructions = graph_config.inputs.persona.system_prompt or ""

    try:
        agent_5_instructions = extract_section(
--- a/backend/onyx/agents/agent_search/dr/conditional_edges.py
+++ b/backend/onyx/agents/agent_search/dr/conditional_edges.py
@@ -24,6 +24,8 @@ def decision_router(state: MainState) -> list[Send | Hashable] | DRPath | str:
        return END
    elif next_tool_name == DRPath.LOGGER.value:
        return DRPath.LOGGER
+    elif next_tool_name == DRPath.CLOSER.value:
+        return DRPath.CLOSER
    else:
        return DRPath.ORCHESTRATOR

--- a/backend/onyx/agents/agent_search/dr/constants.py
+++ b/backend/onyx/agents/agent_search/dr/constants.py
@@ -27,4 +27,5 @@ AVERAGE_TOOL_COSTS: dict[DRPath, float] = {
 DR_TIME_BUDGET_BY_TYPE = {
    ResearchType.THOUGHTFUL: 3.0,
    ResearchType.DEEP: 12.0,
+    ResearchType.FAST: 0.5,
 }
--- a/backend/onyx/agents/agent_search/dr/enums.py
+++ b/backend/onyx/agents/agent_search/dr/enums.py
@@ -8,6 +8,7 @@ class ResearchType(str, Enum):
    LEGACY_AGENTIC = "LEGACY_AGENTIC"  # only used for legacy agentic search migrations
    THOUGHTFUL = "THOUGHTFUL"
    DEEP = "DEEP"
+    FAST = "FAST"


 class ResearchAnswerPurpose(str, Enum):
--- a/backend/onyx/agents/agent_search/dr/nodes/dr_a0_clarification.py
+++ b/backend/onyx/agents/agent_search/dr/nodes/dr_a0_clarification.py
@@ -35,12 +35,24 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
 from onyx.agents.agent_search.shared_graph_utils.utils import run_with_timeout
 from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
 from onyx.agents.agent_search.utils import create_question_prompt
+from onyx.chat.chat_utils import build_citation_map_from_numbers
+from onyx.chat.chat_utils import saved_search_docs_from_llm_docs
+from onyx.chat.models import PromptConfig
+from onyx.chat.prompt_builder.citations_prompt import build_citations_system_message
+from onyx.chat.prompt_builder.citations_prompt import build_citations_user_message
+from onyx.chat.stream_processing.citation_processing import (
+    normalize_square_bracket_citations_to_double_with_links,
+)
+from onyx.configs.agent_configs import TF_DR_TIMEOUT_LONG
+from onyx.configs.agent_configs import TF_DR_TIMEOUT_SHORT
 from onyx.configs.constants import DocumentSource
 from onyx.configs.constants import DocumentSourceDescription
 from onyx.configs.constants import TMP_DRALPHA_PERSONA_NAME
+from onyx.db.chat import create_search_doc_from_saved_search_doc
 from onyx.db.chat import update_db_session_with_messages
 from onyx.db.connector import fetch_unique_document_sources
 from onyx.db.kg_config import get_kg_config_settings
+from onyx.db.models import SearchDoc
 from onyx.db.models import Tool
 from onyx.db.tools import get_tools
 from onyx.file_store.models import ChatFileType
@@ -50,14 +62,14 @@ from onyx.kg.utils.extraction_utils import get_relationship_types_str
 from onyx.llm.utils import check_number_of_tokens
 from onyx.llm.utils import get_max_input_tokens
 from onyx.natural_language_processing.utils import get_tokenizer
+from onyx.prompts.chat_prompts import PROJECT_INSTRUCTIONS_SEPARATOR
 from onyx.prompts.dr_prompts import ANSWER_PROMPT_WO_TOOL_CALLING
 from onyx.prompts.dr_prompts import DECISION_PROMPT_W_TOOL_CALLING
 from onyx.prompts.dr_prompts import DECISION_PROMPT_WO_TOOL_CALLING
 from onyx.prompts.dr_prompts import DEFAULT_DR_SYSTEM_PROMPT
-from onyx.prompts.dr_prompts import EVAL_SYSTEM_PROMPT_W_TOOL_CALLING
-from onyx.prompts.dr_prompts import EVAL_SYSTEM_PROMPT_WO_TOOL_CALLING
 from onyx.prompts.dr_prompts import REPEAT_PROMPT
 from onyx.prompts.dr_prompts import TOOL_DESCRIPTION
+from onyx.prompts.prompt_template import PromptTemplate
 from onyx.server.query_and_chat.streaming_models import MessageStart
 from onyx.server.query_and_chat.streaming_models import OverallStop
 from onyx.server.query_and_chat.streaming_models import SectionEnd
@@ -65,13 +77,13 @@ from onyx.server.query_and_chat.streaming_models import StreamingType
 from onyx.tools.tool_implementations.images.image_generation_tool import (
    ImageGenerationTool,
 )
-from onyx.tools.tool_implementations.internet_search.internet_search_tool import (
-    InternetSearchTool,
-)
 from onyx.tools.tool_implementations.knowledge_graph.knowledge_graph_tool import (
    KnowledgeGraphTool,
 )
 from onyx.tools.tool_implementations.search.search_tool import SearchTool
+from onyx.tools.tool_implementations.web_search.web_search_tool import (
+    WebSearchTool,
+)
 from onyx.utils.b64 import get_image_type
 from onyx.utils.b64 import get_image_type_from_bytes
 from onyx.utils.logger import setup_logger
@@ -108,19 +120,24 @@ def _get_available_tools(

    for tool in graph_config.tooling.tools:

+        if not tool.is_available(db_session):
+            logger.info(f"Tool {tool.name} is not available, skipping")
+            continue
+
        tool_db_info = tool_dict.get(tool.id)
        if tool_db_info:
            incode_tool_id = tool_db_info.in_code_tool_id
        else:
            raise ValueError(f"Tool {tool.name} is not found in the database")

-        if isinstance(tool, InternetSearchTool):
+        if isinstance(tool, WebSearchTool):
            llm_path = DRPath.WEB_SEARCH.value
            path = DRPath.WEB_SEARCH
        elif isinstance(tool, SearchTool):
            llm_path = DRPath.INTERNAL_SEARCH.value
            path = DRPath.INTERNAL_SEARCH
        elif isinstance(tool, KnowledgeGraphTool) and include_kg:
+            # TODO (chris): move this into the `is_available` check
            if len(active_source_types) == 0:
                logger.error(
                    "No active source types found, skipping Knowledge Graph tool"
@@ -304,6 +321,52 @@ def _get_existing_clarification_request(
    return clarification, original_question, chat_history_string


+def _persist_final_docs_and_citations(
+    db_session: Session,
+    context_llm_docs: list[Any] | None,
+    full_answer: str | None,
+) -> tuple[list[SearchDoc], dict[int, int] | None]:
+    """Persist final documents from in-context docs and derive citation mapping.
+
+    Args:
+        db_session: Session the new `SearchDoc` rows are added to and flushed
+            on. This function does not commit.
+        context_llm_docs: In-context docs to persist. When empty or None,
+            nothing is written and `([], None)` is returned.
+        full_answer: Final answer text scanned for inline `[[n]]` /
+            `[[n, m]]` citations, each optionally followed by a `(link)`.
+
+    Returns:
+        The list of persisted `SearchDoc` records and an optional citation
+        map, as produced by `build_citation_map_from_numbers`, or None when no
+        citations were found or the map came back empty.
+    """
+    if not context_llm_docs:
+        return [], None
+
+    final_documents_db: list[SearchDoc] = []
+    for saved_doc in saved_search_docs_from_llm_docs(context_llm_docs):
+        db_doc = create_search_doc_from_saved_search_doc(saved_doc)
+        db_session.add(db_doc)
+        final_documents_db.append(db_doc)
+    # Flush only; committing is left to the caller.
+    db_session.flush()
+
+    cited_numbers: set[int] = set()
+    try:
+        # Match [[1]] or [[1, 2]] optionally followed by a link like ([[1]](http...))
+        citation_groups = re.findall(
+            r"\[\[(\d+(?:,\s*\d+)*)\]\](?:\([^)]*\))?", full_answer or ""
+        )
+        cited_numbers = {
+            int(num_str.strip())
+            for group in citation_groups
+            for num_str in group.split(",")
+        }
+    except Exception as e:
+        # Citation extraction is best-effort: keep going without citations,
+        # but leave a trace instead of swallowing the error silently.
+        logger.warning(f"Failed to parse citations from final answer: {e}")
+        cited_numbers = set()
+
+    citations_map: dict[int, int] | None = None
+    if cited_numbers and final_documents_db:
+        translations = build_citation_map_from_numbers(
+            cited_numbers=cited_numbers,
+            db_docs=final_documents_db,
+        )
+        # Normalize an empty mapping to None
+        citations_map = translations or None
+
+    return final_documents_db, citations_map
+
+
 _ARTIFICIAL_ALL_ENCOMPASSING_TOOL = {
    "type": "function",
    "function": {
@@ -399,23 +462,29 @@ def clarifier(
    else:
        active_source_type_descriptions_str = ""

-    if graph_config.inputs.persona and len(graph_config.inputs.persona.prompts) > 0:
-        assistant_system_prompt = (
-            graph_config.inputs.persona.prompts[0].system_prompt
-            or DEFAULT_DR_SYSTEM_PROMPT
-        ) + "\n\n"
-        if graph_config.inputs.persona.prompts[0].task_prompt:
+    if graph_config.inputs.persona:
+        assistant_system_prompt = PromptTemplate(
+            graph_config.inputs.persona.system_prompt or DEFAULT_DR_SYSTEM_PROMPT
+        ).build()
+        if graph_config.inputs.persona.task_prompt:
            assistant_task_prompt = (
                "\n\nHere are more specifications from the user:\n\n"
-                + graph_config.inputs.persona.prompts[0].task_prompt
+                + PromptTemplate(graph_config.inputs.persona.task_prompt).build()
            )
        else:
            assistant_task_prompt = ""

    else:
-        assistant_system_prompt = DEFAULT_DR_SYSTEM_PROMPT + "\n\n"
+        assistant_system_prompt = PromptTemplate(DEFAULT_DR_SYSTEM_PROMPT).build()
        assistant_task_prompt = ""

+    if graph_config.inputs.project_instructions:
+        assistant_system_prompt = (
+            assistant_system_prompt
+            + PROJECT_INSTRUCTIONS_SEPARATOR
+            + graph_config.inputs.project_instructions
+        )
+
    chat_history_string = (
        get_chat_history_string(
            graph_config.inputs.prompt_builder.message_history,
@@ -444,6 +513,11 @@ def clarifier(
        graph_config.inputs.files
    )

+    # Use project/search context docs if available to enable citation mapping
+    context_llm_docs = getattr(
+        graph_config.inputs.prompt_builder, "context_llm_docs", None
+    )
+
    if not (force_use_tool and force_use_tool.force_use):

        if not use_tool_calling_llm or len(available_tools) == 1:
@@ -459,8 +533,9 @@ def clarifier(
                llm_decision = invoke_llm_json(
                    llm=graph_config.tooling.primary_llm,
                    prompt=create_question_prompt(
-                        EVAL_SYSTEM_PROMPT_WO_TOOL_CALLING,
+                        assistant_system_prompt,
                        decision_prompt,
+                        uploaded_image_context=uploaded_image_context,
                    ),
                    schema=DecisionResponse,
                )
@@ -488,12 +563,13 @@ def clarifier(
                )

                answer_tokens, _, _ = run_with_timeout(
-                    80,
+                    TF_DR_TIMEOUT_LONG,
                    lambda: stream_llm_answer(
                        llm=graph_config.tooling.primary_llm,
                        prompt=create_question_prompt(
                            assistant_system_prompt,
                            answer_prompt + assistant_task_prompt,
+                            uploaded_image_context=uploaded_image_context,
                        ),
                        event_name="basic_response",
                        writer=writer,
@@ -501,7 +577,7 @@ def clarifier(
                        agent_answer_level=0,
                        agent_answer_question_num=0,
                        agent_answer_type="agent_level_answer",
-                        timeout_override=60,
+                        timeout_override=TF_DR_TIMEOUT_LONG,
                        ind=current_step_nr,
                        context_docs=None,
                        replace_citations=True,
@@ -556,10 +632,44 @@ def clarifier(
                active_source_type_descriptions_str=active_source_type_descriptions_str,
            )

+            if context_llm_docs:
+                persona = graph_config.inputs.persona
+                if persona is not None:
+                    prompt_config = PromptConfig.from_model(persona)
+                else:
+                    prompt_config = PromptConfig(
+                        system_prompt=assistant_system_prompt,
+                        task_prompt="",
+                        datetime_aware=True,
+                    )
+
+                system_prompt_to_use_content = build_citations_system_message(
+                    prompt_config
+                ).content
+                system_prompt_to_use: str = cast(str, system_prompt_to_use_content)
+                if graph_config.inputs.project_instructions:
+                    system_prompt_to_use = (
+                        system_prompt_to_use
+                        + PROJECT_INSTRUCTIONS_SEPARATOR
+                        + graph_config.inputs.project_instructions
+                    )
+                user_prompt_to_use = build_citations_user_message(
+                    user_query=original_question,
+                    files=[],
+                    prompt_config=prompt_config,
+                    context_docs=context_llm_docs,
+                    all_doc_useful=False,
+                    history_message=chat_history_string,
+                    context_type="user files",
+                ).content
+            else:
+                system_prompt_to_use = assistant_system_prompt
+                user_prompt_to_use = decision_prompt + assistant_task_prompt
+
            stream = graph_config.tooling.primary_llm.stream(
                prompt=create_question_prompt(
-                    assistant_system_prompt + EVAL_SYSTEM_PROMPT_W_TOOL_CALLING,
-                    decision_prompt + assistant_task_prompt,
+                    cast(str, system_prompt_to_use),
+                    cast(str, user_prompt_to_use),
                    uploaded_image_context=uploaded_image_context,
                ),
                tools=([_ARTIFICIAL_ALL_ENCOMPASSING_TOOL]),
@@ -572,6 +682,8 @@ def clarifier(
                should_stream_answer=True,
                writer=writer,
                ind=0,
+                final_search_results=context_llm_docs,
+                displayed_search_results=context_llm_docs,
                generate_final_answer=True,
                chat_message_id=str(graph_config.persistence.chat_session_id),
            )
@@ -579,19 +691,32 @@ def clarifier(
            if len(full_response.ai_message_chunk.tool_calls) == 0:

                if isinstance(full_response.full_answer, str):
-                    full_answer = full_response.full_answer
+                    full_answer = (
+                        normalize_square_bracket_citations_to_double_with_links(
+                            full_response.full_answer
+                        )
+                    )
                else:
                    full_answer = None

+                # Persist final documents and derive citations when using in-context docs
+                final_documents_db, citations_map = _persist_final_docs_and_citations(
+                    db_session=db_session,
+                    context_llm_docs=context_llm_docs,
+                    full_answer=full_answer,
+                )
+
                update_db_session_with_messages(
                    db_session=db_session,
                    chat_message_id=message_id,
                    chat_session_id=graph_config.persistence.chat_session_id,
                    is_agentic=graph_config.behavior.use_agentic_search,
                    message=full_answer,
+                    token_count=len(llm_tokenizer.encode(full_answer or "")),
+                    citations=citations_map,
+                    final_documents=final_documents_db or None,
                    update_parent_message=True,
                    research_answer_purpose=ResearchAnswerPurpose.ANSWER,
-                    token_count=len(llm_tokenizer.encode(full_answer or "")),
                )

                db_session.commit()
@@ -612,7 +737,7 @@ def clarifier(

    clarification = None

-    if research_type != ResearchType.THOUGHTFUL:
+    if research_type == ResearchType.DEEP:
        result = _get_existing_clarification_request(graph_config)
        if result is not None:
            clarification, original_question, chat_history_string = result
@@ -642,10 +767,12 @@ def clarifier(
                clarification_response = invoke_llm_json(
                    llm=graph_config.tooling.primary_llm,
                    prompt=create_question_prompt(
-                        assistant_system_prompt, clarification_prompt
+                        assistant_system_prompt,
+                        clarification_prompt,
+                        uploaded_image_context=uploaded_image_context,
                    ),
                    schema=ClarificationGenerationResponse,
-                    timeout_override=25,
+                    timeout_override=TF_DR_TIMEOUT_SHORT,
                    # max_tokens=1500,
                )
            except Exception as e:
@@ -674,7 +801,7 @@ def clarifier(
                )

                _, _, _ = run_with_timeout(
-                    80,
+                    TF_DR_TIMEOUT_LONG,
                    lambda: stream_llm_answer(
                        llm=graph_config.tooling.primary_llm,
                        prompt=repeat_prompt,
@@ -683,7 +810,7 @@ def clarifier(
                        agent_answer_level=0,
                        agent_answer_question_num=0,
                        agent_answer_type="agent_level_answer",
-                        timeout_override=60,
+                        timeout_override=TF_DR_TIMEOUT_LONG,
                        answer_piece=StreamingType.MESSAGE_DELTA.value,
                        ind=current_step_nr,
                        # max_tokens=None,
--- a/backend/onyx/agents/agent_search/dr/nodes/dr_a1_orchestrator.py
+++ b/backend/onyx/agents/agent_search/dr/nodes/dr_a1_orchestrator.py
@@ -30,6 +30,8 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
 from onyx.agents.agent_search.shared_graph_utils.utils import run_with_timeout
 from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
 from onyx.agents.agent_search.utils import create_question_prompt
+from onyx.configs.agent_configs import TF_DR_TIMEOUT_LONG
+from onyx.configs.agent_configs import TF_DR_TIMEOUT_SHORT
 from onyx.kg.utils.extraction_utils import get_entity_types_str
 from onyx.kg.utils.extraction_utils import get_relationship_types_str
 from onyx.prompts.dr_prompts import DEFAULLT_DECISION_PROMPT
@@ -139,6 +141,7 @@ def orchestrator(
    available_tools = state.available_tools or {}

    uploaded_context = state.uploaded_test_context or ""
+    uploaded_image_context = state.uploaded_image_context or []

    questions = [
        f"{iteration_response.tool}: {iteration_response.question}"
@@ -170,11 +173,48 @@ def orchestrator(
    reasoning_result = "(No reasoning result provided yet.)"
    tool_calls_string = "(No tool calls provided yet.)"

-    if research_type == ResearchType.THOUGHTFUL:
-        if iteration_nr == 1:
-            remaining_time_budget = DR_TIME_BUDGET_BY_TYPE[ResearchType.THOUGHTFUL]
+    if research_type not in ResearchType:
+        raise ValueError(f"Invalid research type: {research_type}")

-        elif iteration_nr > 1:
+    if research_type in [ResearchType.THOUGHTFUL, ResearchType.FAST]:
+        if iteration_nr == 1:
+            remaining_time_budget = DR_TIME_BUDGET_BY_TYPE[research_type]
+
+        elif remaining_time_budget <= 0:
+
+            write_custom_event(
+                current_step_nr,
+                SectionEnd(),
+                writer,
+            )
+
+            current_step_nr += 1
+
+            return OrchestrationUpdate(
+                tools_used=[DRPath.CLOSER.value],
+                current_step_nr=current_step_nr,
+                query_list=[],
+                iteration_nr=iteration_nr,
+                log_messages=[
+                    get_langgraph_node_log_string(
+                        graph_component="main",
+                        node_name="orchestrator",
+                        node_start_time=node_start_time,
+                    )
+                ],
+                plan_of_record=plan_of_record,
+                remaining_time_budget=remaining_time_budget,
+                iteration_instructions=[
+                    IterationInstructions(
+                        iteration_nr=iteration_nr,
+                        plan=None,
+                        reasoning="Time to wrap up.",
+                        purpose="",
+                    )
+                ],
+            )
+
+        elif iteration_nr > 1 and remaining_time_budget > 0:
            # for each iteration past the first one, we need to see whether we
            # have enough information to answer the question.
            # if we do, we can stop the iteration and return the answer.
@@ -200,18 +240,20 @@ def orchestrator(
            reasoning_tokens: list[str] = [""]

            reasoning_tokens, _, _ = run_with_timeout(
-                80,
+                TF_DR_TIMEOUT_LONG,
                lambda: stream_llm_answer(
                    llm=graph_config.tooling.primary_llm,
                    prompt=create_question_prompt(
-                        decision_system_prompt, reasoning_prompt
+                        decision_system_prompt,
+                        reasoning_prompt,
+                        uploaded_image_context=uploaded_image_context,
                    ),
                    event_name="basic_response",
                    writer=writer,
                    agent_answer_level=0,
                    agent_answer_question_num=0,
                    agent_answer_type="agent_level_answer",
-                    timeout_override=60,
+                    timeout_override=TF_DR_TIMEOUT_LONG,
                    answer_piece=StreamingType.REASONING_DELTA.value,
                    ind=current_step_nr,
                    # max_tokens=None,
@@ -295,9 +337,10 @@ def orchestrator(
                    prompt=create_question_prompt(
                        decision_system_prompt,
                        decision_prompt,
+                        uploaded_image_context=uploaded_image_context,
                    ),
                    schema=OrchestratorDecisonsNoPlan,
-                    timeout_override=35,
+                    timeout_override=TF_DR_TIMEOUT_SHORT,
                    # max_tokens=2500,
                )
                next_step = orchestrator_action.next_step
@@ -320,7 +363,7 @@ def orchestrator(
            reasoning_result = "Time to wrap up."
            next_tool_name = DRPath.CLOSER.value

-    else:
+    elif research_type == ResearchType.DEEP:
        if iteration_nr == 1 and not plan_of_record:
            # by default, we start a new iteration, but if there is a feedback request,
            # we start a new iteration 0 again (set a bit later)
@@ -346,9 +389,10 @@ def orchestrator(
                    prompt=create_question_prompt(
                        decision_system_prompt,
                        plan_generation_prompt,
+                        uploaded_image_context=uploaded_image_context,
                    ),
                    schema=OrchestrationPlan,
-                    timeout_override=25,
+                    timeout_override=TF_DR_TIMEOUT_SHORT,
                    # max_tokens=3000,
                )
            except Exception as e:
@@ -368,7 +412,7 @@ def orchestrator(
            )

            _, _, _ = run_with_timeout(
-                80,
+                TF_DR_TIMEOUT_LONG,
                lambda: stream_llm_answer(
                    llm=graph_config.tooling.primary_llm,
                    prompt=repeat_plan_prompt,
@@ -377,7 +421,7 @@ def orchestrator(
                    agent_answer_level=0,
                    agent_answer_question_num=0,
                    agent_answer_type="agent_level_answer",
-                    timeout_override=60,
+                    timeout_override=TF_DR_TIMEOUT_LONG,
                    answer_piece=StreamingType.REASONING_DELTA.value,
                    ind=current_step_nr,
                ),
@@ -424,9 +468,10 @@ def orchestrator(
                    prompt=create_question_prompt(
                        decision_system_prompt,
                        decision_prompt,
+                        uploaded_image_context=uploaded_image_context,
                    ),
                    schema=OrchestratorDecisonsNoPlan,
-                    timeout_override=15,
+                    timeout_override=TF_DR_TIMEOUT_LONG,
                    # max_tokens=1500,
                )
                next_step = orchestrator_action.next_step
@@ -460,7 +505,7 @@ def orchestrator(
        )

        _, _, _ = run_with_timeout(
-            80,
+            TF_DR_TIMEOUT_LONG,
            lambda: stream_llm_answer(
                llm=graph_config.tooling.primary_llm,
                prompt=repeat_reasoning_prompt,
@@ -469,7 +514,7 @@ def orchestrator(
                agent_answer_level=0,
                agent_answer_question_num=0,
                agent_answer_type="agent_level_answer",
-                timeout_override=60,
+                timeout_override=TF_DR_TIMEOUT_LONG,
                answer_piece=StreamingType.REASONING_DELTA.value,
                ind=current_step_nr,
                # max_tokens=None,
@@ -484,6 +529,9 @@ def orchestrator(

        current_step_nr += 1

+    else:
+        raise NotImplementedError(f"Research type {research_type} is not implemented.")
+
    base_next_step_purpose_prompt = get_dr_prompt_orchestration_templates(
        DRPromptPurpose.NEXT_STEP_PURPOSE,
        ResearchType.DEEP,
@@ -498,48 +546,55 @@ def orchestrator(
    )

    purpose_tokens: list[str] = [""]
+    purpose = ""

-    try:
+    if research_type in [ResearchType.THOUGHTFUL, ResearchType.DEEP]:

-        write_custom_event(
-            current_step_nr,
-            ReasoningStart(),
-            writer,
-        )
+        try:

-        purpose_tokens, _, _ = run_with_timeout(
-            80,
-            lambda: stream_llm_answer(
-                llm=graph_config.tooling.primary_llm,
-                prompt=create_question_prompt(
-                    decision_system_prompt,
-                    orchestration_next_step_purpose_prompt,
+            write_custom_event(
+                current_step_nr,
+                ReasoningStart(),
+                writer,
+            )
+
+            purpose_tokens, _, _ = run_with_timeout(
+                TF_DR_TIMEOUT_LONG,
+                lambda: stream_llm_answer(
+                    llm=graph_config.tooling.primary_llm,
+                    prompt=create_question_prompt(
+                        decision_system_prompt,
+                        orchestration_next_step_purpose_prompt,
+                        uploaded_image_context=uploaded_image_context,
+                    ),
+                    event_name="basic_response",
+                    writer=writer,
+                    agent_answer_level=0,
+                    agent_answer_question_num=0,
+                    agent_answer_type="agent_level_answer",
+                    timeout_override=TF_DR_TIMEOUT_LONG,
+                    answer_piece=StreamingType.REASONING_DELTA.value,
+                    ind=current_step_nr,
+                    # max_tokens=None,
                ),
-                event_name="basic_response",
-                writer=writer,
-                agent_answer_level=0,
-                agent_answer_question_num=0,
-                agent_answer_type="agent_level_answer",
-                timeout_override=60,
-                answer_piece=StreamingType.REASONING_DELTA.value,
-                ind=current_step_nr,
-                # max_tokens=None,
-            ),
-        )
+            )

-        write_custom_event(
-            current_step_nr,
-            SectionEnd(),
-            writer,
-        )
+            write_custom_event(
+                current_step_nr,
+                SectionEnd(),
+                writer,
+            )

-        current_step_nr += 1
+            current_step_nr += 1

-    except Exception as e:
-        logger.error(f"Error in orchestration next step purpose: {e}")
-        raise e
+        except Exception as e:
+            logger.error("Error in orchestration next step purpose.")
+            raise e

-    purpose = cast(str, merge_content(*purpose_tokens))
+        purpose = cast(str, merge_content(*purpose_tokens))
+
+    elif research_type == ResearchType.FAST:
+        purpose = f"Answering the question using the {next_tool_name}"

    if not next_tool_name:
        raise ValueError("The next step has not been defined. This should not happen.")
--- a/backend/onyx/agents/agent_search/dr/nodes/dr_a2_closer.py
+++ b/backend/onyx/agents/agent_search/dr/nodes/dr_a2_closer.py
@@ -33,6 +33,7 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
 from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
 from onyx.agents.agent_search.utils import create_question_prompt
 from onyx.chat.chat_utils import llm_doc_from_inference_section
+from onyx.configs.agent_configs import TF_DR_TIMEOUT_LONG
 from onyx.context.search.models import InferenceSection
 from onyx.db.chat import create_search_doc_from_inference_section
 from onyx.db.chat import update_db_session_with_messages
@@ -41,6 +42,7 @@ from onyx.db.models import ResearchAgentIteration
 from onyx.db.models import ResearchAgentIterationSubStep
 from onyx.db.models import SearchDoc as DbSearchDoc
 from onyx.llm.utils import check_number_of_tokens
+from onyx.prompts.chat_prompts import PROJECT_INSTRUCTIONS_SEPARATOR
 from onyx.prompts.dr_prompts import FINAL_ANSWER_PROMPT_W_SUB_ANSWERS
 from onyx.prompts.dr_prompts import FINAL_ANSWER_PROMPT_WITHOUT_SUB_ANSWERS
 from onyx.prompts.dr_prompts import TEST_INFO_COMPLETE_PROMPT
@@ -224,7 +226,7 @@ def closer(

    research_type = graph_config.behavior.research_type

-    assistant_system_prompt = state.assistant_system_prompt
+    assistant_system_prompt: str = state.assistant_system_prompt or ""
    assistant_task_prompt = state.assistant_task_prompt

    uploaded_context = state.uploaded_test_context or ""
@@ -276,7 +278,7 @@ def closer(
                test_info_complete_prompt + (assistant_task_prompt or ""),
            ),
            schema=TestInfoCompleteResponse,
-            timeout_override=40,
+            timeout_override=TF_DR_TIMEOUT_LONG,
            # max_tokens=1000,
        )

@@ -311,10 +313,12 @@ def closer(
        writer,
    )

-    if research_type == ResearchType.THOUGHTFUL:
+    if research_type in [ResearchType.THOUGHTFUL, ResearchType.FAST]:
        final_answer_base_prompt = FINAL_ANSWER_PROMPT_WITHOUT_SUB_ANSWERS
-    else:
+    elif research_type == ResearchType.DEEP:
        final_answer_base_prompt = FINAL_ANSWER_PROMPT_W_SUB_ANSWERS
+    else:
+        raise ValueError(f"Invalid research type: {research_type}")

    estimated_final_answer_prompt_tokens = check_number_of_tokens(
        final_answer_base_prompt.build(
@@ -346,6 +350,13 @@ def closer(
        uploaded_context=uploaded_context,
    )

+    if graph_config.inputs.project_instructions:
+        assistant_system_prompt = (
+            assistant_system_prompt
+            + PROJECT_INSTRUCTIONS_SEPARATOR
+            + (graph_config.inputs.project_instructions or "")
+        )
+
    all_context_llmdocs = [
        llm_doc_from_inference_section(inference_section)
        for inference_section in all_cited_documents
@@ -353,7 +364,7 @@ def closer(

    try:
        streamed_output, _, citation_infos = run_with_timeout(
-            240,
+            int(3 * TF_DR_TIMEOUT_LONG),
            lambda: stream_llm_answer(
                llm=graph_config.tooling.primary_llm,
                prompt=create_question_prompt(
@@ -365,7 +376,7 @@ def closer(
                agent_answer_level=0,
                agent_answer_question_num=0,
                agent_answer_type="agent_level_answer",
-                timeout_override=60,
+                timeout_override=int(2 * TF_DR_TIMEOUT_LONG),
                answer_piece=StreamingType.MESSAGE_DELTA.value,
                ind=current_step_nr,
                context_docs=all_context_llmdocs,
--- a/backend/onyx/agents/agent_search/dr/process_llm_stream.py
+++ b/backend/onyx/agents/agent_search/dr/process_llm_stream.py
@@ -9,6 +9,7 @@ from pydantic import BaseModel
 from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
 from onyx.chat.chat_utils import saved_search_docs_from_llm_docs
 from onyx.chat.models import AgentAnswerPiece
+from onyx.chat.models import CitationInfo
 from onyx.chat.models import LlmDoc
 from onyx.chat.models import OnyxAnswerPiece
 from onyx.chat.stream_processing.answer_response_handler import AnswerResponseHandler
@@ -18,6 +19,8 @@ from onyx.chat.stream_processing.answer_response_handler import (
 )
 from onyx.chat.stream_processing.utils import map_document_id_order
 from onyx.context.search.models import InferenceSection
+from onyx.server.query_and_chat.streaming_models import CitationDelta
+from onyx.server.query_and_chat.streaming_models import CitationStart
 from onyx.server.query_and_chat.streaming_models import MessageDelta
 from onyx.server.query_and_chat.streaming_models import MessageStart
 from onyx.server.query_and_chat.streaming_models import SectionEnd
@@ -56,6 +59,9 @@ def process_llm_stream(

    full_answer = ""
    start_final_answer_streaming_set = False
+    # Accumulate citation infos if handler emits them
+    collected_citation_infos: list[CitationInfo] = []
+
    # This stream will be the llm answer if no tool is chosen. When a tool is chosen,
    # the stream will contain AIMessageChunks with tool call information.
    for message in messages:
@@ -102,6 +108,9 @@ def process_llm_stream(
                        MessageDelta(content=response_part.answer_piece),
                        writer,
                    )
+                # collect citation info objects
+                elif isinstance(response_part, CitationInfo):
+                    collected_citation_infos.append(response_part)

    if generate_final_answer and start_final_answer_streaming_set:
        # start_final_answer_streaming_set is only set if the answer is verbal and not a tool call
@@ -111,6 +120,14 @@ def process_llm_stream(
            writer,
        )

+        # Emit citations section if any were collected
+        if collected_citation_infos:
+            write_custom_event(ind, CitationStart(), writer)
+            write_custom_event(
+                ind, CitationDelta(citations=collected_citation_infos), writer
+            )
+            write_custom_event(ind, SectionEnd(), writer)
+
    logger.debug(f"Full answer: {full_answer}")
    return BasicSearchProcessedStreamResults(
        ai_message_chunk=cast(AIMessageChunk, tool_call_chunk), full_answer=full_answer
--- a/backend/onyx/agents/agent_search/dr/sub_agents/basic_search/dr_basic_search_2_act.py
+++ b/backend/onyx/agents/agent_search/dr/sub_agents/basic_search/dr_basic_search_2_act.py
@@ -1,6 +1,7 @@
 import re
 from datetime import datetime
 from typing import cast
+from uuid import UUID

 from langchain_core.runnables import RunnableConfig
 from langgraph.types import StreamWriter
@@ -22,6 +23,8 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
 from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
 from onyx.agents.agent_search.utils import create_question_prompt
 from onyx.chat.models import LlmDoc
+from onyx.configs.agent_configs import TF_DR_TIMEOUT_LONG
+from onyx.configs.agent_configs import TF_DR_TIMEOUT_SHORT
 from onyx.context.search.models import InferenceSection
 from onyx.db.connector import DocumentSource
 from onyx.db.engine.sql_engine import get_session_with_current_tenant
@@ -71,6 +74,7 @@ def basic_search(

    search_tool_info = state.available_tools[state.tools_used[-1]]
    search_tool = cast(SearchTool, search_tool_info.tool_object)
+    force_use_tool = graph_config.tooling.force_use_tool

    # sanity check
    if search_tool != graph_config.tooling.search_tool:
@@ -94,7 +98,7 @@ def basic_search(
                assistant_system_prompt, base_search_processing_prompt
            ),
            schema=BaseSearchProcessingResponse,
-            timeout_override=15,
+            timeout_override=TF_DR_TIMEOUT_SHORT,
            # max_tokens=100,
        )
    except Exception as e:
@@ -139,6 +143,15 @@ def basic_search(
    retrieved_docs: list[InferenceSection] = []
    callback_container: list[list[InferenceSection]] = []

+    user_file_ids: list[UUID] | None = None
+    project_id: int | None = None
+    if force_use_tool.override_kwargs and isinstance(
+        force_use_tool.override_kwargs, SearchToolOverrideKwargs
+    ):
+        override_kwargs = force_use_tool.override_kwargs
+        user_file_ids = override_kwargs.user_file_ids
+        project_id = override_kwargs.project_id
+
    # new db session to avoid concurrency issues
    with get_session_with_current_tenant() as search_db_session:
        for tool_response in search_tool.run(
@@ -150,6 +163,9 @@ def basic_search(
                alternate_db_session=search_db_session,
                retrieved_sections_callback=callback_container.append,
                skip_query_analysis=True,
+                original_query=rewritten_query,
+                user_file_ids=user_file_ids,
+                project_id=project_id,
            ),
        ):
            # get retrieved docs to send to the rest of the graph
@@ -203,7 +219,7 @@ def basic_search(
                assistant_system_prompt, search_prompt + (assistant_task_prompt or "")
            ),
            schema=SearchAnswer,
-            timeout_override=40,
+            timeout_override=TF_DR_TIMEOUT_LONG,
            # max_tokens=1500,
        )

@@ -224,9 +240,9 @@ def basic_search(
            claims,
        ) = extract_document_citations(answer_string, claims)

-        if (citation_numbers and max(citation_numbers) > len(retrieved_docs)) or min(
-            citation_numbers
-        ) < 1:
+        if citation_numbers and (
+            (max(citation_numbers) > len(retrieved_docs)) or min(citation_numbers) < 1
+        ):
            raise ValueError("Citation numbers are out of range for retrieved docs.")

        cited_documents = {
--- a/backend/onyx/agents/agent_search/dr/sub_agents/basic_search/dr_basic_search_3_reduce.py
+++ b/backend/onyx/agents/agent_search/dr/sub_agents/basic_search/dr_basic_search_3_reduce.py
@@ -5,12 +5,12 @@ from langgraph.types import StreamWriter

 from onyx.agents.agent_search.dr.sub_agents.states import SubAgentMainState
 from onyx.agents.agent_search.dr.sub_agents.states import SubAgentUpdate
-from onyx.agents.agent_search.dr.utils import chunks_or_sections_to_search_docs
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
 from onyx.context.search.models import SavedSearchDoc
+from onyx.context.search.models import SearchDoc
 from onyx.server.query_and_chat.streaming_models import SectionEnd
 from onyx.utils.logger import setup_logger

@@ -47,7 +47,7 @@ def is_reducer(
            doc_list.append(x)

    # Convert InferenceSections to SavedSearchDocs
-    search_docs = chunks_or_sections_to_search_docs(doc_list)
+    search_docs = SearchDoc.from_chunks_or_sections(doc_list)
    retrieved_saved_search_docs = [
        SavedSearchDoc.from_search_doc(search_doc, db_doc_id=0)
        for search_doc in search_docs
--- a/backend/onyx/agents/agent_search/dr/sub_agents/custom_tool/dr_custom_tool_2_act.py
+++ b/backend/onyx/agents/agent_search/dr/sub_agents/custom_tool/dr_custom_tool_2_act.py
@@ -13,6 +13,8 @@ from onyx.agents.agent_search.models import GraphConfig
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
 )
+from onyx.configs.agent_configs import TF_DR_TIMEOUT_LONG
+from onyx.configs.agent_configs import TF_DR_TIMEOUT_SHORT
 from onyx.prompts.dr_prompts import CUSTOM_TOOL_PREP_PROMPT
 from onyx.prompts.dr_prompts import CUSTOM_TOOL_USE_PROMPT
 from onyx.tools.tool_implementations.custom.custom_tool import CUSTOM_TOOL_RESPONSE_ID
@@ -68,7 +70,7 @@ def custom_tool_act(
            tool_use_prompt,
            tools=[custom_tool.tool_definition()],
            tool_choice="required",
-            timeout_override=40,
+            timeout_override=TF_DR_TIMEOUT_LONG,
        )

        # make sure we got a tool call
@@ -124,7 +126,7 @@ def custom_tool_act(
    )
    answer_string = str(
        graph_config.tooling.primary_llm.invoke(
-            tool_summary_prompt, timeout_override=40
+            tool_summary_prompt, timeout_override=TF_DR_TIMEOUT_SHORT
        ).content
    ).strip()

--- a/backend/onyx/agents/agent_search/dr/sub_agents/generic_internal_tool/dr_generic_internal_tool_2_act.py
+++ b/backend/onyx/agents/agent_search/dr/sub_agents/generic_internal_tool/dr_generic_internal_tool_2_act.py
@@ -13,6 +13,7 @@ from onyx.agents.agent_search.models import GraphConfig
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
 )
+from onyx.configs.agent_configs import TF_DR_TIMEOUT_SHORT
 from onyx.prompts.dr_prompts import CUSTOM_TOOL_PREP_PROMPT
 from onyx.prompts.dr_prompts import CUSTOM_TOOL_USE_PROMPT
 from onyx.prompts.dr_prompts import OKTA_TOOL_USE_SPECIAL_PROMPT
@@ -68,7 +69,7 @@ def generic_internal_tool_act(
            tool_use_prompt,
            tools=[generic_internal_tool.tool_definition()],
            tool_choice="required",
-            timeout_override=40,
+            timeout_override=TF_DR_TIMEOUT_SHORT,
        )

        # make sure we got a tool call
@@ -113,7 +114,7 @@ def generic_internal_tool_act(
    )
    answer_string = str(
        graph_config.tooling.primary_llm.invoke(
-            tool_summary_prompt, timeout_override=40
+            tool_summary_prompt, timeout_override=TF_DR_TIMEOUT_SHORT
        ).content
    ).strip()

--- a/backend/onyx/agents/agent_search/dr/sub_agents/web_search/dr_ws_2_search.py
+++ b/backend/onyx/agents/agent_search/dr/sub_agents/web_search/dr_ws_2_search.py
@@ -25,6 +25,7 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
 from onyx.agents.agent_search.utils import create_question_prompt
+from onyx.configs.agent_configs import TF_DR_TIMEOUT_SHORT
 from onyx.prompts.dr_prompts import WEB_SEARCH_URL_SELECTION_PROMPT
 from onyx.server.query_and_chat.streaming_models import SearchToolDelta
 from onyx.utils.logger import setup_logger
@@ -108,7 +109,7 @@ def web_search(
            agent_decision_prompt + (assistant_task_prompt or ""),
        ),
        schema=WebSearchAnswer,
-        timeout_override=30,
+        timeout_override=TF_DR_TIMEOUT_SHORT,
    )
    results_to_open = [
        (search_query, search_results[i])
--- a/backend/onyx/agents/agent_search/dr/sub_agents/web_search/dr_ws_6_summarize.py
+++ b/backend/onyx/agents/agent_search/dr/sub_agents/web_search/dr_ws_6_summarize.py
@@ -17,6 +17,7 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
 )
 from onyx.agents.agent_search.utils import create_question_prompt
+from onyx.configs.agent_configs import TF_DR_TIMEOUT_SHORT
 from onyx.context.search.models import InferenceSection
 from onyx.prompts.dr_prompts import INTERNAL_SEARCH_PROMPTS
 from onyx.utils.logger import setup_logger
@@ -66,7 +67,7 @@ def is_summarize(
                assistant_system_prompt, search_prompt + (assistant_task_prompt or "")
            ),
            schema=SearchAnswer,
-            timeout_override=40,
+            timeout_override=TF_DR_TIMEOUT_SHORT,
        )
        answer_string = search_answer_json.answer
        claims = search_answer_json.claims or []
--- a/backend/onyx/agents/agent_search/dr/utils.py
+++ b/backend/onyx/agents/agent_search/dr/utils.py
@@ -1,3 +1,4 @@
+import copy
 import re

 from langchain.schema.messages import BaseMessage
@@ -12,9 +13,9 @@ from onyx.agents.agent_search.shared_graph_utils.operators import (
 )
 from onyx.context.search.models import InferenceSection
 from onyx.context.search.models import SavedSearchDoc
-from onyx.context.search.utils import chunks_or_sections_to_search_docs
-from onyx.tools.tool_implementations.internet_search.internet_search_tool import (
-    InternetSearchTool,
+from onyx.context.search.models import SearchDoc
+from onyx.tools.tool_implementations.web_search.web_search_tool import (
+    WebSearchTool,
 )


@@ -72,7 +73,7 @@ def aggregate_context(
    ):

        iteration_tool = iteration_response.tool
-        is_internet = iteration_tool == InternetSearchTool._NAME
+        is_internet = iteration_tool == WebSearchTool._NAME

        for cited_doc in iteration_response.cited_documents.values():
            unrolled_inference_sections.append(cited_doc)
@@ -179,11 +180,40 @@ def get_chat_history_string(chat_history: list[BaseMessage], max_messages: int)
    Get the chat history (up to max_messages) as a string.
    """
    # get past max_messages USER, ASSISTANT message pairs
+
    past_messages = chat_history[-max_messages * 2 :]
-    return ("...\n" if len(chat_history) > len(past_messages) else "") + "\n".join(
+    filtered_past_messages = copy.deepcopy(past_messages)
+
+    for past_message_number, past_message in enumerate(past_messages):
+
+        if isinstance(past_message.content, list):
+            removal_indices = []
+            for content_piece_number, content_piece in enumerate(past_message.content):
+                if (
+                    isinstance(content_piece, dict)
+                    and content_piece.get("type") != "text"
+                ):
+                    removal_indices.append(content_piece_number)
+
+            # Only rebuild the content list if there are items to remove
+            if removal_indices:
+                filtered_past_messages[past_message_number].content = [
+                    content_piece
+                    for content_piece_number, content_piece in enumerate(
+                        past_message.content
+                    )
+                    if content_piece_number not in removal_indices
+                ]
+
+        else:
+            continue
+
+    return (
+        "...\n" if len(chat_history) > len(filtered_past_messages) else ""
+    ) + "\n".join(
        ("user" if isinstance(msg, HumanMessage) else "you")
        + f": {str(msg.content).strip()}"
-        for msg in past_messages
+        for msg in filtered_past_messages
    )


@@ -236,7 +266,7 @@ def convert_inference_sections_to_search_docs(
    is_internet: bool = False,
 ) -> list[SavedSearchDoc]:
    # Convert InferenceSections to SavedSearchDocs
-    search_docs = chunks_or_sections_to_search_docs(inference_sections)
+    search_docs = SearchDoc.from_chunks_or_sections(inference_sections)
    for search_doc in search_docs:
        search_doc.is_internet = is_internet

--- a/backend/onyx/agents/agent_search/models.py
+++ b/backend/onyx/agents/agent_search/models.py
@@ -24,6 +24,7 @@ class GraphInputs(BaseModel):
    prompt_builder: AnswerPromptBuilder
    files: list[InMemoryChatFile] | None = None
    structured_response_format: dict | None = None
+    project_instructions: str | None = None

    class Config:
        arbitrary_types_allowed = True
--- a/backend/onyx/agents/agent_search/orchestration/states.py
+++ b/backend/onyx/agents/agent_search/orchestration/states.py
@@ -1,6 +1,6 @@
 from pydantic import BaseModel

-from onyx.chat.prompt_builder.answer_prompt_builder import PromptSnapshot
+from onyx.chat.prompt_builder.schemas import PromptSnapshot
 from onyx.tools.message import ToolCallSummary
 from onyx.tools.models import SearchToolOverrideKwargs
 from onyx.tools.models import ToolCallFinalResult
--- a/backend/onyx/agents/agent_search/shared_graph_utils/utils.py
+++ b/backend/onyx/agents/agent_search/shared_graph_utils/utils.py
@@ -1,4 +1,3 @@
-import os
 import re
 from collections.abc import Callable
 from collections.abc import Iterator
@@ -8,18 +7,10 @@ from typing import Any
 from typing import cast
 from typing import Literal
 from typing import TypedDict
-from uuid import UUID

 from langchain_core.messages import BaseMessage
-from langchain_core.messages import HumanMessage
 from langgraph.types import StreamWriter
-from sqlalchemy.orm import Session

-from onyx.agents.agent_search.models import GraphConfig
-from onyx.agents.agent_search.models import GraphInputs
-from onyx.agents.agent_search.models import GraphPersistence
-from onyx.agents.agent_search.models import GraphSearchConfig
-from onyx.agents.agent_search.models import GraphTooling
 from onyx.agents.agent_search.shared_graph_utils.models import BaseMessage_Content
 from onyx.agents.agent_search.shared_graph_utils.models import (
    EntityRelationshipTermExtraction,
@@ -32,9 +23,6 @@ from onyx.agents.agent_search.shared_graph_utils.models import SubQuestionAnswer
 from onyx.agents.agent_search.shared_graph_utils.operators import (
    dedup_inference_section_list,
 )
-from onyx.chat.models import AnswerStyleConfig
-from onyx.chat.models import CitationConfig
-from onyx.chat.models import DocumentPruningConfig
 from onyx.chat.models import MessageResponseIDInfo
 from onyx.chat.models import PromptConfig
 from onyx.chat.models import SectionRelevancePiece
@@ -42,25 +30,16 @@ from onyx.chat.models import StreamingError
 from onyx.chat.models import StreamStopInfo
 from onyx.chat.models import StreamStopReason
 from onyx.chat.models import StreamType
-from onyx.chat.prompt_builder.answer_prompt_builder import AnswerPromptBuilder
 from onyx.configs.agent_configs import AGENT_MAX_TOKENS_HISTORY_SUMMARY
 from onyx.configs.agent_configs import (
    AGENT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION,
 )
 from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_HISTORY_SUMMARY_GENERATION
-from onyx.configs.chat_configs import CHAT_TARGET_CHUNK_PERCENTAGE
-from onyx.configs.chat_configs import MAX_CHUNKS_FED_TO_CHAT
-from onyx.configs.constants import DEFAULT_PERSONA_ID
 from onyx.configs.constants import DISPATCH_SEP_CHAR
 from onyx.configs.constants import FORMAT_DOCS_SEPARATOR
-from onyx.context.search.enums import LLMEvaluationType
 from onyx.context.search.models import InferenceSection
-from onyx.context.search.models import RetrievalDetails
-from onyx.context.search.models import SearchRequest
 from onyx.db.engine.sql_engine import get_session_with_current_tenant
-from onyx.db.persona import get_persona_by_id
 from onyx.db.persona import Persona
-from onyx.db.tools import get_tool_by_name
 from onyx.llm.chat_llm import LLMRateLimitError
 from onyx.llm.chat_llm import LLMTimeoutError
 from onyx.llm.interfaces import LLM
@@ -77,15 +56,12 @@ from onyx.prompts.agent_search import (
 from onyx.prompts.prompt_utils import handle_onyx_date_awareness
 from onyx.server.query_and_chat.streaming_models import Packet
 from onyx.server.query_and_chat.streaming_models import PacketObj
-from onyx.tools.force import ForceUseTool
 from onyx.tools.models import SearchToolOverrideKwargs
-from onyx.tools.tool_constructor import SearchToolConfig
 from onyx.tools.tool_implementations.search.search_tool import (
    SEARCH_RESPONSE_SUMMARY_ID,
 )
 from onyx.tools.tool_implementations.search.search_tool import SearchResponseSummary
 from onyx.tools.tool_implementations.search.search_tool import SearchTool
-from onyx.tools.utils import explicit_tool_calling_supported
 from onyx.utils.logger import setup_logger
 from onyx.utils.threadpool_concurrency import run_with_timeout

@@ -156,136 +132,20 @@ def format_entity_term_extraction(
    return "\n".join(entity_strs + relationship_strs + term_strs)


-def get_test_config(
-    db_session: Session,
-    primary_llm: LLM,
-    fast_llm: LLM,
-    search_request: SearchRequest,
-    use_agentic_search: bool = True,
-) -> GraphConfig:
-    persona = get_persona_by_id(DEFAULT_PERSONA_ID, None, db_session)
-    document_pruning_config = DocumentPruningConfig(
-        max_chunks=int(
-            persona.num_chunks
-            if persona.num_chunks is not None
-            else MAX_CHUNKS_FED_TO_CHAT
-        ),
-        max_window_percentage=CHAT_TARGET_CHUNK_PERCENTAGE,
-    )
-
-    answer_style_config = AnswerStyleConfig(
-        citation_config=CitationConfig(
-            # The docs retrieved by this flow are already relevance-filtered
-            all_docs_useful=True
-        ),
-        structured_response_format=None,
-    )
-
-    search_tool_config = SearchToolConfig(
-        answer_style_config=answer_style_config,
-        document_pruning_config=document_pruning_config,
-        retrieval_options=RetrievalDetails(),  # may want to set dedupe_docs=True
-        rerank_settings=None,  # Can use this to change reranking model
-        selected_sections=None,
-        latest_query_files=None,
-        bypass_acl=False,
-    )
-
-    prompt_config = PromptConfig.from_model(persona.prompts[0])
-
-    search_tool = SearchTool(
-        tool_id=get_tool_by_name(SearchTool._NAME, db_session).id,
-        db_session=db_session,
-        user=None,
-        persona=persona,
-        retrieval_options=search_tool_config.retrieval_options,
-        prompt_config=prompt_config,
-        llm=primary_llm,
-        fast_llm=fast_llm,
-        document_pruning_config=search_tool_config.document_pruning_config,
-        answer_style_config=search_tool_config.answer_style_config,
-        selected_sections=search_tool_config.selected_sections,
-        chunks_above=search_tool_config.chunks_above,
-        chunks_below=search_tool_config.chunks_below,
-        full_doc=search_tool_config.full_doc,
-        evaluation_type=(
-            LLMEvaluationType.BASIC
-            if persona.llm_relevance_filter
-            else LLMEvaluationType.SKIP
-        ),
-        rerank_settings=search_tool_config.rerank_settings,
-        bypass_acl=search_tool_config.bypass_acl,
-    )
-
-    graph_inputs = GraphInputs(
-        persona=search_request.persona,
-        rerank_settings=search_tool_config.rerank_settings,
-        prompt_builder=AnswerPromptBuilder(
-            user_message=HumanMessage(content=search_request.query),
-            message_history=[],
-            llm_config=primary_llm.config,
-            raw_user_query=search_request.query,
-            raw_user_uploaded_files=[],
-        ),
-        structured_response_format=answer_style_config.structured_response_format,
-    )
-
-    using_tool_calling_llm = explicit_tool_calling_supported(
-        primary_llm.config.model_provider, primary_llm.config.model_name
-    )
-    graph_tooling = GraphTooling(
-        primary_llm=primary_llm,
-        fast_llm=fast_llm,
-        search_tool=search_tool,
-        tools=[search_tool],
-        force_use_tool=ForceUseTool(force_use=False, tool_name=""),
-        using_tool_calling_llm=using_tool_calling_llm,
-    )
-
-    chat_session_id = (
-        os.environ.get("ONYX_AS_CHAT_SESSION_ID")
-        or "00000000-0000-0000-0000-000000000000"
-    )
-    assert (
-        chat_session_id is not None
-    ), "ONYX_AS_CHAT_SESSION_ID must be set for backend tests"
-    graph_persistence = GraphPersistence(
-        db_session=db_session,
-        chat_session_id=UUID(chat_session_id),
-        message_id=1,
-    )
-
-    search_behavior_config = GraphSearchConfig(
-        use_agentic_search=use_agentic_search,
-        skip_gen_ai_answer_generation=False,
-        allow_refinement=True,
-    )
-    graph_config = GraphConfig(
-        inputs=graph_inputs,
-        tooling=graph_tooling,
-        persistence=graph_persistence,
-        behavior=search_behavior_config,
-    )
-
-    return graph_config
-
-
 def get_persona_agent_prompt_expressions(
    persona: Persona | None,
 ) -> PersonaPromptExpressions:
-    if persona is None or len(persona.prompts) == 0:
-        # TODO base_prompt should be None, but no time to properly fix
+    if persona is None:
        return PersonaPromptExpressions(
            contextualized_prompt=ASSISTANT_SYSTEM_PROMPT_DEFAULT, base_prompt=""
        )

-    # Only a 1:1 mapping between personas and prompts currently
-    prompt = persona.prompts[0]
-    prompt_config = PromptConfig.from_model(prompt)
+    # Prompts are now embedded directly on the Persona model
+    prompt_config = PromptConfig.from_model(persona)
    datetime_aware_system_prompt = handle_onyx_date_awareness(
        prompt_str=prompt_config.system_prompt,
        prompt_config=prompt_config,
-        add_additional_info_if_no_tag=prompt.datetime_aware,
+        add_additional_info_if_no_tag=persona.datetime_aware,
    )

    return PersonaPromptExpressions(
--- a/backend/onyx/background/celery/apps/light.py
+++ b/backend/onyx/background/celery/apps/light.py
@@ -115,7 +115,6 @@ celery_app.autodiscover_tasks(
        "onyx.background.celery.tasks.vespa",
        "onyx.background.celery.tasks.connector_deletion",
        "onyx.background.celery.tasks.doc_permission_syncing",
-        "onyx.background.celery.tasks.user_file_folder_sync",
        "onyx.background.celery.tasks.docprocessing",
    ]
 )
--- a/backend/onyx/background/celery/apps/primary.py
+++ b/backend/onyx/background/celery/apps/primary.py
@@ -20,6 +20,7 @@ import onyx.background.celery.apps.app_base as app_base
 from onyx.background.celery.apps.app_base import task_logger
 from onyx.background.celery.celery_utils import celery_is_worker_primary
 from onyx.background.celery.tasks.vespa.document_sync import reset_document_sync
+from onyx.configs.app_configs import CELERY_WORKER_PRIMARY_POOL_OVERFLOW
 from onyx.configs.constants import CELERY_PRIMARY_WORKER_LOCK_TIMEOUT
 from onyx.configs.constants import OnyxRedisConstants
 from onyx.configs.constants import OnyxRedisLocks
@@ -83,11 +84,11 @@ def on_celeryd_init(sender: str, conf: Any = None, **kwargs: Any) -> None:
 def on_worker_init(sender: Worker, **kwargs: Any) -> None:
    logger.info("worker_init signal received.")

-    EXTRA_CONCURRENCY = 4  # small extra fudge factor for connection limits
-
    SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_PRIMARY_APP_NAME)
    pool_size = cast(int, sender.concurrency)  # type: ignore
-    SqlEngine.init_engine(pool_size=pool_size, max_overflow=EXTRA_CONCURRENCY)
+    SqlEngine.init_engine(
+        pool_size=pool_size, max_overflow=CELERY_WORKER_PRIMARY_POOL_OVERFLOW
+    )

    app_base.wait_for_redis(sender, **kwargs)
    app_base.wait_for_db(sender, **kwargs)
@@ -316,12 +317,12 @@ celery_app.autodiscover_tasks(
    [
        "onyx.background.celery.tasks.connector_deletion",
        "onyx.background.celery.tasks.docprocessing",
+        "onyx.background.celery.tasks.evals",
        "onyx.background.celery.tasks.periodic",
        "onyx.background.celery.tasks.pruning",
        "onyx.background.celery.tasks.shared",
        "onyx.background.celery.tasks.vespa",
        "onyx.background.celery.tasks.llm_model_update",
-        "onyx.background.celery.tasks.user_file_folder_sync",
        "onyx.background.celery.tasks.kg_processing",
    ]
 )
--- a/backend/onyx/background/celery/apps/user_file_processing.py
+++ b/backend/onyx/background/celery/apps/user_file_processing.py
@@ -0,0 +1,113 @@
+from typing import Any
+from typing import cast
+
+from celery import Celery
+from celery import signals
+from celery import Task
+from celery.apps.worker import Worker
+from celery.signals import celeryd_init
+from celery.signals import worker_init
+from celery.signals import worker_process_init
+from celery.signals import worker_ready
+from celery.signals import worker_shutdown
+
+import onyx.background.celery.apps.app_base as app_base
+from onyx.configs.constants import POSTGRES_CELERY_WORKER_USER_FILE_PROCESSING_APP_NAME
+from onyx.db.engine.sql_engine import SqlEngine
+from onyx.utils.logger import setup_logger
+from shared_configs.configs import MULTI_TENANT
+
+
+logger = setup_logger()
+
+celery_app = Celery(__name__)
+celery_app.config_from_object("onyx.background.celery.configs.user_file_processing")
+celery_app.Task = app_base.TenantAwareTask  # type: ignore [misc]
+
+
+@signals.task_prerun.connect
+def on_task_prerun(
+    sender: Any | None = None,
+    task_id: str | None = None,
+    task: Task | None = None,
+    args: tuple | None = None,
+    kwargs: dict | None = None,
+    **kwds: Any,
+) -> None:
+    app_base.on_task_prerun(sender, task_id, task, args, kwargs, **kwds)
+
+
+@signals.task_postrun.connect
+def on_task_postrun(
+    sender: Any | None = None,
+    task_id: str | None = None,
+    task: Task | None = None,
+    args: tuple | None = None,
+    kwargs: dict | None = None,
+    retval: Any | None = None,
+    state: str | None = None,
+    **kwds: Any,
+) -> None:
+    app_base.on_task_postrun(sender, task_id, task, args, kwargs, retval, state, **kwds)
+
+
+@celeryd_init.connect
+def on_celeryd_init(sender: str, conf: Any = None, **kwargs: Any) -> None:
+    app_base.on_celeryd_init(sender, conf, **kwargs)
+
+
+@worker_init.connect
+def on_worker_init(sender: Worker, **kwargs: Any) -> None:
+    logger.info("worker_init signal received.")
+
+    SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_USER_FILE_PROCESSING_APP_NAME)
+
+    # rkuo: Transient errors keep happening in the indexing watchdog threads.
+    # "SSL connection has been closed unexpectedly"
+    # actually setting the spawn method in the cloud fixes 95% of these.
+    # setting pre ping might help even more, but not worrying about that yet
+    pool_size = cast(int, sender.concurrency)  # type: ignore
+    SqlEngine.init_engine(pool_size=pool_size, max_overflow=8)
+
+    app_base.wait_for_redis(sender, **kwargs)
+    app_base.wait_for_db(sender, **kwargs)
+    app_base.wait_for_vespa_or_shutdown(sender, **kwargs)
+
+    # Fewer startup checks in the multi-tenant case
+    if MULTI_TENANT:
+        return
+
+    app_base.on_secondary_worker_init(sender, **kwargs)
+
+
+@worker_ready.connect
+def on_worker_ready(sender: Any, **kwargs: Any) -> None:
+    app_base.on_worker_ready(sender, **kwargs)
+
+
+@worker_shutdown.connect
+def on_worker_shutdown(sender: Any, **kwargs: Any) -> None:
+    app_base.on_worker_shutdown(sender, **kwargs)
+
+
+@worker_process_init.connect
+def init_worker(**kwargs: Any) -> None:
+    SqlEngine.reset_engine()
+
+
+@signals.setup_logging.connect
+def on_setup_logging(
+    loglevel: Any, logfile: Any, format: Any, colorize: Any, **kwargs: Any
+) -> None:
+    app_base.on_setup_logging(loglevel, logfile, format, colorize, **kwargs)
+
+
+base_bootsteps = app_base.get_bootsteps()
+for bootstep in base_bootsteps:
+    celery_app.steps["worker"].add(bootstep)
+
+celery_app.autodiscover_tasks(
+    [
+        "onyx.background.celery.tasks.user_file_processing",
+    ]
+)
--- a/backend/onyx/background/celery/configs/primary.py
+++ b/backend/onyx/background/celery/configs/primary.py
@@ -1,4 +1,5 @@
 import onyx.background.celery.configs.base as shared_config
+from onyx.configs.app_configs import CELERY_WORKER_PRIMARY_CONCURRENCY

 broker_url = shared_config.broker_url
 broker_connection_retry_on_startup = shared_config.broker_connection_retry_on_startup
@@ -15,6 +16,6 @@ result_expires = shared_config.result_expires  # 86400 seconds is the default
 task_default_priority = shared_config.task_default_priority
 task_acks_late = shared_config.task_acks_late

-worker_concurrency = 4
+worker_concurrency = CELERY_WORKER_PRIMARY_CONCURRENCY
 worker_pool = "threads"
 worker_prefetch_multiplier = 1
--- a/backend/onyx/background/celery/configs/user_file_processing.py
+++ b/backend/onyx/background/celery/configs/user_file_processing.py
@@ -0,0 +1,22 @@
+import onyx.background.celery.configs.base as shared_config
+from onyx.configs.app_configs import CELERY_WORKER_USER_FILE_PROCESSING_CONCURRENCY
+
+broker_url = shared_config.broker_url
+broker_connection_retry_on_startup = shared_config.broker_connection_retry_on_startup
+broker_pool_limit = shared_config.broker_pool_limit
+broker_transport_options = shared_config.broker_transport_options
+
+redis_socket_keepalive = shared_config.redis_socket_keepalive
+redis_retry_on_timeout = shared_config.redis_retry_on_timeout
+redis_backend_health_check_interval = shared_config.redis_backend_health_check_interval
+
+result_backend = shared_config.result_backend
+result_expires = shared_config.result_expires  # 86400 seconds is the default
+
+task_default_priority = shared_config.task_default_priority
+task_acks_late = shared_config.task_acks_late
+
+# User file processing worker configuration
+worker_concurrency = CELERY_WORKER_USER_FILE_PROCESSING_CONCURRENCY
+worker_pool = "threads"
+worker_prefetch_multiplier = 1
--- a/backend/onyx/background/celery/tasks/beat_schedule.py
+++ b/backend/onyx/background/celery/tasks/beat_schedule.py
@@ -26,6 +26,26 @@ CLOUD_DOC_PERMISSION_SYNC_MULTIPLIER_DEFAULT = 1.0

 # tasks that run in either self-hosted on cloud
 beat_task_templates: list[dict] = [
+    {
+        "name": "check-for-user-file-processing",
+        "task": OnyxCeleryTask.CHECK_FOR_USER_FILE_PROCESSING,
+        "schedule": timedelta(seconds=20),
+        "options": {
+            "priority": OnyxCeleryPriority.MEDIUM,
+            "expires": BEAT_EXPIRES_DEFAULT,
+            "queue": OnyxCeleryQueues.USER_FILE_PROCESSING,
+        },
+    },
+    {
+        "name": "user-file-docid-migration",
+        "task": OnyxCeleryTask.USER_FILE_DOCID_MIGRATION,
+        "schedule": timedelta(minutes=1),
+        "options": {
+            "priority": OnyxCeleryPriority.LOW,
+            "expires": BEAT_EXPIRES_DEFAULT,
+            "queue": OnyxCeleryQueues.USER_FILE_PROCESSING,
+        },
+    },
    {
        "name": "check-for-kg-processing",
        "task": OnyxCeleryTask.CHECK_KG_PROCESSING,
@@ -89,17 +109,6 @@ beat_task_templates: list[dict] = [
            "expires": BEAT_EXPIRES_DEFAULT,
        },
    },
-    {
-        "name": "check-for-user-file-folder-sync",
-        "task": OnyxCeleryTask.CHECK_FOR_USER_FILE_FOLDER_SYNC,
-        "schedule": timedelta(
-            days=1
-        ),  # This should essentially always be triggered manually for user folder updates.
-        "options": {
-            "priority": OnyxCeleryPriority.MEDIUM,
-            "expires": BEAT_EXPIRES_DEFAULT,
-        },
-    },
    {
        "name": "check-for-pruning",
        "task": OnyxCeleryTask.CHECK_FOR_PRUNING,
--- a/backend/onyx/background/celery/tasks/connector_deletion/tasks.py
+++ b/backend/onyx/background/celery/tasks/connector_deletion/tasks.py
@@ -28,9 +28,6 @@ from onyx.db.connector_credential_pair import add_deletion_failure_message
 from onyx.db.connector_credential_pair import (
    delete_connector_credential_pair__no_commit,
 )
-from onyx.db.connector_credential_pair import (
-    delete_userfiles_for_cc_pair__no_commit,
-)
 from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
 from onyx.db.connector_credential_pair import get_connector_credential_pairs
 from onyx.db.document import (
@@ -484,12 +481,6 @@ def monitor_connector_deletion_taskset(
            # related to the deleted DocumentByConnectorCredentialPair during commit
            db_session.expire(cc_pair)

-            # delete all userfiles for the cc_pair
-            delete_userfiles_for_cc_pair__no_commit(
-                db_session=db_session,
-                cc_pair_id=cc_pair_id,
-            )
-
            # finally, delete the cc-pair
            delete_connector_credential_pair__no_commit(
                db_session=db_session,
--- a/backend/onyx/background/celery/tasks/docprocessing/tasks.py
+++ b/backend/onyx/background/celery/tasks/docprocessing/tasks.py
@@ -85,8 +85,10 @@ from onyx.document_index.factory import get_default_document_index
 from onyx.file_store.document_batch_storage import DocumentBatchStorage
 from onyx.file_store.document_batch_storage import get_document_batch_storage
 from onyx.httpx.httpx_pool import HttpxPool
+from onyx.indexing.adapters.document_indexing_adapter import (
+    DocumentIndexingBatchAdapter,
+)
 from onyx.indexing.embedder import DefaultIndexingEmbedder
-from onyx.indexing.indexing_pipeline import run_indexing_pipeline
 from onyx.natural_language_processing.search_nlp_models import EmbeddingModel
 from onyx.natural_language_processing.search_nlp_models import (
    InformationContentClassificationModel,
@@ -1268,6 +1270,8 @@ def _docprocessing_task(
    tenant_id: str,
    batch_num: int,
 ) -> None:
+    from onyx.indexing.indexing_pipeline import run_indexing_pipeline
+
    start_time = time.monotonic()

    if tenant_id:
@@ -1369,6 +1373,14 @@ def _docprocessing_task(
                f"Processing {len(documents)} documents through indexing pipeline"
            )

+            adapter = DocumentIndexingBatchAdapter(
+                db_session=db_session,
+                connector_id=index_attempt.connector_credential_pair.connector.id,
+                credential_id=index_attempt.connector_credential_pair.credential.id,
+                tenant_id=tenant_id,
+                index_attempt_metadata=index_attempt_metadata,
+            )
+
            # real work happens here!
            index_pipeline_result = run_indexing_pipeline(
                embedder=embedding_model,
@@ -1378,7 +1390,8 @@ def _docprocessing_task(
                db_session=db_session,
                tenant_id=tenant_id,
                document_batch=documents,
-                index_attempt_metadata=index_attempt_metadata,
+                request_id=index_attempt_metadata.request_id,
+                adapter=adapter,
            )

        # Update batch completion and document counts atomically using database coordination
--- a/backend/onyx/background/celery/tasks/evals/tasks.py
+++ b/backend/onyx/background/celery/tasks/evals/tasks.py
@@ -0,0 +1,35 @@
+from typing import Any
+
+from celery import shared_task
+from celery import Task
+
+from onyx.configs.app_configs import JOB_TIMEOUT
+from onyx.configs.constants import OnyxCeleryTask
+from onyx.evals.eval import run_eval
+from onyx.evals.models import EvalConfigurationOptions
+from onyx.utils.logger import setup_logger
+
+logger = setup_logger()
+
+
+@shared_task(
+    name=OnyxCeleryTask.EVAL_RUN_TASK,
+    ignore_result=True,
+    soft_time_limit=JOB_TIMEOUT,
+    bind=True,
+    trail=False,
+)
+def eval_run_task(
+    self: Task,
+    *,
+    configuration_dict: dict[str, Any],
+) -> None:
+    """Validate configuration_dict as EvalConfigurationOptions and run the eval."""
+    try:
+        configuration = EvalConfigurationOptions.model_validate(configuration_dict)
+        run_eval(configuration, remote_dataset_name=configuration.dataset_name)
+        logger.info("Successfully completed eval run task")
+
+    except Exception:
+        logger.exception("Failed to run eval task")  # keep the traceback in logs
+        raise
--- a/backend/onyx/background/celery/tasks/monitoring/tasks.py
+++ b/backend/onyx/background/celery/tasks/monitoring/tasks.py
@@ -889,6 +889,12 @@ def monitor_celery_queues_helper(
    n_user_files_indexing = celery_get_queue_length(
        OnyxCeleryQueues.USER_FILES_INDEXING, r_celery
    )
+    n_user_file_processing = celery_get_queue_length(
+        OnyxCeleryQueues.USER_FILE_PROCESSING, r_celery
+    )
+    n_user_file_project_sync = celery_get_queue_length(
+        OnyxCeleryQueues.USER_FILE_PROJECT_SYNC, r_celery
+    )
    n_sync = celery_get_queue_length(OnyxCeleryQueues.VESPA_METADATA_SYNC, r_celery)
    n_deletion = celery_get_queue_length(OnyxCeleryQueues.CONNECTOR_DELETION, r_celery)
    n_pruning = celery_get_queue_length(OnyxCeleryQueues.CONNECTOR_PRUNING, r_celery)
@@ -916,6 +922,8 @@ def monitor_celery_queues_helper(
        f"docprocessing={n_docprocessing} "
        f"docprocessing_prefetched={len(n_docprocessing_prefetched)} "
        f"user_files_indexing={n_user_files_indexing} "
+        f"user_file_processing={n_user_file_processing} "
+        f"user_file_project_sync={n_user_file_project_sync} "
        f"sync={n_sync} "
        f"deletion={n_deletion} "
        f"pruning={n_pruning} "
--- a/backend/onyx/background/celery/tasks/user_file_folder_sync/tasks.py
+++ b/backend/onyx/background/celery/tasks/user_file_folder_sync/tasks.py
@@ -1,266 +0,0 @@
-import time
-from typing import List
-
-from celery import shared_task
-from celery import Task
-from celery.exceptions import SoftTimeLimitExceeded
-from redis.lock import Lock as RedisLock
-from sqlalchemy.orm import Session
-from tenacity import RetryError
-
-from onyx.background.celery.apps.app_base import task_logger
-from onyx.background.celery.tasks.shared.RetryDocumentIndex import RetryDocumentIndex
-from onyx.background.celery.tasks.shared.tasks import LIGHT_SOFT_TIME_LIMIT
-from onyx.background.celery.tasks.shared.tasks import LIGHT_TIME_LIMIT
-from onyx.background.celery.tasks.shared.tasks import OnyxCeleryTaskCompletionStatus
-from onyx.configs.app_configs import JOB_TIMEOUT
-from onyx.configs.constants import CELERY_USER_FILE_FOLDER_SYNC_BEAT_LOCK_TIMEOUT
-from onyx.configs.constants import OnyxCeleryTask
-from onyx.configs.constants import OnyxRedisLocks
-from onyx.db.connector_credential_pair import (
-    get_connector_credential_pairs_with_user_files,
-)
-from onyx.db.document import get_document
-from onyx.db.engine.sql_engine import get_session_with_current_tenant
-from onyx.db.models import ConnectorCredentialPair
-from onyx.db.models import Document
-from onyx.db.models import DocumentByConnectorCredentialPair
-from onyx.db.search_settings import get_active_search_settings
-from onyx.db.user_documents import fetch_user_files_for_documents
-from onyx.db.user_documents import fetch_user_folders_for_documents
-from onyx.document_index.factory import get_default_document_index
-from onyx.document_index.interfaces import VespaDocumentUserFields
-from onyx.httpx.httpx_pool import HttpxPool
-from onyx.redis.redis_pool import get_redis_client
-from onyx.utils.logger import setup_logger
-
-logger = setup_logger()
-
-
-@shared_task(
-    name=OnyxCeleryTask.CHECK_FOR_USER_FILE_FOLDER_SYNC,
-    ignore_result=True,
-    soft_time_limit=JOB_TIMEOUT,
-    trail=False,
-    bind=True,
-)
-def check_for_user_file_folder_sync(self: Task, *, tenant_id: str) -> bool | None:
-    """Runs periodically to check for documents that need user file folder metadata updates.
-    This task fetches all connector credential pairs with user files, gets the documents
-    associated with them, and updates the user file and folder metadata in Vespa.
-    """
-
-    time_start = time.monotonic()
-
-    r = get_redis_client()
-
-    lock_beat: RedisLock = r.lock(
-        OnyxRedisLocks.CHECK_USER_FILE_FOLDER_SYNC_BEAT_LOCK,
-        timeout=CELERY_USER_FILE_FOLDER_SYNC_BEAT_LOCK_TIMEOUT,
-    )
-
-    # these tasks should never overlap
-    if not lock_beat.acquire(blocking=False):
-        return None
-
-    try:
-        with get_session_with_current_tenant() as db_session:
-            # Get all connector credential pairs that have user files
-            cc_pairs = get_connector_credential_pairs_with_user_files(db_session)
-
-            if not cc_pairs:
-                task_logger.info("No connector credential pairs with user files found")
-                return True
-
-            # Get all documents associated with these cc_pairs
-            document_ids = get_documents_for_cc_pairs(cc_pairs, db_session)
-
-            if not document_ids:
-                task_logger.info(
-                    "No documents found for connector credential pairs with user files"
-                )
-                return True
-
-            # Fetch current user file and folder IDs for these documents
-            doc_id_to_user_file_id = fetch_user_files_for_documents(
-                document_ids=document_ids, db_session=db_session
-            )
-            doc_id_to_user_folder_id = fetch_user_folders_for_documents(
-                document_ids=document_ids, db_session=db_session
-            )
-
-            # Update Vespa metadata for each document
-            for doc_id in document_ids:
-                user_file_id = doc_id_to_user_file_id.get(doc_id)
-                user_folder_id = doc_id_to_user_folder_id.get(doc_id)
-
-                if user_file_id is not None or user_folder_id is not None:
-                    # Schedule a task to update the document metadata
-                    update_user_file_folder_metadata.apply_async(
-                        args=(doc_id,),  # Use tuple instead of list for args
-                        kwargs={
-                            "tenant_id": tenant_id,
-                            "user_file_id": user_file_id,
-                            "user_folder_id": user_folder_id,
-                        },
-                        queue="vespa_metadata_sync",
-                    )
-
-            task_logger.info(
-                f"Scheduled metadata updates for {len(document_ids)} documents. "
-                f"Elapsed time: {time.monotonic() - time_start:.2f}s"
-            )
-
-            return True
-    except Exception as e:
-        task_logger.exception(f"Error in check_for_user_file_folder_sync: {e}")
-        return False
-    finally:
-        lock_beat.release()
-
-
-def get_documents_for_cc_pairs(
-    cc_pairs: List[ConnectorCredentialPair], db_session: Session
-) -> List[str]:
-    """Get all document IDs associated with the given connector credential pairs."""
-    if not cc_pairs:
-        return []
-
-    cc_pair_ids = [cc_pair.id for cc_pair in cc_pairs]
-
-    # Query to get document IDs from DocumentByConnectorCredentialPair
-    # Note: DocumentByConnectorCredentialPair uses connector_id and credential_id, not cc_pair_id
-    doc_cc_pairs = (
-        db_session.query(Document.id)
-        .join(
-            DocumentByConnectorCredentialPair,
-            Document.id == DocumentByConnectorCredentialPair.id,
-        )
-        .filter(
-            db_session.query(ConnectorCredentialPair)
-            .filter(
-                ConnectorCredentialPair.id.in_(cc_pair_ids),
-                ConnectorCredentialPair.connector_id
-                == DocumentByConnectorCredentialPair.connector_id,
-                ConnectorCredentialPair.credential_id
-                == DocumentByConnectorCredentialPair.credential_id,
-            )
-            .exists()
-        )
-        .all()
-    )
-
-    return [doc_id for (doc_id,) in doc_cc_pairs]
-
-
-@shared_task(
-    name=OnyxCeleryTask.UPDATE_USER_FILE_FOLDER_METADATA,
-    bind=True,
-    soft_time_limit=LIGHT_SOFT_TIME_LIMIT,
-    time_limit=LIGHT_TIME_LIMIT,
-    max_retries=3,
-)
-def update_user_file_folder_metadata(
-    self: Task,
-    document_id: str,
-    *,
-    tenant_id: str,
-    user_file_id: int | None,
-    user_folder_id: int | None,
-) -> bool:
-    """Updates the user file and folder metadata for a document in Vespa."""
-    start = time.monotonic()
-    completion_status = OnyxCeleryTaskCompletionStatus.UNDEFINED
-
-    try:
-        with get_session_with_current_tenant() as db_session:
-            active_search_settings = get_active_search_settings(db_session)
-            doc_index = get_default_document_index(
-                search_settings=active_search_settings.primary,
-                secondary_search_settings=active_search_settings.secondary,
-                httpx_client=HttpxPool.get("vespa"),
-            )
-
-            retry_index = RetryDocumentIndex(doc_index)
-
-            doc = get_document(document_id, db_session)
-            if not doc:
-                elapsed = time.monotonic() - start
-                task_logger.info(
-                    f"doc={document_id} "
-                    f"action=no_operation "
-                    f"elapsed={elapsed:.2f}"
-                )
-                completion_status = OnyxCeleryTaskCompletionStatus.SKIPPED
-                return False
-
-            # Create user fields object with file and folder IDs
-            user_fields = VespaDocumentUserFields(
-                user_file_id=str(user_file_id) if user_file_id is not None else None,
-                user_folder_id=(
-                    str(user_folder_id) if user_folder_id is not None else None
-                ),
-            )
-
-            # Update Vespa. OK if doc doesn't exist. Raises exception otherwise.
-            chunks_affected = retry_index.update_single(
-                document_id,
-                tenant_id=tenant_id,
-                chunk_count=doc.chunk_count,
-                fields=None,  # We're only updating user fields
-                user_fields=user_fields,
-            )
-
-            elapsed = time.monotonic() - start
-            task_logger.info(
-                f"doc={document_id} "
-                f"action=user_file_folder_sync "
-                f"user_file_id={user_file_id} "
-                f"user_folder_id={user_folder_id} "
-                f"chunks={chunks_affected} "
-                f"elapsed={elapsed:.2f}"
-            )
-            completion_status = OnyxCeleryTaskCompletionStatus.SUCCEEDED
-            return True
-
-    except SoftTimeLimitExceeded:
-        task_logger.info(f"SoftTimeLimitExceeded exception. doc={document_id}")
-        completion_status = OnyxCeleryTaskCompletionStatus.SOFT_TIME_LIMIT
-    except Exception as ex:
-        e: Exception | None = None
-        while True:
-            if isinstance(ex, RetryError):
-                task_logger.warning(
-                    f"Tenacity retry failed: num_attempts={ex.last_attempt.attempt_number}"
-                )
-
-                # only set the inner exception if it is of type Exception
-                e_temp = ex.last_attempt.exception()
-                if isinstance(e_temp, Exception):
-                    e = e_temp
-            else:
-                e = ex
-
-            task_logger.exception(
-                f"update_user_file_folder_metadata exceptioned: doc={document_id}"
-            )
-
-            completion_status = OnyxCeleryTaskCompletionStatus.RETRYABLE_EXCEPTION
-            if (
-                self.max_retries is not None
-                and self.request.retries >= self.max_retries
-            ):
-                completion_status = (
-                    OnyxCeleryTaskCompletionStatus.NON_RETRYABLE_EXCEPTION
-                )
-
-            # Exponential backoff from 2^4 to 2^6 ... i.e. 16, 32, 64
-            countdown = 2 ** (self.request.retries + 4)
-            self.retry(exc=e, countdown=countdown)  # this will raise a celery exception
-            break  # we won't hit this, but it looks weird not to have it
-    finally:
-        task_logger.info(
-            f"update_user_file_folder_metadata completed: status={completion_status.value} doc={document_id}"
-        )
-
-    return False
--- a/backend/onyx/background/celery/tasks/user_file_processing/tasks.py
+++ b/backend/onyx/background/celery/tasks/user_file_processing/tasks.py
@@ -0,0 +1,680 @@
+import datetime
+import time
+from collections.abc import Sequence
+from typing import Any
+from uuid import UUID
+
+import httpx
+import sqlalchemy as sa
+from celery import shared_task
+from celery import Task
+from redis.lock import Lock as RedisLock
+from sqlalchemy import select
+
+from onyx.background.celery.apps.app_base import task_logger
+from onyx.background.celery.celery_utils import httpx_init_vespa_pool
+from onyx.background.celery.tasks.shared.RetryDocumentIndex import RetryDocumentIndex
+from onyx.background.celery.tasks.shared.tasks import LIGHT_SOFT_TIME_LIMIT
+from onyx.background.celery.tasks.shared.tasks import LIGHT_TIME_LIMIT
+from onyx.configs.app_configs import MANAGED_VESPA
+from onyx.configs.app_configs import VESPA_CLOUD_CERT_PATH
+from onyx.configs.app_configs import VESPA_CLOUD_KEY_PATH
+from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
+from onyx.configs.constants import DocumentSource
+from onyx.configs.constants import FileOrigin
+from onyx.configs.constants import OnyxCeleryPriority
+from onyx.configs.constants import OnyxCeleryQueues
+from onyx.configs.constants import OnyxCeleryTask
+from onyx.configs.constants import OnyxRedisLocks
+from onyx.connectors.file.connector import LocalFileConnector
+from onyx.connectors.models import Document
+from onyx.db.engine.sql_engine import get_session_with_current_tenant
+from onyx.db.enums import UserFileStatus
+from onyx.db.models import FileRecord
+from onyx.db.models import SearchDoc
+from onyx.db.models import UserFile
+from onyx.db.search_settings import get_active_search_settings
+from onyx.db.search_settings import get_active_search_settings_list
+from onyx.document_index.factory import get_default_document_index
+from onyx.document_index.interfaces import VespaDocumentUserFields
+from onyx.document_index.vespa.shared_utils.utils import get_vespa_http_client
+from onyx.document_index.vespa.shared_utils.utils import (
+    replace_invalid_doc_id_characters,
+)
+from onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT
+from onyx.document_index.vespa_constants import USER_PROJECT
+from onyx.file_store.file_store import get_default_file_store
+from onyx.file_store.file_store import S3BackedFileStore
+from onyx.httpx.httpx_pool import HttpxPool
+from onyx.indexing.adapters.user_file_indexing_adapter import UserFileIndexingAdapter
+from onyx.indexing.embedder import DefaultIndexingEmbedder
+from onyx.indexing.indexing_pipeline import run_indexing_pipeline
+from onyx.natural_language_processing.search_nlp_models import (
+    InformationContentClassificationModel,
+)
+from onyx.redis.redis_pool import get_redis_client
+
+
+def _as_uuid(value: str | UUID) -> UUID:
+    """Coerce ``value`` to a ``UUID``.
+
+    A ``UUID`` instance is handed back unchanged (the same object). Anything
+    else is converted with ``str()`` and parsed by the ``UUID`` constructor.
+    """
+    if isinstance(value, UUID):
+        return value
+    return UUID(str(value))
+
+
+def _user_file_lock_key(user_file_id: str | UUID) -> str:
+    """Build the Redis lock name ``<processing-lock-prefix>:<user_file_id>``."""
+    prefix = OnyxRedisLocks.USER_FILE_PROCESSING_LOCK_PREFIX
+    return f"{prefix}:{user_file_id}"
+
+
+def _user_file_project_sync_lock_key(user_file_id: str | UUID) -> str:
+    """Build the Redis lock name ``<project-sync-lock-prefix>:<user_file_id>``."""
+    prefix = OnyxRedisLocks.USER_FILE_PROJECT_SYNC_LOCK_PREFIX
+    return f"{prefix}:{user_file_id}"
+
+
+@shared_task(
+    name=OnyxCeleryTask.CHECK_FOR_USER_FILE_PROCESSING,
+    soft_time_limit=300,
+    bind=True,
+    ignore_result=True,
+)
+def check_user_file_processing(self: Task, *, tenant_id: str) -> None:
+    """Enqueue one processing task per user file whose status is PROCESSING.
+
+    A non-blocking Redis lock prevents overlapping runs: if the lock is
+    already held, the call returns immediately without enqueuing anything.
+
+    Args:
+        tenant_id: Tenant whose Redis client is used for the lock and which is
+            forwarded to every enqueued task.
+    """
+    task_logger.info("check_user_file_processing - Starting")
+
+    beat_lock: RedisLock = get_redis_client(tenant_id=tenant_id).lock(
+        OnyxRedisLocks.USER_FILE_PROCESSING_BEAT_LOCK,
+        timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,
+    )
+
+    # A previous run still holds the lock; leave the work to it.
+    if not beat_lock.acquire(blocking=False):
+        return None
+
+    enqueued = 0
+    try:
+        with get_session_with_current_tenant() as db_session:
+            pending_stmt = select(UserFile.id).where(
+                UserFile.status == UserFileStatus.PROCESSING
+            )
+            pending_ids = db_session.execute(pending_stmt).scalars().all()
+
+            for pending_id in pending_ids:
+                task_kwargs = {
+                    "user_file_id": str(pending_id),
+                    "tenant_id": tenant_id,
+                }
+                self.app.send_task(
+                    OnyxCeleryTask.PROCESS_SINGLE_USER_FILE,
+                    kwargs=task_kwargs,
+                    queue=OnyxCeleryQueues.USER_FILE_PROCESSING,
+                    priority=OnyxCeleryPriority.HIGH,
+                )
+                enqueued += 1
+
+    finally:
+        # The lock was created with a timeout, so release only if still owned.
+        if beat_lock.owned():
+            beat_lock.release()
+
+    task_logger.info(
+        f"check_user_file_processing - Enqueued {enqueued} tasks for tenant={tenant_id}"
+    )
+    return None
+
+
+@shared_task(
+    name=OnyxCeleryTask.PROCESS_SINGLE_USER_FILE,
+    bind=True,
+    ignore_result=True,
+)
+def process_single_user_file(self: Task, *, user_file_id: str, tenant_id: str) -> None:
+    """Index one user file and mark it FAILED if anything goes wrong.
+
+    The file is loaded through `LocalFileConnector`, every resulting document
+    is re-keyed to `user_file_id` with source `DocumentSource.USER_FILE`, and
+    the batch is pushed through `run_indexing_pipeline` using the current
+    search settings.
+
+    Nothing happens (beyond a log line) when the per-file Redis lock is
+    already held, the `UserFile` row does not exist, or its status is not
+    PROCESSING.
+
+    The row is set to `UserFileStatus.FAILED` when the pipeline reports
+    failures, indexes a different number of documents than were loaded,
+    produces zero chunks, or when an exception is raised. No success status is
+    written in this function -- presumably the indexing adapter takes care of
+    that; TODO confirm against `UserFileIndexingAdapter`.
+
+    Args:
+        user_file_id: String form of the `UserFile` primary key (parsed as UUID).
+        tenant_id: Tenant passed to the Redis client, the adapter and the pipeline.
+    """
+    task_logger.info(f"process_single_user_file - Starting id={user_file_id}")
+    start = time.monotonic()
+
+    redis_client = get_redis_client(tenant_id=tenant_id)
+    file_lock: RedisLock = redis_client.lock(
+        _user_file_lock_key(user_file_id), timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT
+    )
+
+    # Non-blocking: if the lock for this file is already held, skip this run.
+    if not file_lock.acquire(blocking=False):
+        task_logger.info(
+            f"process_single_user_file - Lock held, skipping user_file_id={user_file_id}"
+        )
+        return None
+
+    documents: list[Document] = []
+    try:
+        with get_session_with_current_tenant() as db_session:
+            uf = db_session.get(UserFile, _as_uuid(user_file_id))
+            if not uf:
+                task_logger.warning(
+                    f"process_single_user_file - UserFile not found id={user_file_id}"
+                )
+                return None
+
+            # Only files still in PROCESSING are handled; any other status is skipped.
+            if uf.status != UserFileStatus.PROCESSING:
+                task_logger.info(
+                    f"process_single_user_file - Skipping id={user_file_id} status={uf.status}"
+                )
+                return None
+
+            connector = LocalFileConnector(
+                file_locations=[uf.file_id],
+                file_names=[uf.name] if uf.name else None,
+                zip_metadata={},
+            )
+            connector.load_credentials({})
+
+            # 20 is the documented default for httpx max_keepalive_connections
+            if MANAGED_VESPA:
+                httpx_init_vespa_pool(
+                    20, ssl_cert=VESPA_CLOUD_CERT_PATH, ssl_key=VESPA_CLOUD_KEY_PATH
+                )
+            else:
+                httpx_init_vespa_pool(20)
+
+            search_settings_list = get_active_search_settings_list(db_session)
+
+            # Pick the first active settings entry whose status is "current".
+            current_search_settings = next(
+                (
+                    search_settings_instance
+                    for search_settings_instance in search_settings_list
+                    if search_settings_instance.status.is_current()
+                ),
+                None,
+            )
+
+            # Raised outside the inner try, so it is handled by the outer except below.
+            if current_search_settings is None:
+                raise RuntimeError(
+                    f"process_single_user_file - No current search settings found for tenant={tenant_id}"
+                )
+
+            try:
+                # Collect every batch the connector yields into one list.
+                for batch in connector.load_from_state():
+                    documents.extend(batch)
+
+                adapter = UserFileIndexingAdapter(
+                    tenant_id=tenant_id,
+                    db_session=db_session,
+                )
+
+                # Set up indexing pipeline components
+                embedding_model = DefaultIndexingEmbedder.from_db_search_settings(
+                    search_settings=current_search_settings,
+                )
+
+                information_content_classification_model = (
+                    InformationContentClassificationModel()
+                )
+
+                document_index = get_default_document_index(
+                    current_search_settings,
+                    None,
+                    httpx_client=HttpxPool.get("vespa"),
+                )
+
+                # Re-key every loaded document to the user file id and tag its source
+                for document in documents:
+                    document.id = str(user_file_id)
+                    document.source = DocumentSource.USER_FILE
+
+                # real work happens here!
+                index_pipeline_result = run_indexing_pipeline(
+                    embedder=embedding_model,
+                    information_content_classification_model=information_content_classification_model,
+                    document_index=document_index,
+                    ignore_time_skip=True,
+                    db_session=db_session,
+                    tenant_id=tenant_id,
+                    document_batch=documents,
+                    request_id=None,
+                    adapter=adapter,
+                )
+
+                task_logger.info(
+                    f"process_single_user_file - Indexing pipeline completed ={index_pipeline_result}"
+                )
+
+                # Treat reported failures, a document-count mismatch or an empty
+                # chunk count as a failed run and persist FAILED on the row.
+                if (
+                    index_pipeline_result.failures
+                    or index_pipeline_result.total_docs != len(documents)
+                    or index_pipeline_result.total_chunks == 0
+                ):
+                    task_logger.error(
+                        f"process_single_user_file - Indexing pipeline failed id={user_file_id}"
+                    )
+                    uf.status = UserFileStatus.FAILED
+                    db_session.add(uf)
+                    db_session.commit()
+                    return None
+
+            except Exception as e:
+                # Loading or indexing raised: record FAILED using the same session.
+                # If this commit itself raises, the outer except takes over.
+                task_logger.exception(
+                    f"process_single_user_file - Error processing file id={user_file_id} - {e.__class__.__name__}"
+                )
+                uf.status = UserFileStatus.FAILED
+                db_session.add(uf)
+                db_session.commit()
+                return None
+
+        elapsed = time.monotonic() - start
+        task_logger.info(
+            f"process_single_user_file - Finished id={user_file_id} docs={len(documents)} elapsed={elapsed:.2f}s"
+        )
+        return None
+    except Exception as e:
+        # Attempt to mark the file as failed, using a fresh session because the
+        # one opened above has already been exited by the time we get here.
+        with get_session_with_current_tenant() as db_session:
+            uf = db_session.get(UserFile, _as_uuid(user_file_id))
+            if uf:
+                uf.status = UserFileStatus.FAILED
+                db_session.add(uf)
+                db_session.commit()
+
+        task_logger.exception(
+            f"process_single_user_file - Error processing file id={user_file_id} - {e.__class__.__name__}"
+        )
+        return None
+    finally:
+        # Release only if we still own the lock (it was created with a timeout).
+        if file_lock.owned():
+            file_lock.release()
+
+
+@shared_task(
+    name=OnyxCeleryTask.CHECK_FOR_USER_FILE_PROJECT_SYNC,
+    soft_time_limit=300,
+    bind=True,
+    ignore_result=True,
+)
+def check_for_user_file_project_sync(self: Task, *, tenant_id: str) -> None:
+    """Enqueue a project-sync task for each completed user file flagged for sync.
+
+    A file is selected when `needs_project_sync` is true AND its status is
+    `UserFileStatus.COMPLETED`. A non-blocking Redis lock prevents overlapping
+    runs: if the lock is already held, the call returns without enqueuing.
+
+    Args:
+        tenant_id: Tenant whose Redis client is used for the lock and which is
+            forwarded to every enqueued task.
+    """
+    task_logger.info("check_for_user_file_project_sync - Starting")
+
+    redis_client = get_redis_client(tenant_id=tenant_id)
+    lock: RedisLock = redis_client.lock(
+        OnyxRedisLocks.USER_FILE_PROJECT_SYNC_BEAT_LOCK,
+        timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,
+    )
+
+    # Do not overlap with a run that still holds the lock.
+    if not lock.acquire(blocking=False):
+        return None
+
+    enqueued = 0
+    try:
+        with get_session_with_current_tenant() as db_session:
+            # Both criteria are passed to where() so they are combined in SQL.
+            # Python's `and` cannot be overloaded by SQLAlchemy: it evaluates
+            # the truthiness of the first expression in Python instead of
+            # producing an AND clause.
+            user_file_ids = (
+                db_session.execute(
+                    select(UserFile.id).where(
+                        UserFile.needs_project_sync.is_(True),
+                        UserFile.status == UserFileStatus.COMPLETED,
+                    )
+                )
+                .scalars()
+                .all()
+            )
+
+            for user_file_id in user_file_ids:
+                self.app.send_task(
+                    OnyxCeleryTask.PROCESS_SINGLE_USER_FILE_PROJECT_SYNC,
+                    kwargs={"user_file_id": str(user_file_id), "tenant_id": tenant_id},
+                    queue=OnyxCeleryQueues.USER_FILE_PROJECT_SYNC,
+                    priority=OnyxCeleryPriority.HIGH,
+                )
+                enqueued += 1
+    finally:
+        # The lock was created with a timeout, so release only if still owned.
+        if lock.owned():
+            lock.release()
+
+    task_logger.info(
+        f"check_for_user_file_project_sync - Enqueued {enqueued} tasks for tenant={tenant_id}"
+    )
+    return None
+
+
+@shared_task(
+    name=OnyxCeleryTask.PROCESS_SINGLE_USER_FILE_PROJECT_SYNC,
+    bind=True,
+    ignore_result=True,
+)
+def process_single_user_file_project_sync(
+    self: Task, *, user_file_id: str, tenant_id: str
+) -> None:
+    """Push one user file's project ids to the document index and clear its sync flag.
+
+    The ids of `user_file.projects` are written via
+    `RetryDocumentIndex.update_single`; afterwards `needs_project_sync` is
+    reset and `last_project_sync_at` is stamped with the current UTC time.
+    The run is skipped when the per-file Redis lock is already held or the
+    `UserFile` row does not exist. Exceptions are logged and swallowed.
+
+    Args:
+        user_file_id: String form of the `UserFile` primary key (parsed as UUID).
+        tenant_id: Tenant passed to the Redis client and the index update.
+    """
+    task_logger.info(
+        f"process_single_user_file_project_sync - Starting id={user_file_id}"
+    )
+
+    sync_lock: RedisLock = get_redis_client(tenant_id=tenant_id).lock(
+        _user_file_project_sync_lock_key(user_file_id),
+        timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,
+    )
+
+    # Non-blocking: if the lock for this file is already held, skip this run.
+    if not sync_lock.acquire(blocking=False):
+        task_logger.info(
+            f"process_single_user_file_project_sync - Lock held, skipping user_file_id={user_file_id}"
+        )
+        return None
+
+    try:
+        with get_session_with_current_tenant() as db_session:
+            settings = get_active_search_settings(db_session)
+            retry_index = RetryDocumentIndex(
+                get_default_document_index(
+                    search_settings=settings.primary,
+                    secondary_search_settings=settings.secondary,
+                    httpx_client=HttpxPool.get("vespa"),
+                )
+            )
+
+            user_file = db_session.get(UserFile, _as_uuid(user_file_id))
+            if not user_file:
+                task_logger.info(
+                    f"process_single_user_file_project_sync - User file not found id={user_file_id}"
+                )
+                return None
+
+            project_ids = []
+            for project in user_file.projects:
+                project_ids.append(project.id)
+
+            chunks_affected = retry_index.update_single(
+                doc_id=str(user_file.id),
+                tenant_id=tenant_id,
+                chunk_count=user_file.chunk_count,
+                fields=None,
+                user_fields=VespaDocumentUserFields(user_projects=project_ids),
+            )
+
+            task_logger.info(
+                f"process_single_user_file_project_sync - Chunks affected id={user_file_id} chunks={chunks_affected}"
+            )
+
+            # The index update succeeded: clear the flag and record when it happened.
+            synced_at = datetime.datetime.now(datetime.timezone.utc)
+            user_file.needs_project_sync = False
+            user_file.last_project_sync_at = synced_at
+            db_session.add(user_file)
+            db_session.commit()
+
+    except Exception as e:
+        task_logger.exception(
+            f"process_single_user_file_project_sync - Error syncing project for file id={user_file_id} - {e.__class__.__name__}"
+        )
+        return None
+    finally:
+        # Release only if we still own the lock (it was created with a timeout).
+        if sync_lock.owned():
+            sync_lock.release()
+
+    return None
+
+
+def _normalize_legacy_user_file_doc_id(old_id: str) -> str:
+    """Map a legacy ``USER_FILE_CONNECTOR__<uuid>`` id to ``FILE_CONNECTOR__<uuid>``.
+
+    Ids that do not start with the legacy prefix are returned unchanged.
+    """
+    legacy_prefix = "USER_FILE_CONNECTOR__"
+    if not old_id.startswith(legacy_prefix):
+        return old_id
+    suffix = old_id[len(legacy_prefix) :]
+    return "FILE_CONNECTOR__" + suffix
+
+
+def _visit_chunks(
+    *,
+    http_client: httpx.Client,
+    index_name: str,
+    selection: str,
+    continuation: str | None = None,
+) -> tuple[list[dict[str, Any]], str | None]:
+    """Fetch one page of documents matching ``selection`` from the index endpoint.
+
+    Args:
+        http_client: Client used for the GET request (sent with no timeout).
+        index_name: Substituted into ``DOCUMENT_ID_ENDPOINT``.
+        selection: Value sent as the ``selection`` query parameter.
+        continuation: Token from a previous page; omitted from the request
+            when falsy.
+
+    Returns:
+        ``(documents, continuation)`` taken from the JSON response; documents
+        defaults to ``[]`` and continuation to ``None`` when absent.
+
+    Raises:
+        httpx.HTTPStatusError: If the response status is an error.
+    """
+    endpoint = DOCUMENT_ID_ENDPOINT.format(index_name=index_name)
+
+    query: dict[str, str] = {"selection": selection, "wantedDocumentCount": "1000"}
+    if continuation:
+        query["continuation"] = continuation
+
+    response = http_client.get(endpoint, params=query, timeout=None)
+    response.raise_for_status()
+
+    body = response.json()
+    documents = body.get("documents", [])
+    next_token = body.get("continuation")
+    return documents, next_token
+
+
+def _update_document_id_in_vespa(
+    *,
+    index_name: str,
+    old_doc_id: str,
+    new_doc_id: str,
+    user_project_ids: list[int] | None = None,
+) -> None:
+    """Re-assign `document_id` on every Vespa chunk currently stored under `old_doc_id`.
+
+    The old id is first normalized from the legacy `USER_FILE_CONNECTOR__`
+    form, and both ids are passed through `replace_invalid_doc_id_characters`.
+    Matching chunks are visited page by page and each one receives a PUT that
+    assigns the new `document_id` and, when `user_project_ids` is given, the
+    `USER_PROJECT` field as well.
+
+    Args:
+        index_name: Vespa index (document type) to visit and update.
+        old_doc_id: Document id the chunks are currently stored under.
+        new_doc_id: Document id to assign to those chunks.
+        user_project_ids: Optional project ids assigned in the same update;
+            `None` leaves the field untouched, an empty list assigns `[]`.
+
+    Raises:
+        httpx.HTTPStatusError: If a visit or update request returns an error status.
+    """
+    clean_new_doc_id = replace_invalid_doc_id_characters(new_doc_id)
+    normalized_old = _normalize_legacy_user_file_doc_id(old_doc_id)
+    clean_old_doc_id = replace_invalid_doc_id_characters(normalized_old)
+
+    selection = f"{index_name}.document_id=='{clean_old_doc_id}'"
+    task_logger.debug(f"Vespa selection: {selection}")
+
+    # The endpoint and the update payload are identical for every chunk, so
+    # build them once instead of once per visited document.
+    document_endpoint = DOCUMENT_ID_ENDPOINT.format(index_name=index_name)
+    update_fields: dict[str, Any] = {"document_id": {"assign": clean_new_doc_id}}
+    if user_project_ids is not None:
+        update_fields[USER_PROJECT] = {"assign": user_project_ids}
+    update_request: dict[str, Any] = {"fields": update_fields}
+
+    with get_vespa_http_client() as http_client:
+        continuation: str | None = None
+        while True:
+            docs, continuation = _visit_chunks(
+                http_client=http_client,
+                index_name=index_name,
+                selection=selection,
+                continuation=continuation,
+            )
+            for doc in docs:
+                vespa_full_id = doc.get("id")
+                if not vespa_full_id:
+                    continue
+                # The last "::"-separated segment of the full id addresses the chunk.
+                vespa_doc_uuid = vespa_full_id.split("::")[-1]
+                r = http_client.put(
+                    f"{document_endpoint}/{vespa_doc_uuid}", json=update_request
+                )
+                r.raise_for_status()
+            # A page may come back empty while the visit is still in progress;
+            # only a missing continuation token means the visit is finished.
+            if not continuation:
+                break
+
+
+@shared_task(
+    name=OnyxCeleryTask.USER_FILE_DOCID_MIGRATION,
+    ignore_result=True,
+    soft_time_limit=LIGHT_SOFT_TIME_LIMIT,
+    time_limit=LIGHT_TIME_LIMIT,
+    bind=True,
+)
+def user_file_docid_migration_task(self: Task, *, tenant_id: str) -> bool:
+    """Per-tenant job to update Vespa and search_doc document_id values for user files.
+
+    - For each user_file with a legacy document_id, set Vespa `document_id` to the UUID `user_file.id`.
+    - Update `search_doc.document_id` to the same UUID string.
+    - For S3-backed file stores, move plaintext-cache objects whose stored
+      object key differs from the key derived from their current file_id.
+
+    Vespa failures for an individual file are logged and skipped. A user file
+    is flagged `document_id_migrated` only when at least one matching
+    SearchDoc row was found for it.
+
+    Args:
+        tenant_id: Tenant identifier; used here only in log messages.
+
+    Returns:
+        True when the job ran to the end, False when an exception reached the
+        outermost handler.
+    """
+
+    try:
+        with get_session_with_current_tenant() as db_session:
+            active_settings = get_active_search_settings(db_session)
+            document_index = get_default_document_index(
+                active_settings.primary,
+                active_settings.secondary,
+            )
+            # Fall back to a fixed index name when the index object exposes none.
+            if hasattr(document_index, "index_name"):
+                index_name = document_index.index_name
+            else:
+                index_name = "danswer_index"
+
+            # Fetch mappings of legacy -> new ids
+            rows = db_session.execute(
+                sa.select(
+                    UserFile.document_id.label("document_id"),
+                    UserFile.id.label("id"),
+                ).where(
+                    UserFile.document_id.is_not(None),
+                    UserFile.document_id_migrated.is_(False),
+                )
+            ).all()
+
+            # dedupe by old document_id
+            seen: set[str] = set()
+            for row in rows:
+                old_doc_id = str(row.document_id)
+                new_uuid = str(row.id)
+                if not old_doc_id or not new_uuid or old_doc_id in seen:
+                    continue
+                seen.add(old_doc_id)
+                # collect user project ids for a combined Vespa update
+                user_project_ids: list[int] | None = None
+                try:
+                    uf = db_session.get(UserFile, UUID(new_uuid))
+                    if uf is not None:
+                        user_project_ids = [project.id for project in uf.projects]
+                except Exception as e:
+                    # Best-effort: without project ids only document_id is updated.
+                    task_logger.warning(
+                        f"Tenant={tenant_id} failed fetching projects for doc_id={new_uuid} - {e.__class__.__name__}"
+                    )
+                try:
+                    _update_document_id_in_vespa(
+                        index_name=index_name,
+                        old_doc_id=old_doc_id,
+                        new_doc_id=new_uuid,
+                        user_project_ids=user_project_ids,
+                    )
+                except Exception as e:
+                    # A failed Vespa update is logged and the loop moves on.
+                    # NOTE(review): the file can still be flagged as migrated
+                    # further down if it has matching SearchDocs -- confirm
+                    # this is intended.
+                    task_logger.warning(
+                        f"Tenant={tenant_id} failed Vespa update for doc_id={new_uuid} - {e.__class__.__name__}"
+                    )
+            # Update search_doc records to refer to the UUID string.
+            # This query deliberately does not filter on document_id_migrated == False:
+            # files already flagged as migrated by an earlier run would otherwise be
+            # skipped here and their search_doc rows would keep the legacy ids.
+            user_files = (
+                db_session.execute(
+                    sa.select(UserFile).where(UserFile.document_id.is_not(None))
+                )
+                .scalars()
+                .all()
+            )
+
+            # Query all SearchDocs that need updating
+            search_docs = (
+                db_session.execute(
+                    sa.select(SearchDoc).where(
+                        SearchDoc.document_id.like("%FILE_CONNECTOR__%")
+                    )
+                )
+                .scalars()
+                .all()
+            )
+
+            task_logger.info(f"Found {len(user_files)} user files to update")
+            task_logger.info(f"Found {len(search_docs)} search docs to update")
+
+            # Group SearchDocs by their current (stored) document_id
+            search_doc_map: dict[str, list[SearchDoc]] = {}
+            for sd in search_docs:
+                doc_id = sd.document_id
+                if search_doc_map.get(doc_id) is None:
+                    search_doc_map[doc_id] = []
+                search_doc_map[doc_id].append(sd)
+
+            # Process each UserFile and update matching SearchDocs
+            updated_count = 0
+            for uf in user_files:
+                doc_id = uf.document_id
+                # Same legacy-prefix rewrite as _normalize_legacy_user_file_doc_id
+                if doc_id.startswith("USER_FILE_CONNECTOR__"):
+                    doc_id = "FILE_CONNECTOR__" + doc_id[len("USER_FILE_CONNECTOR__") :]
+
+                if doc_id in search_doc_map:
+                    # Update the SearchDoc to use the UserFile's UUID
+                    for search_doc in search_doc_map[doc_id]:
+                        search_doc.document_id = str(uf.id)
+                        db_session.add(search_doc)
+
+                    # Mark UserFile as migrated
+                    uf.document_id_migrated = True
+                    db_session.add(uf)
+                    # Counts user files with at least one match, not SearchDoc rows.
+                    updated_count += 1
+
+            task_logger.info(
+                f"Updated {updated_count} SearchDoc records with new UUIDs"
+            )
+            db_session.commit()
+
+            # Normalize plaintext FileRecord blobs: ensure S3 object key aligns with current file_id
+            try:
+                store = get_default_file_store()
+                # Only supported for S3-backed stores where we can manipulate object keys
+                if isinstance(store, S3BackedFileStore):
+                    s3_client = store._get_s3_client()
+                    bucket_name = store._get_bucket_name()
+
+                    plaintext_records: Sequence[FileRecord] = (
+                        db_session.execute(
+                            sa.select(FileRecord).where(
+                                FileRecord.file_origin == FileOrigin.PLAINTEXT_CACHE,
+                                FileRecord.file_id.like("plaintext_%"),
+                            )
+                        )
+                        .scalars()
+                        .all()
+                    )
+
+                    normalized = 0
+                    for fr in plaintext_records:
+                        try:
+                            # Records whose key already matches need no work.
+                            expected_key = store._get_s3_key(fr.file_id)
+                            if fr.object_key == expected_key:
+                                continue
+
+                            # Copy old object to new key
+                            copy_source = f"{fr.bucket_name}/{fr.object_key}"
+                            s3_client.copy_object(
+                                CopySource=copy_source,
+                                Bucket=bucket_name,
+                                Key=expected_key,
+                                MetadataDirective="COPY",
+                            )
+
+                            # Delete old object (best-effort)
+                            try:
+                                s3_client.delete_object(
+                                    Bucket=fr.bucket_name, Key=fr.object_key
+                                )
+                            except Exception:
+                                # Deliberately ignored: the copy above already succeeded.
+                                pass
+
+                            # Update DB record with new key
+                            # NOTE(review): fr.bucket_name is left unchanged even though
+                            # the copy targets the store's bucket -- confirm they match.
+                            fr.object_key = expected_key
+                            db_session.add(fr)
+                            normalized += 1
+                        except Exception as e:
+                            # One failing record is logged and does not stop the others.
+                            task_logger.warning(
+                                f"Tenant={tenant_id} failed plaintext object normalize for "
+                                f"id={fr.file_id} - {e.__class__.__name__}"
+                            )
+
+                    # Commit only when at least one record was changed.
+                    if normalized:
+                        db_session.commit()
+                        task_logger.info(
+                            f"user_file_docid_migration_task normalized {normalized} plaintext objects for tenant={tenant_id}"
+                        )
+                else:
+                    task_logger.info(
+                        "user_file_docid_migration_task skipping plaintext object normalization (non-S3 store)"
+                    )
+            except Exception:
+                # Normalization errors are logged here and do not change the
+                # task's return value.
+                task_logger.exception(
+                    f"user_file_docid_migration_task - Error during plaintext normalization for tenant={tenant_id}"
+                )
+
+        # `rows` holds the not-yet-migrated (document_id, id) pairs fetched above.
+        task_logger.info(
+            f"user_file_docid_migration_task completed for tenant={tenant_id} (rows={len(rows)})"
+        )
+        return True
+    except Exception:
+        task_logger.exception(
+            f"user_file_docid_migration_task - Error during execution for tenant={tenant_id}"
+        )
+        return False
--- a/backend/onyx/background/celery/tasks/vespa/tasks.py
+++ b/backend/onyx/background/celery/tasks/vespa/tasks.py
@@ -414,8 +414,14 @@ def monitor_document_set_taskset(
        get_document_set_by_id(db_session=db_session, document_set_id=document_set_id),
    )  # casting since we "know" a document set with this ID exists
    if document_set:
-        if not document_set.connector_credential_pairs:
-            # if there are no connectors, then delete the document set.
+        has_connector_pairs = bool(document_set.connector_credential_pairs)
+        # Federated connectors should keep a document set alive even without cc pairs.
+        has_federated_connectors = bool(
+            getattr(document_set, "federated_connectors", [])
+        )
+
+        if not has_connector_pairs and not has_federated_connectors:
+            # If there are no connectors of any kind, delete the document set.
            delete_document_set(document_set_row=document_set, db_session=db_session)
            task_logger.info(
                f"Successfully deleted document set: document_set={document_set_id}"
--- a/backend/onyx/background/celery/versioned_apps/user_file_processing.py
+++ b/backend/onyx/background/celery/versioned_apps/user_file_processing.py
@@ -0,0 +1,16 @@
+"""Factory stub for running the user file processing Celery worker."""
+
+from celery import Celery
+
+from onyx.utils.variable_functionality import set_is_ee_based_on_env_variable
+
+set_is_ee_based_on_env_variable()  # runs at import, before the app is loaded
+
+
+def get_app() -> Celery:
+    # Imported here rather than at module top: runs only when get_app() is called
+    from onyx.background.celery.apps.user_file_processing import celery_app
+    return celery_app
+
+
+app = get_app()  # module-level app instance built when this file is imported
--- a/backend/onyx/background/indexing/run_docfetching.py
+++ b/backend/onyx/background/indexing/run_docfetching.py
@@ -28,7 +28,6 @@ from onyx.configs.constants import OnyxCeleryTask
 from onyx.connectors.connector_runner import ConnectorRunner
 from onyx.connectors.exceptions import ConnectorValidationError
 from onyx.connectors.exceptions import UnexpectedValidationError
-from onyx.connectors.factory import instantiate_connector
 from onyx.connectors.interfaces import CheckpointedConnector
 from onyx.connectors.models import ConnectorFailure
 from onyx.connectors.models import ConnectorStopSignal
@@ -64,9 +63,11 @@ from onyx.document_index.factory import get_default_document_index
 from onyx.file_store.document_batch_storage import DocumentBatchStorage
 from onyx.file_store.document_batch_storage import get_document_batch_storage
 from onyx.httpx.httpx_pool import HttpxPool
+from onyx.indexing.adapters.document_indexing_adapter import (
+    DocumentIndexingBatchAdapter,
+)
 from onyx.indexing.embedder import DefaultIndexingEmbedder
 from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
-from onyx.indexing.indexing_pipeline import run_indexing_pipeline
 from onyx.natural_language_processing.search_nlp_models import (
    InformationContentClassificationModel,
 )
@@ -100,6 +101,8 @@ def _get_connector_runner(
    are the complete list of existing documents of the connector. If the task
    of type LOAD_STATE, the list will be considered complete and otherwise incomplete.
    """
+    from onyx.connectors.factory import instantiate_connector
+
    task = attempt.connector_credential_pair.connector.input_type

    try:
@@ -283,6 +286,8 @@ def _run_indexing(
    2. Embed and index these documents into the chosen datastore (vespa)
    3. Updates Postgres to record the indexed documents + the outcome of this run
    """
+    from onyx.indexing.indexing_pipeline import run_indexing_pipeline
+
    start_time = time.monotonic()  # jsut used for logging

    with get_session_with_current_tenant() as db_session_temp:
@@ -567,6 +572,13 @@ def _run_indexing(
                index_attempt_md.batch_num = batch_num + 1  # use 1-index for this

                # real work happens here!
+                adapter = DocumentIndexingBatchAdapter(
+                    db_session=db_session,
+                    connector_id=ctx.connector_id,
+                    credential_id=ctx.credential_id,
+                    tenant_id=tenant_id,
+                    index_attempt_metadata=index_attempt_md,
+                )
                index_pipeline_result = run_indexing_pipeline(
                    embedder=embedding_model,
                    information_content_classification_model=information_content_classification_model,
@@ -578,7 +590,8 @@ def _run_indexing(
                    db_session=db_session,
                    tenant_id=tenant_id,
                    document_batch=doc_batch_cleaned,
-                    index_attempt_metadata=index_attempt_md,
+                    request_id=index_attempt_md.request_id,
+                    adapter=adapter,
                )

                batch_num += 1
--- a/backend/onyx/chat/answer.py
+++ b/backend/onyx/chat/answer.py
@@ -19,6 +19,7 @@ from onyx.chat.models import StreamStopReason
 from onyx.chat.prompt_builder.answer_prompt_builder import AnswerPromptBuilder
 from onyx.configs.agent_configs import AGENT_ALLOW_REFINEMENT
 from onyx.configs.agent_configs import INITIAL_SEARCH_DECOMPOSITION_ENABLED
+from onyx.configs.agent_configs import TF_DR_DEFAULT_FAST
 from onyx.context.search.models import RerankingDetails
 from onyx.db.kg_config import get_kg_config_settings
 from onyx.db.models import Persona
@@ -61,6 +62,7 @@ class Answer:
        use_agentic_search: bool = False,
        research_type: ResearchType | None = None,
        research_plan: dict[str, Any] | None = None,
+        project_instructions: str | None = None,
    ) -> None:
        self.is_connected: Callable[[], bool] | None = is_connected
        self._processed_stream: list[AnswerStreamPart] | None = None
@@ -96,6 +98,7 @@ class Answer:
            prompt_builder=prompt_builder,
            files=latest_query_files,
            structured_response_format=answer_style_config.structured_response_format,
+            project_instructions=project_instructions,
        )
        self.graph_tooling = GraphTooling(
            primary_llm=llm,
@@ -110,6 +113,14 @@ class Answer:
            chat_session_id=chat_session_id,
            message_id=current_agent_message_id,
        )
+
+        if use_agentic_search:
+            research_type = ResearchType.DEEP
+        elif TF_DR_DEFAULT_FAST:
+            research_type = ResearchType.FAST
+        else:
+            research_type = ResearchType.THOUGHTFUL
+
        self.search_behavior_config = GraphSearchConfig(
            use_agentic_search=use_agentic_search,
            skip_gen_ai_answer_generation=skip_gen_ai_answer_generation,
@@ -117,9 +128,7 @@ class Answer:
            allow_agent_reranking=allow_agent_reranking,
            perform_initial_search_decomposition=INITIAL_SEARCH_DECOMPOSITION_ENABLED,
            kg_config_settings=get_kg_config_settings(),
-            research_type=(
-                ResearchType.DEEP if use_agentic_search else ResearchType.THOUGHTFUL
-            ),
+            research_type=research_type,
        )
        self.graph_config = GraphConfig(
            inputs=self.graph_inputs,
--- a/backend/onyx/chat/chat_utils.py
+++ b/backend/onyx/chat/chat_utils.py
@@ -32,17 +32,18 @@ from onyx.db.llm import fetch_existing_doc_sets
 from onyx.db.llm import fetch_existing_tools
 from onyx.db.models import ChatMessage
 from onyx.db.models import Persona
-from onyx.db.models import Prompt
+from onyx.db.models import SearchDoc as DbSearchDoc
 from onyx.db.models import Tool
 from onyx.db.models import User
-from onyx.db.prompts import get_prompts_by_ids
 from onyx.db.search_settings import get_current_search_settings
 from onyx.kg.models import KGException
 from onyx.kg.setup.kg_default_entity_definitions import (
    populate_missing_default_entity_types__commit,
 )
 from onyx.llm.models import PreviousMessage
+from onyx.llm.override_models import LLMOverride
 from onyx.natural_language_processing.utils import BaseTokenizer
+from onyx.onyxbot.slack.models import SlackContext
 from onyx.server.query_and_chat.models import CreateChatMessageRequest
 from onyx.server.query_and_chat.streaming_models import CitationInfo
 from onyx.tools.tool_implementations.custom.custom_tool import (
@@ -59,13 +60,15 @@ def prepare_chat_message_request(
    persona_id: int | None,
    # Does the question need to have a persona override
    persona_override_config: PersonaOverrideConfig | None,
-    prompt: Prompt | None,
    message_ts_to_respond_to: str | None,
    retrieval_details: RetrievalDetails | None,
    rerank_settings: RerankingDetails | None,
    db_session: Session,
    use_agentic_search: bool = False,
    skip_gen_ai_answer_generation: bool = False,
+    llm_override: LLMOverride | None = None,
+    allowed_tool_ids: list[int] | None = None,
+    slack_context: SlackContext | None = None,
 ) -> CreateChatMessageRequest:
    # Typically used for one shot flows like SlackBot or non-chat API endpoint use cases
    new_chat_session = create_chat_session(
@@ -83,7 +86,6 @@ def prepare_chat_message_request(
        parent_message_id=None,  # It's a standalone chat session each time
        message=message_text,
        file_descriptors=[],  # Currently SlackBot/answer api do not support files in the context
-        prompt_id=prompt.id if prompt else None,
        # Can always override the persona for the single query, if it's a normal persona
        # then it will be treated the same
        persona_override_config=persona_override_config,
@@ -92,6 +94,9 @@ def prepare_chat_message_request(
        rerank_settings=rerank_settings,
        use_agentic_search=use_agentic_search,
        skip_gen_ai_answer_generation=skip_gen_ai_answer_generation,
+        llm_override=llm_override,
+        allowed_tool_ids=allowed_tool_ids,
+        slack_context=slack_context,  # Pass Slack context
    )


@@ -339,6 +344,45 @@ def reorganize_citations(
    return new_answer, list(new_citation_info.values())


+def build_citation_map_from_infos(
+    citations_list: list[CitationInfo], db_docs: list[DbSearchDoc]
+) -> dict[int, int]:
+    """Map streamed citation numbers to the DB ids of saved search docs.
+
+    The first db_doc per document_id and the first resolvable citation per
+    citation number win; citations matching no db_doc are left out.
+    db_docs is assumed to be in display order, as shown to the user.
+    """
+    first_saved_id_by_doc_id: dict[str, int] = {}
+    for doc in db_docs:
+        # setdefault never overwrites, so the earliest doc for an id is kept
+        first_saved_id_by_doc_id.setdefault(doc.document_id, doc.id)
+
+    citation_map: dict[int, int] = {}
+    for info in citations_list:
+        if info.citation_num in citation_map:
+            continue
+        resolved_id = first_saved_id_by_doc_id.get(info.document_id)
+        if resolved_id is not None:
+            citation_map[info.citation_num] = resolved_id
+    return citation_map
+
+
+def build_citation_map_from_numbers(
+    cited_numbers: list[int] | set[int], db_docs: list[DbSearchDoc]
+) -> dict[int, int]:
+    """Map parsed citation numbers (e.g., from [[n]]) to saved search doc DB
+    ids, treating citation n as the 1-based position of a doc in db_docs.
+    Numbers outside 1..len(db_docs) are dropped; keys are in ascending order.
+    """
+    doc_count = len(db_docs)
+    return {
+        number: db_docs[number - 1].id
+        for number in sorted(set(cited_numbers))
+        if 1 <= number <= doc_count
+    }
+
+
 def extract_headers(
    headers: dict[str, str] | Headers, pass_through_headers: list[str] | None
 ) -> dict[str, str]:
@@ -389,20 +433,11 @@ def create_temporary_persona(
    )

    if persona_config.prompts:
-        persona.prompts = [
-            Prompt(
-                name=p.name,
-                description=p.description,
-                system_prompt=p.system_prompt,
-                task_prompt=p.task_prompt,
-                datetime_aware=p.datetime_aware,
-            )
-            for p in persona_config.prompts
-        ]
-    elif persona_config.prompt_ids:
-        persona.prompts = get_prompts_by_ids(
-            db_session=db_session, prompt_ids=persona_config.prompt_ids
-        )
+        # Use the first prompt from the override config for embedded prompt fields
+        first_prompt = persona_config.prompts[0]
+        persona.system_prompt = first_prompt.system_prompt
+        persona.task_prompt = first_prompt.task_prompt
+        persona.datetime_aware = first_prompt.datetime_aware

    persona.tools = []
    if persona_config.custom_tools_openapi:
--- a/backend/onyx/chat/models.py
+++ b/backend/onyx/chat/models.py
@@ -30,7 +30,7 @@ from onyx.tools.models import ToolResponse
 from onyx.tools.tool_implementations.custom.base_tool_types import ToolResultType

 if TYPE_CHECKING:
-    from onyx.db.models import Prompt
+    from onyx.db.models import Persona


 class LlmDoc(BaseModel):
@@ -171,6 +171,7 @@ class PromptOverrideConfig(BaseModel):
    system_prompt: str
    task_prompt: str = ""
    datetime_aware: bool = True
+    include_citations: bool = True


 class PersonaOverrideConfig(BaseModel):
@@ -185,7 +186,7 @@ class PersonaOverrideConfig(BaseModel):
    llm_model_version_override: str | None = None

    prompts: list[PromptOverrideConfig] = Field(default_factory=list)
-    prompt_ids: list[int] = Field(default_factory=list)
+    # Note: prompt_ids removed - prompts are now embedded in personas

    document_set_ids: list[int] = Field(default_factory=list)
    tools: list[ToolConfig] = Field(default_factory=list)
@@ -270,7 +271,7 @@ class PromptConfig(BaseModel):

    @classmethod
    def from_model(
-        cls, model: "Prompt", prompt_override: PromptOverride | None = None
+        cls, model: "Persona", prompt_override: PromptOverride | None = None
    ) -> "PromptConfig":
        override_system_prompt = (
            prompt_override.system_prompt if prompt_override else None
@@ -278,8 +279,8 @@ class PromptConfig(BaseModel):
        override_task_prompt = prompt_override.task_prompt if prompt_override else None

        return cls(
-            system_prompt=override_system_prompt or model.system_prompt,
-            task_prompt=override_task_prompt or model.task_prompt,
+            system_prompt=override_system_prompt or model.system_prompt or "",
+            task_prompt=override_task_prompt or model.task_prompt or "",
            datetime_aware=model.datetime_aware,
        )

--- a/backend/onyx/chat/process_message.py
+++ b/backend/onyx/chat/process_message.py
@@ -5,6 +5,7 @@ from collections.abc import Callable
 from collections.abc import Iterator
 from typing import cast
 from typing import Protocol
+from uuid import UUID

 from sqlalchemy.orm import Session

@@ -18,6 +19,7 @@ from onyx.chat.models import AnswerStyleConfig
 from onyx.chat.models import ChatBasicResponse
 from onyx.chat.models import CitationConfig
 from onyx.chat.models import DocumentPruningConfig
+from onyx.chat.models import LlmDoc
 from onyx.chat.models import MessageResponseIDInfo
 from onyx.chat.models import MessageSpecificCitations
 from onyx.chat.models import PromptConfig
@@ -35,6 +37,7 @@ from onyx.configs.chat_configs import CHAT_TARGET_CHUNK_PERCENTAGE
 from onyx.configs.chat_configs import DISABLE_LLM_CHOOSE_SEARCH
 from onyx.configs.chat_configs import MAX_CHUNKS_FED_TO_CHAT
 from onyx.configs.chat_configs import SELECTED_SECTIONS_MAX_WINDOW_PERCENTAGE
+from onyx.configs.constants import DocumentSource
 from onyx.configs.constants import MessageType
 from onyx.configs.constants import MilestoneRecordType
 from onyx.configs.constants import NO_AUTH_USER_ID
@@ -63,9 +66,13 @@ from onyx.db.models import SearchDoc as DbSearchDoc
 from onyx.db.models import ToolCall
 from onyx.db.models import User
 from onyx.db.persona import get_persona_by_id
+from onyx.db.projects import get_project_instructions
+from onyx.db.projects import get_user_files_from_project
 from onyx.db.search_settings import get_current_search_settings
 from onyx.document_index.factory import get_default_document_index
 from onyx.file_store.models import FileDescriptor
+from onyx.file_store.models import InMemoryChatFile
+from onyx.file_store.utils import build_frontend_file_url
 from onyx.file_store.utils import load_all_chat_files
 from onyx.kg.models import KGException
 from onyx.llm.exceptions import GenAIDisabledException
@@ -88,12 +95,12 @@ from onyx.tools.tool import Tool
 from onyx.tools.tool_constructor import construct_tools
 from onyx.tools.tool_constructor import CustomToolConfig
 from onyx.tools.tool_constructor import ImageGenerationToolConfig
-from onyx.tools.tool_constructor import InternetSearchToolConfig
 from onyx.tools.tool_constructor import SearchToolConfig
-from onyx.tools.tool_implementations.internet_search.internet_search_tool import (
-    InternetSearchTool,
-)
+from onyx.tools.tool_constructor import WebSearchToolConfig
 from onyx.tools.tool_implementations.search.search_tool import SearchTool
+from onyx.tools.tool_implementations.web_search.web_search_tool import (
+    WebSearchTool,
+)
 from onyx.utils.logger import setup_logger
 from onyx.utils.long_term_log import LongTermLogger
 from onyx.utils.telemetry import mt_cloud_telemetry
@@ -101,6 +108,7 @@ from onyx.utils.timing import log_function_time
 from onyx.utils.timing import log_generator_function_time
 from shared_configs.contextvars import get_current_tenant_id

+
 logger = setup_logger()
 ERROR_TYPE_CANCELLED = "cancelled"

@@ -119,6 +127,55 @@ class PartialResponse(Protocol):
    ) -> ChatMessage: ...


+def _build_project_llm_docs(
+    project_file_ids: list[str] | None,
+    in_memory_user_files: list[InMemoryChatFile] | None,
+) -> list[LlmDoc]:
+    """Wrap project-scoped in-memory user files as `LlmDoc`s for citation flow.
+
+    Files whose `file_id` is not in `project_file_ids` (e.g. ad-hoc uploads) are
+    skipped; an empty list is returned when either argument is None or empty.
+    """
+    if not (project_file_ids and in_memory_user_files):
+        return []
+
+    wanted_ids = set(project_file_ids)
+    docs: list[LlmDoc] = []
+    for user_file in in_memory_user_files:
+        if user_file.file_id not in wanted_ids:
+            continue
+        try:
+            body = user_file.content.decode("utf-8", errors="ignore")
+        except Exception:
+            # Content that cannot be decoded is treated as empty text
+            body = ""
+        # Blurb for UI display: the text itself, cut to 200 chars + "..." if longer
+        preview = body if len(body) <= 200 else body[:200] + "..."
+        doc_id = str(user_file.file_id)
+        display_name = user_file.filename or doc_id
+        # Basic metadata to improve SavedSearchDoc display
+        doc_metadata: dict[str, str | list[str]] = {
+            "filename": display_name,
+            "file_type": user_file.file_type.value,
+        }
+        docs.append(
+            LlmDoc(
+                document_id=doc_id,
+                content=body,
+                blurb=preview,
+                semantic_identifier=display_name,
+                source_type=DocumentSource.USER_FILE,
+                metadata=doc_metadata,
+                updated_at=None,
+                link=build_frontend_file_url(doc_id),
+                source_links=None,
+                match_highlights=None,
+            )
+        )
+
+    return docs
+
+
 def _translate_citations(
    citations_list: list[CitationInfo], db_docs: list[DbSearchDoc]
 ) -> MessageSpecificCitations:
@@ -159,12 +216,10 @@ def _get_force_search_settings(
            override_kwargs=search_tool_override_kwargs,
        )

-    internet_search_available = any(
-        isinstance(tool, InternetSearchTool) for tool in tools
-    )
+    web_search_available = any(isinstance(tool, WebSearchTool) for tool in tools)
    search_tool_available = any(isinstance(tool, SearchTool) for tool in tools)

-    if not internet_search_available and not search_tool_available:
+    if not web_search_available and not search_tool_available:
        # Does not matter much which tool is set here as force is false and neither tool is available
        return ForceUseTool(force_use=False, tool_name=SearchTool._NAME)
    # Currently, the internet search tool does not support query override
@@ -199,9 +254,7 @@ def _get_force_search_settings(

    return ForceUseTool(
        force_use=False,
-        tool_name=(
-            SearchTool._NAME if search_tool_available else InternetSearchTool._NAME
-        ),
+        tool_name=(SearchTool._NAME if search_tool_available else WebSearchTool._NAME),
        args=args,
        override_kwargs=None,
    )
@@ -335,11 +388,8 @@ def stream_chat_message_objects(
                properties=None,
            )

-        # If a prompt override is specified via the API, use that with highest priority
-        # but for saving it, we are just mapping it to an existing prompt
-        prompt_id = new_msg_req.prompt_id
-        if prompt_id is None and persona.prompts:
-            prompt_id = sorted(persona.prompts, key=lambda x: x.id)[-1].id
+        # Note: prompt configuration is now embedded in the persona
+        # No need for separate prompt_id handling

        if reference_doc_ids is None and retrieval_options is None:
            raise RuntimeError(
@@ -399,7 +449,6 @@ def stream_chat_message_objects(
            user_message = create_new_chat_message(
                chat_session_id=chat_session_id,
                parent_message=parent_message,
-                prompt_id=prompt_id,
                message=message_text,
                token_count=len(llm_tokenizer_encode_func(message_text)),
                message_type=MessageType.USER,
@@ -444,26 +493,29 @@ def stream_chat_message_objects(
        files = load_all_chat_files(history_msgs, new_msg_req.file_descriptors)
        req_file_ids = [f["id"] for f in new_msg_req.file_descriptors]
        latest_query_files = [file for file in files if file.file_id in req_file_ids]
-        user_file_ids = new_msg_req.user_file_ids or []
-        user_folder_ids = new_msg_req.user_folder_ids or []
+        user_file_ids: list[UUID] = []

        if persona.user_files:
-            for file in persona.user_files:
-                user_file_ids.append(file.id)
-        if persona.user_folders:
-            for folder in persona.user_folders:
-                user_folder_ids.append(folder.id)
+            for uf in persona.user_files:
+                user_file_ids.append(uf.id)
+
+        if new_msg_req.current_message_files:
+            for fd in new_msg_req.current_message_files:
+                uid = fd.get("user_file_id")
+                if uid is not None:
+                    user_file_id = UUID(uid)
+                    user_file_ids.append(user_file_id)

        # Load in user files into memory and create search tool override kwargs if needed
-        # if we have enough tokens and no folders, we don't need to use search
+        # if we have enough tokens, we don't need to use search
        # we can just pass them into the prompt directly
        (
            in_memory_user_files,
            user_file_models,
            search_tool_override_kwargs_for_user_files,
        ) = parse_user_files(
-            user_file_ids=user_file_ids,
-            user_folder_ids=user_folder_ids,
+            user_file_ids=user_file_ids or [],
+            project_id=chat_session.project_id,
            db_session=db_session,
            persona=persona,
            actual_user_input=message_text,
@@ -472,16 +524,37 @@ def stream_chat_message_objects(
        if not search_tool_override_kwargs_for_user_files:
            latest_query_files.extend(in_memory_user_files)

+        project_file_ids = []
+        if chat_session.project_id:
+            project_file_ids.extend(
+                [
+                    file.file_id
+                    for file in get_user_files_from_project(
+                        chat_session.project_id, user_id, db_session
+                    )
+                ]
+            )
+
+        # we don't want to attach project files to the user message
        if user_message:
            attach_files_to_chat_message(
                chat_message=user_message,
                files=[
-                    new_file.to_file_descriptor() for new_file in latest_query_files
+                    new_file.to_file_descriptor()
+                    for new_file in latest_query_files
+                    if project_file_ids is not None
+                    and (new_file.file_id not in project_file_ids)
                ],
                db_session=db_session,
                commit=False,
            )

+        # Build project context docs for citation flow if project files are present
+        project_llm_docs: list[LlmDoc] = _build_project_llm_docs(
+            project_file_ids=project_file_ids,
+            in_memory_user_files=in_memory_user_files,
+        )
+
        selected_db_search_docs = None
        selected_sections: list[InferenceSection] | None = None
        if reference_doc_ids:
@@ -559,18 +632,22 @@ def stream_chat_message_objects(
                ].datetime_aware,
            )
        elif prompt_override:
-            if not final_msg.prompt:
-                raise ValueError(
-                    "Prompt override cannot be applied, no base prompt found."
-                )
+            # Apply prompt override on top of persona-embedded prompt
            prompt_config = PromptConfig.from_model(
-                final_msg.prompt,
+                persona,
                prompt_override=prompt_override,
            )
        else:
-            prompt_config = PromptConfig.from_model(
-                final_msg.prompt or persona.prompts[0]
+            prompt_config = PromptConfig.from_model(persona)
+
+        # Retrieve project-specific instructions if this chat session is associated with a project.
+        project_instructions: str | None = (
+            get_project_instructions(
+                db_session=db_session, project_id=chat_session.project_id
            )
+            if persona.is_default_persona
+            else None
+        )  # if the persona is not default, we don't want to use the project instructions

        answer_style_config = AnswerStyleConfig(
            citation_config=CitationConfig(
@@ -578,6 +655,7 @@ def stream_chat_message_objects(
            ),
            structured_response_format=new_msg_req.structured_response_format,
        )
+        has_project_files = project_file_ids is not None and len(project_file_ids) > 0

        tool_dict = construct_tools(
            persona=persona,
@@ -587,9 +665,17 @@ def stream_chat_message_objects(
            llm=llm,
            fast_llm=fast_llm,
            run_search_setting=(
-                retrieval_options.run_search
-                if retrieval_options
-                else OptionalSearchSetting.AUTO
+                OptionalSearchSetting.NEVER
+                if (
+                    chat_session.project_id
+                    and not has_project_files
+                    and persona.is_default_persona
+                )
+                else (
+                    retrieval_options.run_search
+                    if retrieval_options
+                    else OptionalSearchSetting.AUTO
+                )
            ),
            search_tool_config=SearchToolConfig(
                answer_style_config=answer_style_config,
@@ -603,7 +689,7 @@ def stream_chat_message_objects(
                latest_query_files=latest_query_files,
                bypass_acl=bypass_acl,
            ),
-            internet_search_tool_config=InternetSearchToolConfig(
+            internet_search_tool_config=WebSearchToolConfig(
                answer_style_config=answer_style_config,
                document_pruning_config=document_pruning_config,
            ),
@@ -616,6 +702,7 @@ def stream_chat_message_objects(
                additional_headers=custom_tool_additional_headers,
            ),
            allowed_tool_ids=new_msg_req.allowed_tool_ids,
+            slack_context=new_msg_req.slack_context,  # Pass Slack context from request
        )

        tools: list[Tool] = []
@@ -630,6 +717,7 @@ def stream_chat_message_objects(
        message_history = [
            PreviousMessage.from_chat_message(msg, files) for msg in history_msgs
        ]
+
        if not search_tool_override_kwargs_for_user_files and in_memory_user_files:
            yield UserKnowledgeFilePacket(
                user_files=[
@@ -637,6 +725,8 @@ def stream_chat_message_objects(
                        id=str(file.file_id), type=file.file_type, name=file.filename
                    )
                    for file in in_memory_user_files
+                    if project_file_ids is not None
+                    and (file.file_id not in project_file_ids)
                ]
            )

@@ -655,6 +745,10 @@ def stream_chat_message_objects(
            single_message_history=single_message_history,
        )

+        if project_llm_docs and not search_tool_override_kwargs_for_user_files:
+            # Store for downstream streaming to wire citations and final_documents
+            prompt_builder.context_llm_docs = project_llm_docs
+
        # LLM prompt building, response capturing, etc.
        answer = Answer(
            prompt_builder=prompt_builder,
@@ -683,6 +777,7 @@ def stream_chat_message_objects(
            db_session=db_session,
            use_agentic_search=new_msg_req.use_agentic_search,
            skip_gen_ai_answer_generation=new_msg_req.skip_gen_ai_answer_generation,
+            project_instructions=project_instructions,
        )

        # Process streamed packets using the new packet processing module
--- a/backend/onyx/chat/prompt_builder/answer_prompt_builder.py
+++ b/backend/onyx/chat/prompt_builder/answer_prompt_builder.py
@@ -4,9 +4,9 @@ from typing import cast
 from langchain_core.messages import BaseMessage
 from langchain_core.messages import HumanMessage
 from langchain_core.messages import SystemMessage
-from pydantic import BaseModel
 from pydantic.v1 import BaseModel as BaseModel__v1

+from onyx.chat.models import LlmDoc
 from onyx.chat.models import PromptConfig
 from onyx.chat.prompt_builder.citations_prompt import compute_max_llm_input_tokens
 from onyx.chat.prompt_builder.utils import translate_history_to_basemessages
@@ -76,6 +76,7 @@ def default_build_user_message(
        if prompt_config.task_prompt
        else user_query
    )
+
    user_prompt = user_prompt.strip()
    tag_handled_prompt = handle_onyx_date_awareness(user_prompt, prompt_config)
    user_msg = HumanMessage(
@@ -132,6 +133,10 @@ class AnswerPromptBuilder:
        self.raw_user_uploaded_files = raw_user_uploaded_files
        self.single_message_history = single_message_history

+        # Optional: if the prompt includes explicit context documents (e.g., project files),
+        # store them here so downstream streaming can reference them for citation mapping.
+        self.context_llm_docs: list[LlmDoc] | None = None
+
    def update_system_prompt(self, system_message: SystemMessage | None) -> None:
        if not system_message:
            self.system_message_and_token_cnt = None
@@ -196,10 +201,6 @@ class AnswerPromptBuilder:


 # Stores some parts of a prompt builder as needed for tool calls
-class PromptSnapshot(BaseModel):
-    raw_message_history: list[PreviousMessage]
-    raw_user_query: str
-    built_prompt: list[BaseMessage]


 # TODO: rename this? AnswerConfig maybe?
--- a/backend/onyx/chat/prompt_builder/citations_prompt.py
+++ b/backend/onyx/chat/prompt_builder/citations_prompt.py
@@ -1,13 +1,11 @@
 from langchain.schema.messages import HumanMessage
 from langchain.schema.messages import SystemMessage
-from sqlalchemy.orm import Session

 from onyx.chat.models import LlmDoc
 from onyx.chat.models import PromptConfig
 from onyx.configs.model_configs import GEN_AI_SINGLE_USER_MESSAGE_EXPECTED_MAX_TOKENS
 from onyx.context.search.models import InferenceChunk
 from onyx.db.models import Persona
-from onyx.db.prompts import get_default_prompt
 from onyx.db.search_settings import get_multilingual_expansion
 from onyx.file_store.models import InMemoryChatFile
 from onyx.llm.factory import get_llms_for_persona
@@ -89,13 +87,12 @@ def compute_max_document_tokens(


 def compute_max_document_tokens_for_persona(
-    db_session: Session,
    persona: Persona,
    actual_user_input: str | None = None,
 ) -> int:
-    prompt = persona.prompts[0] if persona.prompts else get_default_prompt(db_session)
+    # Use the persona directly since prompts are now embedded
    return compute_max_document_tokens(
-        prompt_config=PromptConfig.from_model(prompt),
+        prompt_config=PromptConfig.from_model(persona),
        llm_config=get_main_llm_from_tuple(get_llms_for_persona(persona)).config,
        actual_user_input=actual_user_input,
    )
--- a/backend/onyx/chat/prompt_builder/schemas.py
+++ b/backend/onyx/chat/prompt_builder/schemas.py
@@ -0,0 +1,10 @@
+from langchain_core.messages import BaseMessage
+from pydantic import BaseModel
+
+from onyx.llm.models import PreviousMessage
+
+
+class PromptSnapshot(BaseModel):  # prompt-builder state kept for tool calls
+    raw_message_history: list[PreviousMessage]
+    raw_user_query: str
+    built_prompt: list[BaseMessage]
--- a/backend/onyx/chat/stream_processing/citation_processing.py
+++ b/backend/onyx/chat/stream_processing/citation_processing.py
@@ -12,6 +12,35 @@ from onyx.utils.logger import setup_logger
 logger = setup_logger()


+def normalize_square_bracket_citations_to_double_with_links(text: str) -> str:
+    """
+    Rewrite numeric citation markers into the canonical `[[n]]()` form.
+
+    Two passes run in order over `text`:
+    1. `[[n]]` not immediately followed by `(` gains an empty link: `[[n]]()`.
+    2. `[n]` with no `[` directly before it and no `]` directly after it
+       becomes `[[n]]()`.
+
+    Citations that already carry a link, e.g. `[[n]](http...)`, are left
+    unchanged. Only all-digit markers match. Falsy input returns "".
+    """
+    if not text:
+        return ""
+
+    # Both passes emit the same shape: the captured number wrapped in double
+    # brackets, followed by an empty link target.
+    linked_citation = r"[[\g<1>]]()"
+
+    # Pass 1: bare double-bracket citations. The negative lookahead skips any
+    # citation that already has "(" right after its closing brackets.
+    with_links = re.sub(r"\[\[(\d+)\]\](?!\()", linked_citation, text)
+
+    # Pass 2: single-bracket citations. The lookarounds keep this from matching
+    # the inner `[n]` of a double-bracket citation.
+    double_bracketed = re.sub(r"(?<!\[)\[(\d+)\](?!\])", linked_citation, with_links)
+    return double_bracketed
+
+
 def in_code_block(llm_text: str) -> bool:
    count = llm_text.count(TRIPLE_BACKTICK)
    return count % 2 != 0
--- a/backend/onyx/chat/tool_handling/tool_response_handler.py
+++ b/backend/onyx/chat/tool_handling/tool_response_handler.py
@@ -7,7 +7,7 @@ from langchain_core.messages import ToolCall
 from onyx.chat.models import ResponsePart
 from onyx.chat.prompt_builder.answer_prompt_builder import AnswerPromptBuilder
 from onyx.chat.prompt_builder.answer_prompt_builder import LLMCall
-from onyx.chat.prompt_builder.answer_prompt_builder import PromptSnapshot
+from onyx.chat.prompt_builder.schemas import PromptSnapshot
 from onyx.llm.interfaces import LLM
 from onyx.tools.force import ForceUseTool
 from onyx.tools.message import build_tool_message
--- a/backend/onyx/chat/user_files/parse_user_files.py
+++ b/backend/onyx/chat/user_files/parse_user_files.py
@@ -4,6 +4,8 @@ from sqlalchemy.orm import Session

 from onyx.db.models import Persona
 from onyx.db.models import UserFile
+from onyx.db.projects import get_user_files_from_project
+from onyx.db.user_file import update_last_accessed_at_for_user_files
 from onyx.file_store.models import InMemoryChatFile
 from onyx.file_store.utils import get_user_files_as_user
 from onyx.file_store.utils import load_in_memory_chat_files
@@ -15,24 +17,24 @@ logger = setup_logger()


 def parse_user_files(
-    user_file_ids: list[int],
-    user_folder_ids: list[int],
+    user_file_ids: list[UUID],
    db_session: Session,
    persona: Persona,
    actual_user_input: str,
+    project_id: int | None,
    # should only be None if auth is disabled
    user_id: UUID | None,
 ) -> tuple[list[InMemoryChatFile], list[UserFile], SearchToolOverrideKwargs | None]:
    """
-    Parse user files and folders into in-memory chat files and create search tool override kwargs.
-    Only creates SearchToolOverrideKwargs if token overflow occurs or folders are present.
+    Parse user files and project into in-memory chat files and create search tool override kwargs.
+    Only creates SearchToolOverrideKwargs if token overflow occurs.

    Args:
        user_file_ids: List of user file IDs to load
-        user_folder_ids: List of user folder IDs to load
        db_session: Database session
        persona: Persona to calculate available tokens
        actual_user_input: User's input message for token calculation
+        project_id: Project whose user files are loaded along with user_file_ids
        user_id: User ID to validate file ownership

    Returns:
@@ -40,43 +42,61 @@ def parse_user_files(
            loaded user files,
            user file models,
            search tool override kwargs if token
-                overflow or folders present
+                overflow
        )
    """
-    # Return empty results if no files or folders specified
-    if not user_file_ids and not user_folder_ids:
+    # Return empty results if no files or project specified
+    if not user_file_ids and not project_id:
        return [], [], None

+    # IDs of the files attached to the project; stays empty without a project
+    project_user_file_ids: list[UUID] = []
+    if project_id:
+        project_user_file_ids = [
+            file.id
+            for file in get_user_files_from_project(project_id, user_id, db_session)
+        ]
+
+    # Combine user-provided and project-derived user file IDs. The fallback is
+    # parenthesized because `+` binds tighter than `or`: the previous
+    # `a + b or []` raised TypeError when user_file_ids was None.
+    combined_user_file_ids = (user_file_ids or []) + project_user_file_ids
+
    # Load user files from the database into memory
    user_files = load_in_memory_chat_files(
-        user_file_ids or [],
-        user_folder_ids or [],
+        combined_user_file_ids,
        db_session,
    )

    user_file_models = get_user_files_as_user(
-        user_file_ids or [],
-        user_folder_ids or [],
+        combined_user_file_ids,
        user_id,
        db_session,
    )

+    # Update last accessed at for the user files which are used in the chat
+    if user_file_ids or project_user_file_ids:
+        # update_last_accessed_at_for_user_files expects list[UUID]
+        update_last_accessed_at_for_user_files(
+            combined_user_file_ids,
+            db_session,
+        )
+
    # Calculate token count for the files, need to import here to avoid circular import
    # TODO: fix this
-    from onyx.db.user_documents import calculate_user_files_token_count
+    from onyx.db.user_file import calculate_user_files_token_count
    from onyx.chat.prompt_builder.citations_prompt import (
        compute_max_document_tokens_for_persona,
    )

+    # calculate_user_files_token_count now expects list[UUID]
    total_tokens = calculate_user_files_token_count(
-        user_file_ids or [],
-        user_folder_ids or [],
+        combined_user_file_ids,
        db_session,
    )

    # Calculate available tokens for documents based on prompt, user input, etc.
    available_tokens = compute_max_document_tokens_for_persona(
-        db_session=db_session,
        persona=persona,
        actual_user_input=actual_user_input,
    )
@@ -87,20 +107,22 @@ def parse_user_files(

    have_enough_tokens = total_tokens <= available_tokens

-    # If we have enough tokens and no folders, we don't need search
+    # If we have enough tokens, we don't need search
    # we can just pass them into the prompt directly
-    if have_enough_tokens and not user_folder_ids:
+    if have_enough_tokens:
        # No search tool override needed - files can be passed directly
        return user_files, user_file_models, None

-    # Token overflow or folders present - need to use search tool
+    # Token overflow - need to use search tool
    override_kwargs = SearchToolOverrideKwargs(
        force_no_rerank=have_enough_tokens,
        alternate_db_session=None,
        retrieved_sections_callback=None,
        skip_query_analysis=have_enough_tokens,
-        user_file_ids=user_file_ids,
-        user_folder_ids=user_folder_ids,
+        user_file_ids=user_file_ids or [],
+        project_id=(
+            project_id if persona.is_default_persona else None
+        ),  # if the persona is not default, we don't want to use the project files
    )

    return user_files, user_file_models, override_kwargs
--- a/backend/onyx/configs/agent_configs.py
+++ b/backend/onyx/configs/agent_configs.py
@@ -379,4 +379,11 @@ AGENT_MAX_TOKENS_HISTORY_SUMMARY = int(
    or AGENT_DEFAULT_MAX_TOKENS_HISTORY_SUMMARY
 )

+# Parameters for the Thoughtful/Deep Research flows. Each timeout falls back to
+# its default when the env var is unset or empty (units presumably seconds; confirm).
+TF_DR_TIMEOUT_LONG = int(os.environ.get("TF_DR_TIMEOUT_LONG") or 120)
+TF_DR_TIMEOUT_SHORT = int(os.environ.get("TF_DR_TIMEOUT_SHORT") or 60)
+# True only when the env var equals "true" (case-insensitive); unset/empty -> False.
+TF_DR_DEFAULT_FAST = (os.environ.get("TF_DR_DEFAULT_FAST") or "False").lower() == "true"
+
 GRAPH_VERSION_NAME: str = "a"
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
SubashMohan	78f1fb5bf4	fix(projects): Fix Migration (#5550 )	2025-09-30 12:56:40 -07:00
Evan Lohn	6a8a214324	fix: avoid attempting to retrieve with non-org owners (#5555 )	2025-09-30 12:55:03 -07:00
Justin Tahara	884266c009	fix(saml): Update the route to take GET's and transform to POST (#5554 )	2025-09-30 11:28:07 -07:00
Chris Weaver	2c422215e6	Fix prod compose (#5553 )	2025-09-30 10:41:38 -07:00
joachim-danswer	32fe185bb4	fix: set gpt-5 thinking setting (#5539 )	2025-09-30 09:57:36 -07:00
Chris Weaver	c2758a28d5	fix: project migration tweak (#5544 )	2025-09-29 19:57:09 -07:00
Justin Tahara	5cda2e0173	feat(LLM): Add Claude Sonnet 4.5 (#5543 )	2025-09-29 17:58:18 -07:00
Evan Lohn	9e885a68b3	feat: mcp client v2 (#5481 )	2025-09-29 17:01:32 -07:00
Justin Tahara	376fc86b0c	fix(saml): GET Method for SAML Callback (#5538 )	2025-09-29 15:08:44 -07:00
Chris Weaver	2eb1444d80	fix: more test hardening (#5537 )	2025-09-29 13:54:56 -07:00
SubashMohan	bd6ebe4718	feat(chat): add popup handling for image file selection in ChatInputBar (#5536 )	2025-09-29 11:02:18 -07:00
Chris Weaver	691d63bc0f	fix: remove console.log (#5533 )	2025-09-29 10:54:54 -07:00
Chris Weaver	dfd4d9abef	fix: playwright tests (#5522 )	2025-09-29 09:04:10 -07:00
SubashMohan	4cb39bc150	fix chat issue and change view icon (#5525 )	2025-09-29 12:28:07 +05:30
Chris Weaver	4e357478e0	fix: package-lock.json (#5530 )	2025-09-28 13:43:42 -07:00
Wenxi	b5b1b3287c	fix: update package lock after projects merge (#5514 )	2025-09-28 13:00:32 -07:00
Wenxi	2f58a972eb	fix: launch template post projects merge (#5528 )	2025-09-28 12:57:54 -07:00
Yuhong Sun	6b39d8eed9	Docker Version Check (#5523 )	2025-09-27 19:03:43 -07:00
Chris Weaver	f81c34d040	fix: editing/regeneration (#5521 )	2025-09-27 17:43:03 -07:00
Yuhong Sun	0771b1f476	SQL plaintext file (#5520 )	2025-09-27 15:36:44 -07:00
Jessica Singh	eedd2ba3fe	fix(source selection): enable all by default and persist choice (#5511 )	2025-09-26 17:15:40 -07:00
Chris Weaver	98554e5025	feat: small projects UX tweaks (#5513 )	2025-09-26 15:33:37 -07:00
Justin Tahara	dcd2cad6b4	fix(infra): Increment Helm Version for Projects (#5512 )	2025-09-26 13:59:27 -07:00
Chris Weaver	189f4bb071	fix: add bitbucket env vars (#5510 )	2025-09-26 12:38:59 -07:00
SubashMohan	7eeab8fb80	feat(projects): add project creation and management (#5248 ) Co-authored-by: Weves <chrisweaver101@gmail.com>	2025-09-26 12:05:20 -07:00
Justin Tahara	60f83dd0db	fix(gmail): Skip over emails that don't have gmail enabled (#5506 )	2025-09-25 19:57:47 -07:00
Jessica Singh	2618602fd6	fix(source filter): dark mode support (#5505 )	2025-09-25 18:10:48 -07:00
Chris Weaver	b80f96de85	fix: LlmPopover after filling in an initial model (#5504 )	2025-09-25 17:09:22 -07:00
edwin-onyx	74a15b2c01	fix(infra): fix some dependency hells and add some lazy loading to reduce celery worker RAM usage (#5478 ) Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>	2025-09-25 16:12:26 -07:00
Jessica Singh	408b80ce51	feat(source selection): adding source selection for internal search in chat (#5455 )	2025-09-25 16:12:02 -07:00
Wenxi	e82b68c1b0	fix: update seeded docs connector name (#5502 )	2025-09-25 15:58:54 -07:00
Justin Tahara	af5eec648b	fix(playwright): Add new fix for Playwright test (#5503 )	2025-09-25 15:34:24 -07:00
Chris Weaver	d186c5e82e	feat(docker): Add DEV_MODE flag for exposing service ports (#5499 ) Co-authored-by: Claude <noreply@anthropic.com> Co-authored-by: justin-tahara <justintahara@gmail.com>	2025-09-25 15:08:20 -07:00
Justin Tahara	4420a50aed	fix(github): Revert cache being turned off (#5487 )	2025-09-25 14:07:58 -07:00
Justin Tahara	9caa6ea7ff	feat(infra): Default to HPA w/ KEDA option (#5480 )	2025-09-25 11:58:19 -07:00
Yuhong Sun	8d7b217d33	Deployment README (#5496 )	2025-09-25 11:34:30 -07:00
Yuhong Sun	57908769f1	Port 80 (#5495 )	2025-09-25 11:10:41 -07:00
Yuhong Sun	600cec7c89	Robust Install (#5494 )	2025-09-25 10:08:52 -07:00
Yuhong Sun	bb8ea536c4	Update README.md (#5492 )	2025-09-25 09:05:50 -07:00
Yuhong Sun	f97869b91e	README (#5486 )	2025-09-24 20:33:36 -07:00
Justin Tahara	aa5be56884	fix(github): Remove the Backport workflow (#5484 )	2025-09-24 19:33:18 -07:00
Justin Tahara	7580178c95	fix(github): Fix Integration Tests (#5485 )	2025-09-24 19:30:07 -07:00
Yuhong Sun	2e0bc8caf0	feat: Easy Install (#5461 )	2025-09-24 15:31:45 -07:00
Chris Weaver	f9bd03c7f0	refactor: change venv activation (#5463 )	2025-09-23 16:07:46 -07:00
Jessica Singh	77466e1f2b	feat(slack bot): add federated search (#5275 ) Co-authored-by: Jessica Singh <jessicasingh@Mac.attlocal.net> Co-authored-by: Jessica Singh <jessicasingh@mac.lan>	2025-09-22 19:19:44 -07:00
Justin Tahara	8dd79345ed	fix(sharepoint): Add secondary filter for embedded images (#5473 )	2025-09-22 18:48:47 -07:00
Justin Tahara	a049835c49	fix(processing): Mime types for Image Summarization (#5471 )	2025-09-22 18:48:31 -07:00
Yuhong Sun	d186d8e8ed	Remove incredibly strict password reqs (#5470 )	2025-09-22 17:25:34 -07:00
Yuhong Sun	082897eb9b	Fix Toggles (#5469 )	2025-09-22 17:09:43 -07:00
Yuhong Sun	e38f79dec5	Remove confusing text (#5468 )	2025-09-22 15:37:54 -07:00
SubashMohan	26e7bba25d	Fix/connector page stack depth limit (#5417 )	2025-09-22 19:23:53 +05:30
edwin-onyx	3cde4ef77f	fix(infra): create pre commit script and port vertex as lazy import (#5453 ) Co-authored-by: Claude <noreply@anthropic.com>	2025-09-21 20:43:28 -07:00
Evan Lohn	f4d135d710	fix: sharepoint memory via excel parsing (#5444 )	2025-09-19 17:10:27 -07:00
Richard Guan	6094f70ac8	fix: braintrust masking was over truncating (#5458 ) Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>	2025-09-19 14:10:29 -07:00
Richard Guan	a90e58b39b	feat: braintrust tracing (#5450 )	2025-09-18 18:17:38 -07:00
Evan Lohn	e82e3141ed	feat: zendesk rate limiting (#5452 )	2025-09-18 16:35:48 -07:00
edwin-onyx	f8e9060bab	fix(infra): remove transformers dependency for api server (#5441 ) Co-authored-by: Edwin Luo <edwinluo@3ef5a334-3d74-4dbf-b1c8-d57dc87d5638.attlocal.net> Co-authored-by: Claude <noreply@anthropic.com>	2025-09-18 12:59:12 -07:00
Jessica Singh	24831fa1a1	fix(slack): swapped checkpoint index (#5427 )	2025-09-18 11:09:31 -07:00
edwin-onyx	f6a0e69b2a	fix(infra): remove setfit dependency from api server (#5449 )	2025-09-17 23:48:47 -07:00
Richard Guan	0394eaea7f	fix: copy over tests/__init__.py on docker build (#5443 )	2025-09-17 17:12:03 -07:00
Wenxi	898b8c316e	feat: docs link on connector creation (#5447 )	2025-09-17 17:06:35 -07:00
Chris Weaver	4b0c6d1e54	fix: image gen tool causing error (#5445 )	2025-09-17 16:39:54 -07:00
Justin Tahara	da7dc33afa	fix(Federated Slack): Persist Document Set for Federated Connectors (#5442 )	2025-09-17 13:52:11 -07:00
Richard Guan	c558732ddd	feat: eval pipeline (#5369 )	2025-09-17 12:17:14 -07:00
Chris Weaver	339ad9189b	fix: slackbot error (#5430 )	2025-09-16 23:25:34 -07:00
Richard Guan	32d5e408b8	fix: HF Cache Warmup Fix and Celery Pool Management (#5435 )	2025-09-16 18:57:52 -07:00
Justin Tahara	14ead457d9	fix(infra): Update chart releaser (#5434 )	2025-09-16 16:58:43 -07:00
Justin Tahara	458cd7e832	fix(infra): Add KEDA Dependency (#5433 )	2025-09-16 16:52:24 -07:00
Justin Tahara	770a2692e9	Revert "fix(infra): Add KEDA Dependency" (#5432 )	2025-09-16 16:48:18 -07:00
Justin Tahara	5dd99b6acf	fix(infra): Add KEDA Dependency (#5431 )	2025-09-16 16:45:41 -07:00
Chris Weaver	6c7eb89374	fix: remove credential file log (#5429 )	2025-09-16 15:48:33 -07:00
eric-zadara	fd11c16c6d	feat(infra): Decouple helm chart from bitnami (#5200 ) Co-authored-by: eric-zadara <eric-zadara@users.noreply.github.com>	2025-09-16 14:56:04 -07:00
Chris Weaver	11ec603c37	fix: Improve datetime replacement (#5425 ) Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>	2025-09-16 14:49:06 -07:00
Justin Tahara	495d4cac44	feat(infra): Migrate from HPA to KEDA for all Services (#5370 )	2025-09-16 13:56:50 -07:00
Wenxi Onyx	fd2d74ae2e	onyx mcp server	2025-09-16 11:06:00 -07:00
Evan Lohn	4c7a2e486b	fix: skip huge files on sdk fallback (#5421 )	2025-09-15 18:24:06 -07:00
Chris Weaver	01e0ba6270	fix: tool seeding migration (#5422 )	2025-09-15 16:46:01 -07:00
Wenxi	227dfc4a05	fix: skip excluded img files in sharepoint (#5418 )	2025-09-15 11:30:19 -07:00
Chris Weaver	c3702b76b6	docs: add agent files (#5412 )	2025-09-14 20:07:18 -07:00
Chris Weaver	bb239d574c	feat: single default assistant (#5351 )	2025-09-14 20:05:33 -07:00
Chris Weaver	172e5f0e24	feat: Move reg IT to parallel + blacksmith and have MIT only run on merge q… (#5413 )	2025-09-13 17:33:45 -07:00
Nils	26b026fb88	SharePoint Connector Fix - Nested Subfolder Indexing (#5404 ) Co-authored-by: nsklei <nils.kleinrahm@pledoc.de>	2025-09-13 11:33:01 +00:00
joachim-danswer	870629e8a9	fix: Azure adjustment (#5410 )	2025-09-13 00:03:37 +00:00
danielkravets	a547112321	feat: bitbucket connector (#5294 )	2025-09-12 18:15:09 -07:00
joachim-danswer	da5a94815e	fix: initial response quality, particularly for General assistant (#5399 )	2025-09-12 00:14:49 -07:00
Jessica Singh	e024472b74	fix(federated-slack): pass in valid query (#5402 )	2025-09-11 19:27:43 -07:00
Chris Weaver	e74855e633	feat: use private registry (#5401 )	2025-09-11 18:20:56 -07:00
Justin Tahara	e4c26a933d	fix(infra): Fix helm test timeout (#5386 )	2025-09-11 18:19:07 -07:00
Chris Weaver	36c96f2d98	fix: playwright (#5396 )	2025-09-11 14:06:03 -07:00
Justin Tahara	1ea94dcd8d	fix(security): Remove Hard Fail from Trivy (#5394 )	2025-09-11 10:35:26 -07:00
Wenxi	2b1c5a0755	fix: remove unneeded dependency from requirements (#5390 )	2025-09-10 21:49:02 -07:00
Chris Weaver	82b5f806ab	feat: Improve migration (#5391 )	2025-09-10 19:29:11 -07:00
Chris Weaver	6340c517d1	fix: missing connectors section (#5387 ) Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>	2025-09-10 19:28:56 -07:00
joachim-danswer	3baae2d4f0	fix: tf/dr flow improvements (#5380 )	2025-09-10 16:39:19 -07:00
Chris Weaver	d7c223ddd4	feat: playwright test speed improvement (#5388 )	2025-09-10 16:19:56 -07:00
Chris Weaver	df4917243b	fix: parallelized IT (#5389 )	2025-09-10 14:37:36 -07:00
Justin Tahara	a79ab713ce	feat(infra): Adding rety to Trivy tests (#5383 )	2025-09-10 14:13:58 -07:00
Chris Weaver	d1f7cee959	feat: parallelized integration tests (#5021 ) Co-authored-by: Claude <noreply@anthropic.com>	2025-09-10 12:15:02 -07:00
Justin Tahara	a3f41e20da	feat(infra): Add Node Selector option to all Templates (#5384 )	2025-09-10 10:23:54 -07:00
Chris Weaver	458ed93da0	feat: remove prompt table (#5348 )	2025-09-10 10:21:57 -07:00
Chris Weaver	273d073bd7	fix: non-image gen models (#5381 )	2025-09-09 15:52:03 -07:00
Wenxi	9455c8e5ae	fix: add back reverted changes to readme (#5377 )	2025-09-09 10:23:33 -07:00
Justin Tahara	d45d4389a0	Revert "fix: update contribution guide" (#5376 ) Co-authored-by: Wenxi <wenxi@onyx.app>	2025-09-09 09:37:16 -07:00
Chris Weaver	bd901c0da1	fix: playwright tests (#5372 ) Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>	2025-09-09 00:29:52 -07:00
Wenxi	2192605c95	feat: Bedrock API Keys & filter available models (#5343 ) Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>	2025-09-08 18:50:04 -07:00
Wenxi	d248d2f4e9	refactor: update seeded docs (#5364 ) Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>	2025-09-08 18:06:29 -07:00
Chris Weaver	331c53871a	fix: image gen display (#5367 )	2025-09-08 17:47:17 -07:00
SubashMohan	f62d0d9144	feat(admin/connectors): Disable Auto Sync for unsupported auth; add disabled dropdown + tooltip (#5358 )	2025-09-08 21:39:47 +00:00
Chris Weaver	427945e757	fix: model server build (#5362 )	2025-09-08 14:00:33 -07:00
Wenxi	e55cdc6250	fix: new docs links (#5363 )	2025-09-08 13:49:19 -07:00
sktbcpraha	6a01db9ff2	fix: IMAP - mail processing fixes (#5360 )	2025-09-08 12:11:09 -07:00
Richard Guan	82e9df5c22	fix: various bug bash improvements (#5330 )	2025-09-07 23:17:01 -07:00