Mirror of https://github.com/onyx-dot-app/onyx.git (synced 2026-02-17 15:55:45 +00:00)

Compare commits: projects-r...add-code-i (1 commit, 68a484ae73)

.github/pull_request_template.md (vendored): 5 lines changed
@@ -6,6 +6,9 @@
[Describe the tests you ran to verify your changes]

## Additional Options
## Backporting (check the box to trigger backport action)

Note: You have to check that the action passes, otherwise resolve the conflicts manually and tag the patches.

- [ ] This PR should be backported (make sure to check that the backport attempt succeeds)
- [ ] [Optional] Override Linear Check
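The new checkbox is what the backport automation added later in this diff keys on: `pr-backport-autotrigger.yml` looks for the checked box in the merged PR's body. A minimal sketch of that detection, where `PR_BODY` stands in for `${{ github.event.pull_request.body }}`:

```bash
# Sketch of the checkbox detection performed by the backport workflow below.
PR_BODY='- [x] This PR should be backported (make sure to check that the backport attempt succeeds)'

if [[ "$PR_BODY" == *"[x] This PR should be backported"* ]]; then
  echo "backport=true"
else
  echo "backport=false"
fi
```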
.github/workflows/check-lazy-imports.yml (vendored): 24 lines changed

@@ -1,24 +0,0 @@
name: Check Lazy Imports

on:
  merge_group:
  pull_request:
    branches:
      - main
      - 'release/**'

jobs:
  check-lazy-imports:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'

      - name: Check lazy imports
        run: python3 backend/scripts/check_lazy_imports.py
@@ -8,9 +8,9 @@ on:
env:
  REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-backend-cloud' || 'onyxdotapp/onyx-backend' }}
  DEPLOYMENT: ${{ contains(github.ref_name, 'cloud') && 'cloud' || 'standalone' }}

  # tag nightly builds with "edge"
  EDGE_TAG: ${{ startsWith(github.ref_name, 'nightly-latest') }}

  # don't tag cloud images with "latest"
  LATEST_TAG: ${{ contains(github.ref_name, 'latest') && !contains(github.ref_name, 'cloud') }}

jobs:
  build-and-push:
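These expressions pick the image name and the extra tags purely from the ref name. Roughly, in plain shell terms (an illustrative sketch; the real evaluation happens in GitHub Actions expressions, not bash):

```bash
# Illustrative evaluation of the env expressions above for a sample ref name.
ref_name="nightly-latest-20260217"   # substitute any tag/branch name

[[ "$ref_name" == *cloud* ]] && REGISTRY_IMAGE="onyxdotapp/onyx-backend-cloud" || REGISTRY_IMAGE="onyxdotapp/onyx-backend"
[[ "$ref_name" == nightly-latest* ]] && EDGE_TAG=true || EDGE_TAG=false
[[ "$ref_name" == *latest* && "$ref_name" != *cloud* ]] && LATEST_TAG=true || LATEST_TAG=false

echo "image=$REGISTRY_IMAGE edge=$EDGE_TAG latest=$LATEST_TAG"
```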
@@ -33,16 +33,7 @@ jobs:
        run: |
          platform=${{ matrix.platform }}
          echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV

      - name: Check if stable release version
        id: check_version
        run: |
          if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]] && [[ "${{ github.ref_name }}" != *"cloud"* ]]; then
            echo "is_stable=true" >> $GITHUB_OUTPUT
          else
            echo "is_stable=false" >> $GITHUB_OUTPUT
          fi

      - name: Checkout code
        uses: actions/checkout@v4
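The `check_version` step (dropped by this hunk, per the -16/+7 line counts) classified ref names with a semver regex plus a `cloud` exclusion. For reference, on illustrative values:

```bash
# Which ref names the check above treats as a stable release:
for ref in v2.3.4 v2.3.4-beta.1 v2.3.4-cloud nightly-latest-20260217; do
  if [[ "$ref" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]] && [[ "$ref" != *"cloud"* ]]; then
    echo "$ref -> is_stable=true"
  else
    echo "$ref -> is_stable=false"
  fi
done
```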
@@ -55,8 +46,7 @@ jobs:
|
||||
latest=false
|
||||
tags: |
|
||||
type=raw,value=${{ github.ref_name }}
|
||||
type=raw,value=${{ steps.check_version.outputs.is_stable == 'true' && 'latest' || '' }}
|
||||
type=raw,value=${{ env.EDGE_TAG == 'true' && 'edge' || '' }}
|
||||
type=raw,value=${{ env.LATEST_TAG == 'true' && 'latest' || '' }}
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
@@ -129,8 +119,7 @@ jobs:
|
||||
latest=false
|
||||
tags: |
|
||||
type=raw,value=${{ github.ref_name }}
|
||||
type=raw,value=${{ steps.check_version.outputs.is_stable == 'true' && 'latest' || '' }}
|
||||
type=raw,value=${{ env.EDGE_TAG == 'true' && 'edge' || '' }}
|
||||
type=raw,value=${{ env.LATEST_TAG == 'true' && 'latest' || '' }}
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
|
||||
@@ -11,8 +11,8 @@ env:
|
||||
BUILDKIT_PROGRESS: plain
|
||||
DEPLOYMENT: ${{ contains(github.ref_name, 'cloud') && 'cloud' || 'standalone' }}
|
||||
|
||||
# tag nightly builds with "edge"
|
||||
EDGE_TAG: ${{ startsWith(github.ref_name, 'nightly-latest') }}
|
||||
# don't tag cloud images with "latest"
|
||||
LATEST_TAG: ${{ contains(github.ref_name, 'latest') && !contains(github.ref_name, 'cloud') }}
|
||||
|
||||
jobs:
|
||||
|
||||
@@ -145,15 +145,6 @@ jobs:
|
||||
if: needs.check_model_server_changes.outputs.changed == 'true'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Check if stable release version
|
||||
id: check_version
|
||||
run: |
|
||||
if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]] && [[ "${{ github.ref_name }}" != *"cloud"* ]]; then
|
||||
echo "is_stable=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "is_stable=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
@@ -166,16 +157,11 @@ jobs:
|
||||
docker buildx imagetools create -t ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }} \
|
||||
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64 \
|
||||
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
|
||||
if [[ "${{ steps.check_version.outputs.is_stable }}" == "true" ]]; then
|
||||
if [[ "${{ env.LATEST_TAG }}" == "true" ]]; then
|
||||
docker buildx imagetools create -t ${{ env.REGISTRY_IMAGE }}:latest \
|
||||
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64 \
|
||||
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
|
||||
fi
|
||||
if [[ "${{ env.EDGE_TAG }}" == "true" ]]; then
|
||||
docker buildx imagetools create -t ${{ env.REGISTRY_IMAGE }}:edge \
|
||||
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-amd64 \
|
||||
${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }}-arm64
|
||||
fi
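This step stitches the per-architecture images pushed by the matrix jobs (the `-amd64` / `-arm64` tags) into a single multi-arch manifest under the plain ref tag, and repeats that for `latest` or `edge` when the corresponding flag is set. The merged result can be checked afterwards, for example:

```bash
# Inspect the merged manifest and confirm it lists both architectures (image and tag are illustrative):
docker buildx imagetools inspect onyxdotapp/onyx-backend:edge
```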
|
||||
|
||||
- name: Run Trivy vulnerability scanner
|
||||
uses: nick-fields/retry@v3
|
||||
|
||||
@@ -7,10 +7,7 @@ on:
|
||||
|
||||
env:
|
||||
REGISTRY_IMAGE: onyxdotapp/onyx-web-server
|
||||
|
||||
# tag nightly builds with "edge"
|
||||
EDGE_TAG: ${{ startsWith(github.ref_name, 'nightly-latest') }}
|
||||
|
||||
LATEST_TAG: ${{ contains(github.ref_name, 'latest') }}
|
||||
DEPLOYMENT: standalone
|
||||
|
||||
jobs:
|
||||
@@ -48,15 +45,6 @@ jobs:
|
||||
platform=${{ matrix.platform }}
|
||||
echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
|
||||
|
||||
- name: Check if stable release version
|
||||
id: check_version
|
||||
run: |
|
||||
if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
|
||||
echo "is_stable=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "is_stable=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
@@ -69,8 +57,7 @@ jobs:
|
||||
latest=false
|
||||
tags: |
|
||||
type=raw,value=${{ github.ref_name }}
|
||||
type=raw,value=${{ steps.check_version.outputs.is_stable == 'true' && 'latest' || '' }}
|
||||
type=raw,value=${{ env.EDGE_TAG == 'true' && 'edge' || '' }}
|
||||
type=raw,value=${{ env.LATEST_TAG == 'true' && 'latest' || '' }}
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
@@ -139,8 +126,7 @@ jobs:
|
||||
latest=false
|
||||
tags: |
|
||||
type=raw,value=${{ github.ref_name }}
|
||||
type=raw,value=${{ steps.check_version.outputs.is_stable == 'true' && 'latest' || '' }}
|
||||
type=raw,value=${{ env.EDGE_TAG == 'true' && 'edge' || '' }}
|
||||
type=raw,value=${{ env.LATEST_TAG == 'true' && 'latest' || '' }}
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
|
||||
.github/workflows/helm-chart-releases.yml (vendored): 6 lines changed

@@ -25,11 +25,9 @@

      - name: Add required Helm repositories
        run: |
          helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
          helm repo add bitnami https://charts.bitnami.com/bitnami
          helm repo add onyx-vespa https://onyx-dot-app.github.io/vespa-helm-charts
          helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
          helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
          helm repo add minio https://charts.min.io/
          helm repo add keda https://kedacore.github.io/charts
          helm repo update

      - name: Build chart dependencies
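The chart's declared dependencies are resolved from these repositories before packaging. A rough local equivalent of the two steps, assuming a chart directory that is not shown in this diff (the path below is a placeholder):

```bash
# Resolve chart dependencies locally before packaging/releasing (chart path is illustrative):
helm repo add keda https://kedacore.github.io/charts
helm repo update
helm dependency build ./deployment/helm/charts/onyx
```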
.github/workflows/pr-backport-autotrigger.yml (vendored, new file): 124 lines
@@ -0,0 +1,124 @@
|
||||
name: Backport on Merge
|
||||
|
||||
# Note: this workflow does not trigger the builds; be sure to manually tag the branches to trigger them
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types: [closed] # Later we check for merge so only PRs that go in can get backported
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
actions: write
|
||||
|
||||
jobs:
|
||||
backport:
|
||||
if: github.event.pull_request.merged == true
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.YUHONG_GH_ACTIONS }}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ssh-key: "${{ secrets.RKUO_DEPLOY_KEY }}"
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Set up Git user
|
||||
run: |
|
||||
git config user.name "Richard Kuo [bot]"
|
||||
git config user.email "rkuo[bot]@onyx.app"
|
||||
git fetch --prune
|
||||
|
||||
- name: Check for Backport Checkbox
|
||||
id: checkbox-check
|
||||
run: |
|
||||
PR_BODY="${{ github.event.pull_request.body }}"
|
||||
if [[ "$PR_BODY" == *"[x] This PR should be backported"* ]]; then
|
||||
echo "backport=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "backport=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
      - name: List and sort release branches
        id: list-branches
        run: |
          git fetch --all --tags
          BRANCHES=$(git for-each-ref --format='%(refname:short)' refs/remotes/origin/release/* | sed 's|origin/release/||' | sort -Vr)
          BETA=$(echo "$BRANCHES" | head -n 1)
          STABLE=$(echo "$BRANCHES" | head -n 2 | tail -n 1)
          echo "beta=release/$BETA" >> $GITHUB_OUTPUT
          echo "stable=release/$STABLE" >> $GITHUB_OUTPUT
          # Fetch latest tags for beta and stable
          LATEST_BETA_TAG=$(git tag -l "v[0-9]*.[0-9]*.[0-9]*-beta.[0-9]*" | grep -E "^v[0-9]+\.[0-9]+\.[0-9]+-beta\.[0-9]+$" | grep -v -- "-cloud" | sort -Vr | head -n 1)
          LATEST_STABLE_TAG=$(git tag -l "v[0-9]*.[0-9]*.[0-9]*" | grep -E "^v[0-9]+\.[0-9]+\.[0-9]+$" | sort -Vr | head -n 1)

          # Handle case where no beta tags exist
          if [[ -z "$LATEST_BETA_TAG" ]]; then
            NEW_BETA_TAG="v1.0.0-beta.1"
          else
            NEW_BETA_TAG=$(echo $LATEST_BETA_TAG | awk -F '[.-]' '{print $1 "." $2 "." $3 "-beta." ($NF+1)}')
          fi

          # Increment latest stable tag
          NEW_STABLE_TAG=$(echo $LATEST_STABLE_TAG | awk -F '.' '{print $1 "." $2 "." ($3+1)}')
          echo "latest_beta_tag=$LATEST_BETA_TAG" >> $GITHUB_OUTPUT
          echo "latest_stable_tag=$LATEST_STABLE_TAG" >> $GITHUB_OUTPUT
          echo "new_beta_tag=$NEW_BETA_TAG" >> $GITHUB_OUTPUT
          echo "new_stable_tag=$NEW_STABLE_TAG" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Echo branch and tag information
|
||||
run: |
|
||||
echo "Beta branch: ${{ steps.list-branches.outputs.beta }}"
|
||||
echo "Stable branch: ${{ steps.list-branches.outputs.stable }}"
|
||||
echo "Latest beta tag: ${{ steps.list-branches.outputs.latest_beta_tag }}"
|
||||
echo "Latest stable tag: ${{ steps.list-branches.outputs.latest_stable_tag }}"
|
||||
echo "New beta tag: ${{ steps.list-branches.outputs.new_beta_tag }}"
|
||||
echo "New stable tag: ${{ steps.list-branches.outputs.new_stable_tag }}"
|
||||
|
||||
- name: Trigger Backport
|
||||
if: steps.checkbox-check.outputs.backport == 'true'
|
||||
run: |
|
||||
set -e
|
||||
echo "Backporting to beta ${{ steps.list-branches.outputs.beta }} and stable ${{ steps.list-branches.outputs.stable }}"
|
||||
|
||||
# Echo the merge commit SHA
|
||||
echo "Merge commit SHA: ${{ github.event.pull_request.merge_commit_sha }}"
|
||||
|
||||
# Fetch all history for all branches and tags
|
||||
git fetch --prune
|
||||
|
||||
# Reset and prepare the beta branch
|
||||
git checkout ${{ steps.list-branches.outputs.beta }}
|
||||
echo "Last 5 commits on beta branch:"
|
||||
git log -n 5 --pretty=format:"%H"
|
||||
echo "" # Newline for formatting
|
||||
|
||||
# Cherry-pick the merge commit from the merged PR
|
||||
git cherry-pick -m 1 ${{ github.event.pull_request.merge_commit_sha }} || {
|
||||
echo "Cherry-pick to beta failed due to conflicts."
|
||||
exit 1
|
||||
}
|
||||
|
||||
# Create new beta branch/tag
|
||||
git tag ${{ steps.list-branches.outputs.new_beta_tag }}
|
||||
# Push the changes and tag to the beta branch using PAT
|
||||
git push origin ${{ steps.list-branches.outputs.beta }}
|
||||
git push origin ${{ steps.list-branches.outputs.new_beta_tag }}
|
||||
|
||||
# Reset and prepare the stable branch
|
||||
git checkout ${{ steps.list-branches.outputs.stable }}
|
||||
echo "Last 5 commits on stable branch:"
|
||||
git log -n 5 --pretty=format:"%H"
|
||||
echo "" # Newline for formatting
|
||||
|
||||
# Cherry-pick the merge commit from the merged PR
|
||||
git cherry-pick -m 1 ${{ github.event.pull_request.merge_commit_sha }} || {
|
||||
echo "Cherry-pick to stable failed due to conflicts."
|
||||
exit 1
|
||||
}
|
||||
|
||||
# Create new stable branch/tag
|
||||
git tag ${{ steps.list-branches.outputs.new_stable_tag }}
|
||||
# Push the changes and tag to the stable branch using PAT
|
||||
git push origin ${{ steps.list-branches.outputs.stable }}
|
||||
git push origin ${{ steps.list-branches.outputs.new_stable_tag }}
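`git cherry-pick -m 1 <merge_commit>` applies the merge commit's diff relative to its first parent (the target branch's previous tip), which is what lets a merge commit be replayed onto the release branches at all. When the automated pick fails with conflicts, the note in the PR template applies and the patch has to be landed by hand, roughly:

```bash
# Manual fallback when the automated cherry-pick fails (branch name and SHA are illustrative):
git fetch --prune
git checkout release/1.2
git cherry-pick -m 1 <merge_commit_sha>    # resolve conflicts, git add, then:
git cherry-pick --continue
git push origin release/1.2
```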
|
||||
@@ -20,7 +20,6 @@ env:
|
||||
CONFLUENCE_IS_CLOUD: ${{ secrets.CONFLUENCE_IS_CLOUD }}
|
||||
CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }}
|
||||
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
|
||||
CONFLUENCE_ACCESS_TOKEN_SCOPED: ${{ secrets.CONFLUENCE_ACCESS_TOKEN_SCOPED }}
|
||||
|
||||
# LLMs
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
@@ -44,8 +43,8 @@ jobs:
|
||||
|
||||
external-dependency-unit-tests:
|
||||
needs: discover-test-dirs
|
||||
# Use larger runner with more resources for Vespa
|
||||
runs-on: [runs-on, runner=16cpu-linux-x64, "run-id=${{ github.run_id }}"]
|
||||
# See https://runs-on.com/runners/linux/
|
||||
runs-on: [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}"]
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
@@ -54,7 +53,6 @@ jobs:
|
||||
|
||||
env:
|
||||
PYTHONPATH: ./backend
|
||||
MODEL_SERVER_HOST: "disabled"
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
@@ -80,25 +78,12 @@ jobs:
|
||||
- name: Set up Standard Dependencies
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d minio relational_db cache index
|
||||
|
||||
- name: Wait for services
|
||||
run: |
|
||||
echo "Waiting for services to be ready..."
|
||||
sleep 30
|
||||
|
||||
# Wait for Vespa specifically
|
||||
echo "Waiting for Vespa to be ready..."
|
||||
timeout 300 bash -c 'until curl -f -s http://localhost:8081/ApplicationStatus > /dev/null 2>&1; do echo "Vespa not ready, waiting..."; sleep 10; done' || echo "Vespa timeout - continuing anyway"
|
||||
|
||||
echo "Services should be ready now"
|
||||
docker compose -f docker-compose.dev.yml -p onyx-stack up -d minio relational_db cache index
|
||||
|
||||
- name: Run migrations
|
||||
run: |
|
||||
cd backend
|
||||
# Run migrations to head
|
||||
alembic upgrade head
|
||||
alembic heads --verbose
|
||||
|
||||
- name: Run Tests for ${{ matrix.test-dir }}
|
||||
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
|
||||
|
||||
.github/workflows/pr-helm-chart-testing.yml (vendored): 72 lines changed
@@ -65,45 +65,35 @@ jobs:
|
||||
if: steps.list-changed.outputs.changed == 'true'
|
||||
run: |
|
||||
echo "=== Adding Helm repositories ==="
|
||||
helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx
|
||||
helm repo add bitnami https://charts.bitnami.com/bitnami
|
||||
helm repo add vespa https://onyx-dot-app.github.io/vespa-helm-charts
|
||||
helm repo add cloudnative-pg https://cloudnative-pg.github.io/charts
|
||||
helm repo add ot-container-kit https://ot-container-kit.github.io/helm-charts
|
||||
helm repo add minio https://charts.min.io/
|
||||
helm repo update
|
||||
|
||||
- name: Install Redis operator
|
||||
if: steps.list-changed.outputs.changed == 'true'
|
||||
shell: bash
|
||||
run: |
|
||||
echo "=== Installing redis-operator CRDs ==="
|
||||
helm upgrade --install redis-operator ot-container-kit/redis-operator \
|
||||
--namespace redis-operator --create-namespace --wait --timeout 300s
|
||||
|
||||
- name: Pre-pull required images
|
||||
- name: Pre-pull critical images
|
||||
if: steps.list-changed.outputs.changed == 'true'
|
||||
run: |
|
||||
echo "=== Pre-pulling required images to avoid timeout ==="
|
||||
echo "=== Pre-pulling critical images to avoid timeout ==="
|
||||
# Get kind cluster name
|
||||
KIND_CLUSTER=$(kubectl config current-context | sed 's/kind-//')
|
||||
echo "Kind cluster: $KIND_CLUSTER"
|
||||
|
||||
IMAGES=(
|
||||
"ghcr.io/cloudnative-pg/cloudnative-pg:1.27.0"
|
||||
"quay.io/opstree/redis:v7.0.15"
|
||||
"docker.io/onyxdotapp/onyx-web-server:latest"
|
||||
)
|
||||
|
||||
for image in "${IMAGES[@]}"; do
|
||||
echo "Pre-pulling $image"
|
||||
if docker pull "$image"; then
|
||||
kind load docker-image "$image" --name "$KIND_CLUSTER" || echo "Failed to load $image into kind"
|
||||
else
|
||||
echo "Failed to pull $image"
|
||||
fi
|
||||
done
|
||||
|
||||
|
||||
# Pre-pull images that are likely to be used
|
||||
echo "Pre-pulling PostgreSQL image..."
|
||||
docker pull postgres:15-alpine || echo "Failed to pull postgres:15-alpine"
|
||||
kind load docker-image postgres:15-alpine --name $KIND_CLUSTER || echo "Failed to load postgres image"
|
||||
|
||||
echo "Pre-pulling Redis image..."
|
||||
docker pull redis:7-alpine || echo "Failed to pull redis:7-alpine"
|
||||
kind load docker-image redis:7-alpine --name $KIND_CLUSTER || echo "Failed to load redis image"
|
||||
|
||||
echo "Pre-pulling Onyx images..."
|
||||
docker pull docker.io/onyxdotapp/onyx-web-server:latest || echo "Failed to pull onyx web server"
|
||||
docker pull docker.io/onyxdotapp/onyx-backend:latest || echo "Failed to pull onyx backend"
|
||||
kind load docker-image docker.io/onyxdotapp/onyx-web-server:latest --name $KIND_CLUSTER || echo "Failed to load onyx web server"
|
||||
kind load docker-image docker.io/onyxdotapp/onyx-backend:latest --name $KIND_CLUSTER || echo "Failed to load onyx backend"
|
||||
|
||||
echo "=== Images loaded into Kind cluster ==="
|
||||
docker exec "$KIND_CLUSTER"-control-plane crictl images | grep -E "(cloudnative-pg|redis|onyx)" || echo "Some images may still be loading..."
|
||||
docker exec $KIND_CLUSTER-control-plane crictl images | grep -E "(postgres|redis|onyx)" || echo "Some images may still be loading..."
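Pre-pulling on the host and then running `kind load docker-image` copies the image into the kind node's containerd store, so chart pods can start without pulling from a registry inside the cluster; the `crictl images` call is how the workflow verifies that. As a standalone sketch (the cluster name is illustrative):

```bash
# Make a host-pulled image visible inside a kind cluster named "chart-testing" (illustrative name):
docker pull docker.io/onyxdotapp/onyx-web-server:latest
kind load docker-image docker.io/onyxdotapp/onyx-web-server:latest --name chart-testing
docker exec chart-testing-control-plane crictl images | grep onyx-web-server
```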
|
||||
|
||||
- name: Validate chart dependencies
|
||||
if: steps.list-changed.outputs.changed == 'true'
|
||||
@@ -159,7 +149,6 @@ jobs:
|
||||
|
||||
# Run the actual installation with detailed logging
|
||||
echo "=== Starting ct install ==="
|
||||
set +e
|
||||
ct install --all \
|
||||
--helm-extra-set-args="\
|
||||
--set=nginx.enabled=false \
|
||||
@@ -167,10 +156,8 @@ jobs:
|
||||
--set=vespa.enabled=false \
|
||||
--set=slackbot.enabled=false \
|
||||
--set=postgresql.enabled=true \
|
||||
--set=postgresql.nameOverride=cloudnative-pg \
|
||||
--set=postgresql.cluster.storage.storageClass=standard \
|
||||
--set=postgresql.primary.persistence.enabled=false \
|
||||
--set=redis.enabled=true \
|
||||
--set=redis.storageSpec.volumeClaimTemplate.spec.storageClassName=standard \
|
||||
--set=webserver.replicaCount=1 \
|
||||
--set=api.replicaCount=0 \
|
||||
--set=inferenceCapability.replicaCount=0 \
|
||||
@@ -182,20 +169,11 @@ jobs:
|
||||
--set=celery_worker_light.replicaCount=0 \
|
||||
--set=celery_worker_monitoring.replicaCount=0 \
|
||||
--set=celery_worker_primary.replicaCount=0 \
|
||||
--set=celery_worker_user_file_processing.replicaCount=0 \
|
||||
--set=celery_worker_user_files_indexing.replicaCount=0" \
|
||||
--helm-extra-args="--timeout 900s --debug" \
|
||||
--debug --config ct.yaml
|
||||
CT_EXIT=$?
|
||||
set -e
|
||||
|
||||
if [[ $CT_EXIT -ne 0 ]]; then
|
||||
echo "ct install failed with exit code $CT_EXIT"
|
||||
exit $CT_EXIT
|
||||
else
|
||||
echo "=== Installation completed successfully ==="
|
||||
fi
|
||||
|
||||
|
||||
echo "=== Installation completed successfully ==="
|
||||
kubectl get pods --all-namespaces
|
||||
|
||||
- name: Post-install verification
|
||||
@@ -220,7 +198,7 @@ jobs:
|
||||
|
||||
echo "=== Recent logs for debugging ==="
|
||||
kubectl logs --all-namespaces --tail=50 | grep -i "error\|timeout\|failed\|pull" || echo "No error logs found"
|
||||
|
||||
|
||||
echo "=== Helm releases ==="
|
||||
helm list --all-namespaces
|
||||
# the following would install only changed charts, but we only have one chart so
|
||||
|
||||
.github/workflows/pr-integration-tests.yml (vendored): 45 lines changed
@@ -22,11 +22,9 @@ env:
|
||||
CONFLUENCE_TEST_SPACE_URL: ${{ secrets.CONFLUENCE_TEST_SPACE_URL }}
|
||||
CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }}
|
||||
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
|
||||
CONFLUENCE_ACCESS_TOKEN_SCOPED: ${{ secrets.CONFLUENCE_ACCESS_TOKEN_SCOPED }}
|
||||
JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
|
||||
JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
|
||||
JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
|
||||
JIRA_API_TOKEN_SCOPED: ${{ secrets.JIRA_API_TOKEN_SCOPED }}
|
||||
PERM_SYNC_SHAREPOINT_CLIENT_ID: ${{ secrets.PERM_SYNC_SHAREPOINT_CLIENT_ID }}
|
||||
PERM_SYNC_SHAREPOINT_PRIVATE_KEY: ${{ secrets.PERM_SYNC_SHAREPOINT_PRIVATE_KEY }}
|
||||
PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD: ${{ secrets.PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD }}
|
||||
@@ -132,8 +130,6 @@ jobs:
|
||||
platforms: linux/arm64
|
||||
tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }}
|
||||
push: true
|
||||
outputs: type=registry
|
||||
no-cache: true
|
||||
|
||||
build-model-server-image:
|
||||
runs-on: blacksmith-16vcpu-ubuntu-2404-arm
|
||||
@@ -161,7 +157,6 @@ jobs:
|
||||
push: true
|
||||
outputs: type=registry
|
||||
provenance: false
|
||||
no-cache: true
|
||||
|
||||
build-integration-image:
|
||||
needs: prepare-build
|
||||
@@ -194,8 +189,6 @@ jobs:
|
||||
platforms: linux/arm64
|
||||
tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }}
|
||||
push: true
|
||||
outputs: type=registry
|
||||
no-cache: true
|
||||
|
||||
integration-tests:
|
||||
needs:
|
||||
@@ -237,9 +230,9 @@ jobs:
|
||||
# Pull all images from registry in parallel
|
||||
echo "Pulling Docker images in parallel..."
|
||||
# Pull images from private registry
|
||||
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }}) &
|
||||
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }}) &
|
||||
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }}) &
|
||||
(docker pull ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }}) &
|
||||
(docker pull ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }}) &
|
||||
(docker pull ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }}) &
|
||||
|
||||
# Wait for all background jobs to complete
|
||||
wait
|
||||
@@ -264,7 +257,7 @@ jobs:
|
||||
IMAGE_TAG=test \
|
||||
INTEGRATION_TESTS_MODE=true \
|
||||
CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS=0.001 \
|
||||
docker compose -f docker-compose.yml -f docker-compose.dev.yml up \
|
||||
docker compose -f docker-compose.dev.yml -p onyx-stack up \
|
||||
relational_db \
|
||||
index \
|
||||
cache \
|
||||
@@ -280,7 +273,7 @@ jobs:
|
||||
run: |
|
||||
echo "Starting wait-for-service script..."
|
||||
|
||||
docker logs -f onyx-api_server-1 &
|
||||
docker logs -f onyx-stack-api_server-1 &
|
||||
|
||||
start_time=$(date +%s)
|
||||
timeout=300 # 5 minutes in seconds
|
||||
@@ -324,7 +317,7 @@ jobs:
|
||||
retry_wait_seconds: 10
|
||||
command: |
|
||||
echo "Running integration tests for ${{ matrix.test-dir.path }}..."
|
||||
docker run --rm --network onyx_default \
|
||||
docker run --rm --network onyx-stack_default \
|
||||
--name test-runner \
|
||||
-e POSTGRES_HOST=relational_db \
|
||||
-e POSTGRES_USER=postgres \
|
||||
@@ -342,11 +335,9 @@ jobs:
|
||||
-e CONFLUENCE_TEST_SPACE_URL=${CONFLUENCE_TEST_SPACE_URL} \
|
||||
-e CONFLUENCE_USER_NAME=${CONFLUENCE_USER_NAME} \
|
||||
-e CONFLUENCE_ACCESS_TOKEN=${CONFLUENCE_ACCESS_TOKEN} \
|
||||
-e CONFLUENCE_ACCESS_TOKEN_SCOPED=${CONFLUENCE_ACCESS_TOKEN_SCOPED} \
|
||||
-e JIRA_BASE_URL=${JIRA_BASE_URL} \
|
||||
-e JIRA_USER_EMAIL=${JIRA_USER_EMAIL} \
|
||||
-e JIRA_API_TOKEN=${JIRA_API_TOKEN} \
|
||||
-e JIRA_API_TOKEN_SCOPED=${JIRA_API_TOKEN_SCOPED} \
|
||||
-e PERM_SYNC_SHAREPOINT_CLIENT_ID=${PERM_SYNC_SHAREPOINT_CLIENT_ID} \
|
||||
-e PERM_SYNC_SHAREPOINT_PRIVATE_KEY="${PERM_SYNC_SHAREPOINT_PRIVATE_KEY}" \
|
||||
-e PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD=${PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD} \
|
||||
@@ -363,13 +354,13 @@ jobs:
|
||||
if: always()
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose logs --no-color api_server > $GITHUB_WORKSPACE/api_server.log || true
|
||||
docker compose -f docker-compose.dev.yml -p onyx-stack logs --no-color api_server > $GITHUB_WORKSPACE/api_server.log || true
|
||||
|
||||
- name: Dump all-container logs (optional)
|
||||
if: always()
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
|
||||
docker compose -f docker-compose.dev.yml -p onyx-stack logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
|
||||
|
||||
- name: Upload logs
|
||||
if: always()
|
||||
@@ -383,7 +374,7 @@ jobs:
|
||||
if: always()
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose down -v
|
||||
docker compose -f docker-compose.dev.yml -p onyx-stack down -v
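The recurring edit in this workflow is the explicit Compose project name: with `-p onyx-stack`, Compose derives resource names from the project, so the default network becomes `onyx-stack_default` and containers become `onyx-stack-<service>-1`. That is why the `docker run --network` and `docker logs` targets above change in lockstep. For example:

```bash
# Project-name-derived names that the updated steps rely on (service name illustrative):
docker compose -f docker-compose.dev.yml -p onyx-stack up -d relational_db
docker network ls --format '{{.Name}}' | grep '^onyx-stack_default$'
docker ps --format '{{.Names}}'        | grep '^onyx-stack-relational_db-1$'
```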
|
||||
|
||||
|
||||
multitenant-tests:
|
||||
@@ -414,9 +405,9 @@ jobs:
|
||||
|
||||
- name: Pull Docker images
|
||||
run: |
|
||||
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }}) &
|
||||
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }}) &
|
||||
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }}) &
|
||||
(docker pull ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }}) &
|
||||
(docker pull ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }}) &
|
||||
(docker pull ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }}) &
|
||||
wait
|
||||
docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }} onyxdotapp/onyx-backend:test
|
||||
docker tag ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }} onyxdotapp/onyx-model-server:test
|
||||
@@ -432,7 +423,7 @@ jobs:
|
||||
DISABLE_TELEMETRY=true \
|
||||
IMAGE_TAG=test \
|
||||
DEV_MODE=true \
|
||||
docker compose -f docker-compose.multitenant-dev.yml up \
|
||||
docker compose -f docker-compose.multitenant-dev.yml -p onyx-stack up \
|
||||
relational_db \
|
||||
index \
|
||||
cache \
|
||||
@@ -447,7 +438,7 @@ jobs:
|
||||
- name: Wait for service to be ready (multi-tenant)
|
||||
run: |
|
||||
echo "Starting wait-for-service script for multi-tenant..."
|
||||
docker logs -f onyx-api_server-1 &
|
||||
docker logs -f onyx-stack-api_server-1 &
|
||||
start_time=$(date +%s)
|
||||
timeout=300
|
||||
while true; do
|
||||
@@ -473,7 +464,7 @@ jobs:
|
||||
- name: Run Multi-Tenant Integration Tests
|
||||
run: |
|
||||
echo "Running multi-tenant integration tests..."
|
||||
docker run --rm --network onyx_default \
|
||||
docker run --rm --network onyx-stack_default \
|
||||
--name test-runner \
|
||||
-e POSTGRES_HOST=relational_db \
|
||||
-e POSTGRES_USER=postgres \
|
||||
@@ -502,13 +493,13 @@ jobs:
|
||||
if: always()
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose -f docker-compose.multitenant-dev.yml logs --no-color api_server > $GITHUB_WORKSPACE/api_server_multitenant.log || true
|
||||
docker compose -f docker-compose.multitenant-dev.yml -p onyx-stack logs --no-color api_server > $GITHUB_WORKSPACE/api_server_multitenant.log || true
|
||||
|
||||
- name: Dump all-container logs (multi-tenant)
|
||||
if: always()
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose -f docker-compose.multitenant-dev.yml logs --no-color > $GITHUB_WORKSPACE/docker-compose-multitenant.log || true
|
||||
docker compose -f docker-compose.multitenant-dev.yml -p onyx-stack logs --no-color > $GITHUB_WORKSPACE/docker-compose-multitenant.log || true
|
||||
|
||||
- name: Upload logs (multi-tenant)
|
||||
if: always()
|
||||
@@ -521,7 +512,7 @@ jobs:
|
||||
if: always()
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose -f docker-compose.multitenant-dev.yml down -v
|
||||
docker compose -f docker-compose.multitenant-dev.yml -p onyx-stack down -v
|
||||
|
||||
required:
|
||||
runs-on: blacksmith-2vcpu-ubuntu-2404-arm
|
||||
|
||||
.github/workflows/pr-mit-integration-tests.yml (vendored): 27 lines changed
@@ -19,11 +19,9 @@ env:
|
||||
CONFLUENCE_TEST_SPACE_URL: ${{ secrets.CONFLUENCE_TEST_SPACE_URL }}
|
||||
CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }}
|
||||
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
|
||||
CONFLUENCE_ACCESS_TOKEN_SCOPED: ${{ secrets.CONFLUENCE_ACCESS_TOKEN_SCOPED }}
|
||||
JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
|
||||
JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
|
||||
JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
|
||||
JIRA_API_TOKEN_SCOPED: ${{ secrets.JIRA_API_TOKEN_SCOPED }}
|
||||
PERM_SYNC_SHAREPOINT_CLIENT_ID: ${{ secrets.PERM_SYNC_SHAREPOINT_CLIENT_ID }}
|
||||
PERM_SYNC_SHAREPOINT_PRIVATE_KEY: ${{ secrets.PERM_SYNC_SHAREPOINT_PRIVATE_KEY }}
|
||||
PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD: ${{ secrets.PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD }}
|
||||
@@ -129,8 +127,6 @@ jobs:
|
||||
platforms: linux/arm64
|
||||
tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }}
|
||||
push: true
|
||||
outputs: type=registry
|
||||
no-cache: true
|
||||
|
||||
build-model-server-image:
|
||||
runs-on: blacksmith-16vcpu-ubuntu-2404-arm
|
||||
@@ -158,7 +154,6 @@ jobs:
|
||||
push: true
|
||||
outputs: type=registry
|
||||
provenance: false
|
||||
no-cache: true
|
||||
|
||||
build-integration-image:
|
||||
needs: prepare-build
|
||||
@@ -191,8 +186,6 @@ jobs:
|
||||
platforms: linux/arm64
|
||||
tags: ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }}
|
||||
push: true
|
||||
outputs: type=registry
|
||||
no-cache: true
|
||||
|
||||
integration-tests-mit:
|
||||
needs:
|
||||
@@ -235,9 +228,9 @@ jobs:
|
||||
# Pull all images from registry in parallel
|
||||
echo "Pulling Docker images in parallel..."
|
||||
# Pull images from private registry
|
||||
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }}) &
|
||||
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }}) &
|
||||
(docker pull --platform linux/arm64 ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }}) &
|
||||
(docker pull ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-backend:test-${{ github.run_id }}) &
|
||||
(docker pull ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-model-server:test-${{ github.run_id }}) &
|
||||
(docker pull ${{ env.PRIVATE_REGISTRY }}/integration-test-onyx-integration:test-${{ github.run_id }}) &
|
||||
|
||||
# Wait for all background jobs to complete
|
||||
wait
|
||||
@@ -260,7 +253,7 @@ jobs:
|
||||
DISABLE_TELEMETRY=true \
|
||||
IMAGE_TAG=test \
|
||||
INTEGRATION_TESTS_MODE=true \
|
||||
docker compose -f docker-compose.yml -f docker-compose.dev.yml up \
|
||||
docker compose -f docker-compose.dev.yml -p onyx-stack up \
|
||||
relational_db \
|
||||
index \
|
||||
cache \
|
||||
@@ -276,7 +269,7 @@ jobs:
|
||||
run: |
|
||||
echo "Starting wait-for-service script..."
|
||||
|
||||
docker logs -f onyx-api_server-1 &
|
||||
docker logs -f onyx-stack-api_server-1 &
|
||||
|
||||
start_time=$(date +%s)
|
||||
timeout=300 # 5 minutes in seconds
|
||||
@@ -321,7 +314,7 @@ jobs:
|
||||
retry_wait_seconds: 10
|
||||
command: |
|
||||
echo "Running integration tests for ${{ matrix.test-dir.path }}..."
|
||||
docker run --rm --network onyx_default \
|
||||
docker run --rm --network onyx-stack_default \
|
||||
--name test-runner \
|
||||
-e POSTGRES_HOST=relational_db \
|
||||
-e POSTGRES_USER=postgres \
|
||||
@@ -339,11 +332,9 @@ jobs:
|
||||
-e CONFLUENCE_TEST_SPACE_URL=${CONFLUENCE_TEST_SPACE_URL} \
|
||||
-e CONFLUENCE_USER_NAME=${CONFLUENCE_USER_NAME} \
|
||||
-e CONFLUENCE_ACCESS_TOKEN=${CONFLUENCE_ACCESS_TOKEN} \
|
||||
-e CONFLUENCE_ACCESS_TOKEN_SCOPED=${CONFLUENCE_ACCESS_TOKEN_SCOPED} \
|
||||
-e JIRA_BASE_URL=${JIRA_BASE_URL} \
|
||||
-e JIRA_USER_EMAIL=${JIRA_USER_EMAIL} \
|
||||
-e JIRA_API_TOKEN=${JIRA_API_TOKEN} \
|
||||
-e JIRA_API_TOKEN_SCOPED=${JIRA_API_TOKEN_SCOPED} \
|
||||
-e PERM_SYNC_SHAREPOINT_CLIENT_ID=${PERM_SYNC_SHAREPOINT_CLIENT_ID} \
|
||||
-e PERM_SYNC_SHAREPOINT_PRIVATE_KEY="${PERM_SYNC_SHAREPOINT_PRIVATE_KEY}" \
|
||||
-e PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD=${PERM_SYNC_SHAREPOINT_CERTIFICATE_PASSWORD} \
|
||||
@@ -360,13 +351,13 @@ jobs:
|
||||
if: always()
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose logs --no-color api_server > $GITHUB_WORKSPACE/api_server.log || true
|
||||
docker compose -f docker-compose.dev.yml -p onyx-stack logs --no-color api_server > $GITHUB_WORKSPACE/api_server.log || true
|
||||
|
||||
- name: Dump all-container logs (optional)
|
||||
if: always()
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
|
||||
docker compose -f docker-compose.dev.yml -p onyx-stack logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
|
||||
|
||||
- name: Upload logs
|
||||
if: always()
|
||||
@@ -380,7 +371,7 @@ jobs:
|
||||
if: always()
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose down -v
|
||||
docker compose -f docker-compose.dev.yml -p onyx-stack down -v
|
||||
|
||||
|
||||
required:
|
||||
|
||||
.github/workflows/pr-playwright-tests.yml (vendored): 43 lines changed
@@ -56,8 +56,6 @@ jobs:
|
||||
provenance: false
|
||||
sbom: false
|
||||
push: true
|
||||
outputs: type=registry
|
||||
# no-cache: true
|
||||
|
||||
build-backend-image:
|
||||
runs-on: blacksmith-8vcpu-ubuntu-2404-arm
|
||||
@@ -89,8 +87,6 @@ jobs:
|
||||
provenance: false
|
||||
sbom: false
|
||||
push: true
|
||||
outputs: type=registry
|
||||
# no-cache: true
|
||||
|
||||
build-model-server-image:
|
||||
runs-on: blacksmith-8vcpu-ubuntu-2404-arm
|
||||
@@ -122,8 +118,6 @@ jobs:
|
||||
provenance: false
|
||||
sbom: false
|
||||
push: true
|
||||
outputs: type=registry
|
||||
# no-cache: true
|
||||
|
||||
playwright-tests:
|
||||
needs: [build-web-image, build-backend-image, build-model-server-image]
|
||||
@@ -185,29 +179,24 @@ jobs:
|
||||
working-directory: ./web
|
||||
run: npx playwright install --with-deps
|
||||
|
||||
- name: Create .env file for Docker Compose
|
||||
run: |
|
||||
cat <<EOF > deployment/docker_compose/.env
|
||||
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
|
||||
AUTH_TYPE=basic
|
||||
GEN_AI_API_KEY=${{ env.OPENAI_API_KEY }}
|
||||
EXA_API_KEY=${{ env.EXA_API_KEY }}
|
||||
REQUIRE_EMAIL_VERIFICATION=false
|
||||
DISABLE_TELEMETRY=true
|
||||
IMAGE_TAG=test
|
||||
EOF
|
||||
|
||||
- name: Start Docker containers
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d
|
||||
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
|
||||
AUTH_TYPE=basic \
|
||||
GEN_AI_API_KEY=${{ env.OPENAI_API_KEY }} \
|
||||
EXA_API_KEY=${{ env.EXA_API_KEY }} \
|
||||
REQUIRE_EMAIL_VERIFICATION=false \
|
||||
DISABLE_TELEMETRY=true \
|
||||
IMAGE_TAG=test \
|
||||
docker compose -f docker-compose.dev.yml -p danswer-stack up -d
|
||||
id: start_docker
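The generated `.env` file is replaced here by passing the same variables inline on the compose invocation. Compose interpolates `${VAR}` references from the calling process environment as well as from an `.env` file next to the compose file, so the two forms are interchangeable for this purpose; a condensed sketch (values illustrative):

```bash
# Two equivalent ways to feed variables into docker compose interpolation:
IMAGE_TAG=test AUTH_TYPE=basic \
  docker compose -f docker-compose.dev.yml -p danswer-stack up -d

printf 'IMAGE_TAG=test\nAUTH_TYPE=basic\n' > .env
docker compose -f docker-compose.dev.yml -p danswer-stack up -d
```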
|
||||
|
||||
- name: Wait for service to be ready
|
||||
run: |
|
||||
echo "Starting wait-for-service script..."
|
||||
|
||||
docker logs -f onyx-api_server-1 &
|
||||
docker logs -f danswer-stack-api_server-1 &
|
||||
|
||||
start_time=$(date +%s)
|
||||
timeout=300 # 5 minutes in seconds
|
||||
@@ -239,16 +228,14 @@ jobs:
|
||||
|
||||
- name: Run Playwright tests
|
||||
working-directory: ./web
|
||||
run: |
|
||||
# Create test-results directory to ensure it exists for artifact upload
|
||||
mkdir -p test-results
|
||||
npx playwright test
|
||||
run: npx playwright test
|
||||
|
||||
- uses: actions/upload-artifact@v4
|
||||
if: always()
|
||||
with:
|
||||
# Includes test results and debug screenshots
|
||||
name: playwright-test-results-${{ github.run_id }}
|
||||
# Chromatic automatically defaults to the test-results directory.
|
||||
# Replace with the path to your custom directory and adjust the CHROMATIC_ARCHIVE_LOCATION environment variable accordingly.
|
||||
name: test-results
|
||||
path: ./web/test-results
|
||||
retention-days: 30
|
||||
|
||||
@@ -257,7 +244,7 @@ jobs:
|
||||
if: success() || failure()
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose logs > docker-compose.log
|
||||
docker compose -f docker-compose.dev.yml -p danswer-stack logs > docker-compose.log
|
||||
mv docker-compose.log ${{ github.workspace }}/docker-compose.log
|
||||
|
||||
- name: Upload logs
|
||||
@@ -270,7 +257,7 @@ jobs:
|
||||
- name: Stop Docker containers
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose down -v
|
||||
docker compose -f docker-compose.dev.yml -p danswer-stack down -v
|
||||
|
||||
# NOTE: Chromatic UI diff testing is currently disabled.
|
||||
# We are using Playwright for local and CI testing without visual regression checks.
|
||||
|
||||
@@ -20,13 +20,11 @@ env:
|
||||
CONFLUENCE_IS_CLOUD: ${{ secrets.CONFLUENCE_IS_CLOUD }}
|
||||
CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }}
|
||||
CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
|
||||
CONFLUENCE_ACCESS_TOKEN_SCOPED: ${{ secrets.CONFLUENCE_ACCESS_TOKEN_SCOPED }}
|
||||
|
||||
# Jira
|
||||
JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
|
||||
JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
|
||||
JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
|
||||
JIRA_API_TOKEN_SCOPED: ${{ secrets.JIRA_API_TOKEN_SCOPED }}
|
||||
|
||||
# Gong
|
||||
GONG_ACCESS_KEY: ${{ secrets.GONG_ACCESS_KEY }}
|
||||
@@ -98,13 +96,6 @@ env:
|
||||
TEAMS_DIRECTORY_ID: ${{ secrets.TEAMS_DIRECTORY_ID }}
|
||||
TEAMS_SECRET: ${{ secrets.TEAMS_SECRET }}
|
||||
|
||||
# Bitbucket
|
||||
BITBUCKET_WORKSPACE: ${{ secrets.BITBUCKET_WORKSPACE }}
|
||||
BITBUCKET_REPOSITORIES: ${{ secrets.BITBUCKET_REPOSITORIES }}
|
||||
BITBUCKET_PROJECTS: ${{ secrets.BITBUCKET_PROJECTS }}
|
||||
BITBUCKET_EMAIL: ${{ secrets.BITBUCKET_EMAIL }}
|
||||
BITBUCKET_API_TOKEN: ${{ secrets.BITBUCKET_API_TOKEN }}
|
||||
|
||||
jobs:
|
||||
connectors-check:
|
||||
# See https://runs-on.com/runners/linux/
|
||||
|
||||
.github/workflows/pr-python-model-tests.yml (vendored): 6 lines changed
@@ -77,7 +77,7 @@ jobs:
|
||||
REQUIRE_EMAIL_VERIFICATION=false \
|
||||
DISABLE_TELEMETRY=true \
|
||||
IMAGE_TAG=test \
|
||||
docker compose -f docker-compose.model-server-test.yml up -d indexing_model_server
|
||||
docker compose -f docker-compose.model-server-test.yml -p onyx-stack up -d indexing_model_server
|
||||
id: start_docker
|
||||
|
||||
- name: Wait for service to be ready
|
||||
@@ -132,7 +132,7 @@ jobs:
|
||||
if: always()
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose -f docker-compose.model-server-test.yml logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
|
||||
docker compose -f docker-compose.model-server-test.yml -p onyx-stack logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
|
||||
|
||||
- name: Upload logs
|
||||
if: always()
|
||||
@@ -145,5 +145,5 @@ jobs:
|
||||
if: always()
|
||||
run: |
|
||||
cd deployment/docker_compose
|
||||
docker compose -f docker-compose.model-server-test.yml down -v
|
||||
docker compose -f docker-compose.model-server-test.yml -p onyx-stack down -v
|
||||
|
||||
|
||||
@@ -37,15 +37,6 @@ repos:
        additional_dependencies:
          - prettier

  - repo: local
    hooks:
      - id: check-lazy-imports
        name: Check lazy imports are not directly imported
        entry: python3 backend/scripts/check_lazy_imports.py
        language: system
        files: ^backend/.*\.py$
        pass_filenames: false

# We would like to have a mypy pre-commit hook, but due to the fact that
# pre-commit runs in its own isolated environment, we would need to install
# and keep in sync all dependencies so mypy has access to the appropriate type
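This hunk drops the local `check-lazy-imports` hook, alongside the CI workflow deleted at the top of the diff. While it exists, a `repo: local` / `language: system` hook like this runs the listed command in the developer's own environment and can be invoked on demand, e.g.:

```bash
# Run just this hook across the whole repo, or call the script directly:
pre-commit run check-lazy-imports --all-files
python3 backend/scripts/check_lazy_imports.py
```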
.vscode/env_template.txt (vendored): 6 lines changed

@@ -10,7 +10,7 @@ SKIP_WARM_UP=True

# Always keep these on for Dev
# Logs all model prompts to stdout
LOG_ONYX_MODEL_INTERACTIONS=True
LOG_DANSWER_MODEL_INTERACTIONS=True
# More verbose logging
LOG_LEVEL=debug

@@ -39,8 +39,8 @@ FAST_GEN_AI_MODEL_VERSION=gpt-4o

# For Danswer Slack Bot, overrides the UI values so no need to set this up via UI every time
# Only needed if using DanswerBot
#ONYX_BOT_SLACK_APP_TOKEN=<REPLACE THIS>
#ONYX_BOT_SLACK_BOT_TOKEN=<REPLACE THIS>
#DANSWER_BOT_SLACK_APP_TOKEN=<REPLACE THIS>
#DANSWER_BOT_SLACK_BOT_TOKEN=<REPLACE THIS>


# Python stuff
.vscode/launch.template.jsonc (vendored): 910 lines changed
@@ -1,468 +1,444 @@
|
||||
/* Copy this file into '.vscode/launch.json' or merge its contents into your existing configurations. */
|
||||
|
||||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"compounds": [
|
||||
{
|
||||
// Dummy entry used to label the group
|
||||
"name": "--- Compound ---",
|
||||
"configurations": ["--- Individual ---"],
|
||||
"presentation": {
|
||||
"group": "1"
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"compounds": [
|
||||
{
|
||||
// Dummy entry used to label the group
|
||||
"name": "--- Compound ---",
|
||||
"configurations": ["--- Individual ---"],
|
||||
"presentation": {
|
||||
"group": "1"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Run All Onyx Services",
|
||||
"configurations": [
|
||||
"Web Server",
|
||||
"Model Server",
|
||||
"API Server",
|
||||
"Slack Bot",
|
||||
"Celery primary",
|
||||
"Celery light",
|
||||
"Celery heavy",
|
||||
"Celery docfetching",
|
||||
"Celery docprocessing",
|
||||
"Celery beat",
|
||||
"Celery monitoring"
|
||||
],
|
||||
"presentation": {
|
||||
"group": "1"
|
||||
},
|
||||
"stopAll": true
|
||||
},
|
||||
{
|
||||
"name": "Web / Model / API",
|
||||
"configurations": ["Web Server", "Model Server", "API Server"],
|
||||
"presentation": {
|
||||
"group": "1"
|
||||
},
|
||||
"stopAll": true
|
||||
},
|
||||
{
|
||||
"name": "Celery (all)",
|
||||
"configurations": [
|
||||
"Celery primary",
|
||||
"Celery light",
|
||||
"Celery heavy",
|
||||
"Celery docfetching",
|
||||
"Celery docprocessing",
|
||||
"Celery beat",
|
||||
"Celery monitoring"
|
||||
],
|
||||
"presentation": {
|
||||
"group": "1"
|
||||
},
|
||||
"stopAll": true
|
||||
}
|
||||
],
|
||||
"configurations": [
|
||||
{
|
||||
// Dummy entry used to label the group
|
||||
"name": "--- Individual ---",
|
||||
"type": "node",
|
||||
"request": "launch",
|
||||
"presentation": {
|
||||
"group": "2",
|
||||
"order": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Web Server",
|
||||
"type": "node",
|
||||
"request": "launch",
|
||||
"cwd": "${workspaceRoot}/web",
|
||||
"runtimeExecutable": "npm",
|
||||
"envFile": "${workspaceFolder}/.vscode/.env",
|
||||
"runtimeArgs": ["run", "dev"],
|
||||
"presentation": {
|
||||
"group": "2"
|
||||
},
|
||||
"console": "integratedTerminal",
|
||||
"consoleTitle": "Web Server Console"
|
||||
},
|
||||
{
|
||||
"name": "Model Server",
|
||||
"consoleName": "Model Server",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"module": "uvicorn",
|
||||
"cwd": "${workspaceFolder}/backend",
|
||||
"envFile": "${workspaceFolder}/.vscode/.env",
|
||||
"env": {
|
||||
"LOG_LEVEL": "DEBUG",
|
||||
"PYTHONUNBUFFERED": "1"
|
||||
},
|
||||
"args": ["model_server.main:app", "--reload", "--port", "9000"],
|
||||
"presentation": {
|
||||
"group": "2"
|
||||
},
|
||||
"consoleTitle": "Model Server Console"
|
||||
},
|
||||
{
|
||||
"name": "API Server",
|
||||
"consoleName": "API Server",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"module": "uvicorn",
|
||||
"cwd": "${workspaceFolder}/backend",
|
||||
"envFile": "${workspaceFolder}/.vscode/.env",
|
||||
"env": {
|
||||
"LOG_DANSWER_MODEL_INTERACTIONS": "True",
|
||||
"LOG_LEVEL": "DEBUG",
|
||||
"PYTHONUNBUFFERED": "1"
|
||||
},
|
||||
"args": ["onyx.main:app", "--reload", "--port", "8080"],
|
||||
"presentation": {
|
||||
"group": "2"
|
||||
},
|
||||
"consoleTitle": "API Server Console"
|
||||
},
|
||||
// For the listener to access the Slack API,
|
||||
// DANSWER_BOT_SLACK_APP_TOKEN & DANSWER_BOT_SLACK_BOT_TOKEN need to be set in .env file located in the root of the project
|
||||
{
|
||||
"name": "Slack Bot",
|
||||
"consoleName": "Slack Bot",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"program": "onyx/onyxbot/slack/listener.py",
|
||||
"cwd": "${workspaceFolder}/backend",
|
||||
"envFile": "${workspaceFolder}/.vscode/.env",
|
||||
"env": {
|
||||
"LOG_LEVEL": "DEBUG",
|
||||
"PYTHONUNBUFFERED": "1",
|
||||
"PYTHONPATH": "."
|
||||
},
|
||||
"presentation": {
|
||||
"group": "2"
|
||||
},
|
||||
"consoleTitle": "Slack Bot Console"
|
||||
},
|
||||
{
|
||||
"name": "Celery primary",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"module": "celery",
|
||||
"cwd": "${workspaceFolder}/backend",
|
||||
"envFile": "${workspaceFolder}/.vscode/.env",
|
||||
"env": {
|
||||
"LOG_LEVEL": "INFO",
|
||||
"PYTHONUNBUFFERED": "1",
|
||||
"PYTHONPATH": "."
|
||||
},
|
||||
"args": [
|
||||
"-A",
|
||||
"onyx.background.celery.versioned_apps.primary",
|
||||
"worker",
|
||||
"--pool=threads",
|
||||
"--concurrency=4",
|
||||
"--prefetch-multiplier=1",
|
||||
"--loglevel=INFO",
|
||||
"--hostname=primary@%n",
|
||||
"-Q",
|
||||
"celery"
|
||||
],
|
||||
"presentation": {
|
||||
"group": "2"
|
||||
},
|
||||
"consoleTitle": "Celery primary Console"
|
||||
},
|
||||
{
|
||||
"name": "Celery light",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"module": "celery",
|
||||
"cwd": "${workspaceFolder}/backend",
|
||||
"envFile": "${workspaceFolder}/.vscode/.env",
|
||||
"env": {
|
||||
"LOG_LEVEL": "INFO",
|
||||
"PYTHONUNBUFFERED": "1",
|
||||
"PYTHONPATH": "."
|
||||
},
|
||||
"args": [
|
||||
"-A",
|
||||
"onyx.background.celery.versioned_apps.light",
|
||||
"worker",
|
||||
"--pool=threads",
|
||||
"--concurrency=64",
|
||||
"--prefetch-multiplier=8",
|
||||
"--loglevel=INFO",
|
||||
"--hostname=light@%n",
|
||||
"-Q",
|
||||
"vespa_metadata_sync,connector_deletion,doc_permissions_upsert,index_attempt_cleanup"
|
||||
],
|
||||
"presentation": {
|
||||
"group": "2"
|
||||
},
|
||||
"consoleTitle": "Celery light Console"
|
||||
},
|
||||
{
|
||||
"name": "Celery heavy",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"module": "celery",
|
||||
"cwd": "${workspaceFolder}/backend",
|
||||
"envFile": "${workspaceFolder}/.vscode/.env",
|
||||
"env": {
|
||||
"LOG_LEVEL": "INFO",
|
||||
"PYTHONUNBUFFERED": "1",
|
||||
"PYTHONPATH": "."
|
||||
},
|
||||
"args": [
|
||||
"-A",
|
||||
"onyx.background.celery.versioned_apps.heavy",
|
||||
"worker",
|
||||
"--pool=threads",
|
||||
"--concurrency=4",
|
||||
"--prefetch-multiplier=1",
|
||||
"--loglevel=INFO",
|
||||
"--hostname=heavy@%n",
|
||||
"-Q",
|
||||
"connector_pruning,connector_doc_permissions_sync,connector_external_group_sync"
|
||||
],
|
||||
"presentation": {
|
||||
"group": "2"
|
||||
},
|
||||
"consoleTitle": "Celery heavy Console"
|
||||
},
|
||||
{
|
||||
"name": "Celery docfetching",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"module": "celery",
|
||||
"cwd": "${workspaceFolder}/backend",
|
||||
"envFile": "${workspaceFolder}/.vscode/.env",
|
||||
"env": {
|
||||
"LOG_LEVEL": "DEBUG",
|
||||
"PYTHONUNBUFFERED": "1",
|
||||
"PYTHONPATH": "."
|
||||
},
|
||||
"args": [
|
||||
"-A",
|
||||
"onyx.background.celery.versioned_apps.docfetching",
|
||||
"worker",
|
||||
"--pool=threads",
|
||||
"--concurrency=1",
|
||||
"--prefetch-multiplier=1",
|
||||
"--loglevel=INFO",
|
||||
"--hostname=docfetching@%n",
|
||||
"-Q",
|
||||
"connector_doc_fetching,user_files_indexing"
|
||||
],
|
||||
"presentation": {
|
||||
"group": "2"
|
||||
},
|
||||
"consoleTitle": "Celery docfetching Console",
|
||||
"justMyCode": false
|
||||
},
|
||||
{
|
||||
"name": "Run All Onyx Services",
|
||||
"configurations": [
|
||||
"Web Server",
|
||||
"Model Server",
|
||||
"API Server",
|
||||
"Slack Bot",
|
||||
"Celery primary",
|
||||
"Celery light",
|
||||
"Celery heavy",
|
||||
"Celery docfetching",
|
||||
"Celery docprocessing",
|
||||
"Celery beat",
|
||||
"Celery monitoring",
|
||||
"Celery user file processing"
|
||||
],
|
||||
"presentation": {
|
||||
"group": "1"
|
||||
}
|
||||
"name": "Celery docprocessing",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"module": "celery",
|
||||
"cwd": "${workspaceFolder}/backend",
|
||||
"envFile": "${workspaceFolder}/.vscode/.env",
|
||||
"env": {
|
||||
"ENABLE_MULTIPASS_INDEXING": "false",
|
||||
"LOG_LEVEL": "DEBUG",
|
||||
"PYTHONUNBUFFERED": "1",
|
||||
"PYTHONPATH": "."
|
||||
},
|
||||
"args": [
|
||||
"-A",
|
||||
"onyx.background.celery.versioned_apps.docprocessing",
|
||||
"worker",
|
||||
"--pool=threads",
|
||||
"--concurrency=6",
|
||||
"--prefetch-multiplier=1",
|
||||
"--loglevel=INFO",
|
||||
"--hostname=docprocessing@%n",
|
||||
"-Q",
|
||||
"docprocessing"
|
||||
],
|
||||
"presentation": {
|
||||
"group": "2"
|
||||
},
|
||||
"consoleTitle": "Celery docprocessing Console",
|
||||
"justMyCode": false
|
||||
},
|
||||
{
|
||||
"name": "Web / Model / API",
|
||||
"configurations": ["Web Server", "Model Server", "API Server"],
|
||||
"presentation": {
|
||||
"group": "1"
|
||||
}
|
||||
{
|
||||
"name": "Celery monitoring",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"module": "celery",
|
||||
"cwd": "${workspaceFolder}/backend",
|
||||
"envFile": "${workspaceFolder}/.vscode/.env",
|
||||
"env": {},
|
||||
"args": [
|
||||
"-A",
|
||||
"onyx.background.celery.versioned_apps.monitoring",
|
||||
"worker",
|
||||
"--pool=solo",
|
||||
"--concurrency=1",
|
||||
"--prefetch-multiplier=1",
|
||||
"--loglevel=INFO",
|
||||
"--hostname=monitoring@%n",
|
||||
"-Q",
|
||||
"monitoring"
|
||||
],
|
||||
"presentation": {
|
||||
"group": "2"
|
||||
},
|
||||
"consoleTitle": "Celery monitoring Console"
|
||||
},
|
||||
{
|
||||
"name": "Celery beat",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"module": "celery",
|
||||
"cwd": "${workspaceFolder}/backend",
|
||||
"envFile": "${workspaceFolder}/.vscode/.env",
|
||||
"env": {
|
||||
"LOG_LEVEL": "DEBUG",
|
||||
"PYTHONUNBUFFERED": "1",
|
||||
"PYTHONPATH": "."
|
||||
},
|
||||
"args": [
|
||||
"-A",
|
||||
"onyx.background.celery.versioned_apps.beat",
|
||||
"beat",
|
||||
"--loglevel=INFO"
|
||||
],
|
||||
"presentation": {
|
||||
"group": "2"
|
||||
},
|
||||
"consoleTitle": "Celery beat Console"
|
||||
},
|
||||
{
|
||||
"name": "Pytest",
|
||||
"consoleName": "Pytest",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"module": "pytest",
|
||||
"cwd": "${workspaceFolder}/backend",
|
||||
"envFile": "${workspaceFolder}/.vscode/.env",
|
||||
"env": {
|
||||
"LOG_LEVEL": "DEBUG",
|
||||
"PYTHONUNBUFFERED": "1",
|
||||
"PYTHONPATH": "."
|
||||
},
|
||||
"args": [
|
||||
"-v"
|
||||
// Specify a specific module/test to run or provide nothing to run all tests
|
||||
//"tests/unit/onyx/llm/answering/test_prune_and_merge.py"
|
||||
],
|
||||
"presentation": {
|
||||
"group": "2"
|
||||
},
|
||||
"consoleTitle": "Pytest Console"
|
||||
},
|
||||
{
|
||||
// Dummy entry used to label the group
|
||||
"name": "--- Tasks ---",
|
||||
"type": "node",
|
||||
"request": "launch",
|
||||
"presentation": {
|
||||
"group": "3",
|
||||
"order": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Clear and Restart External Volumes and Containers",
|
||||
"type": "node",
|
||||
"request": "launch",
|
||||
"runtimeExecutable": "bash",
|
||||
"runtimeArgs": [
|
||||
"${workspaceFolder}/backend/scripts/restart_containers.sh"
|
||||
],
|
||||
"cwd": "${workspaceFolder}",
|
||||
"console": "integratedTerminal",
|
||||
"stopOnEntry": true,
|
||||
"presentation": {
|
||||
"group": "3"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Eval CLI",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"program": "${workspaceFolder}/backend/onyx/evals/eval_cli.py",
|
||||
"cwd": "${workspaceFolder}/backend",
|
||||
"console": "integratedTerminal",
|
||||
"justMyCode": false,
|
||||
"envFile": "${workspaceFolder}/.vscode/.env",
|
||||
"presentation": {
|
||||
"group": "3"
|
||||
},
|
||||
"env": {
|
||||
"LOG_LEVEL": "INFO",
|
||||
"PYTHONUNBUFFERED": "1",
|
||||
"PYTHONPATH": "."
|
||||
},
|
||||
"args": [
|
||||
"--verbose"
|
||||
],
|
||||
"consoleTitle": "Eval CLI Console"
|
||||
},
|
||||
{
|
||||
"name": "Celery (all)",
|
||||
"configurations": [
|
||||
"Celery primary",
|
||||
"Celery light",
|
||||
"Celery heavy",
|
||||
"Celery docfetching",
|
||||
"Celery docprocessing",
|
||||
"Celery beat",
|
||||
"Celery monitoring",
|
||||
"Celery user file processing"
|
||||
],
|
||||
"presentation": {
|
||||
"group": "1"
|
||||
{
|
||||
// Celery jobs launched through a single background script (legacy)
|
||||
// Recommend using the "Celery (all)" compound launch instead.
|
||||
"name": "Background Jobs",
|
||||
"consoleName": "Background Jobs",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"program": "scripts/dev_run_background_jobs.py",
|
||||
"cwd": "${workspaceFolder}/backend",
|
||||
"envFile": "${workspaceFolder}/.vscode/.env",
|
||||
"env": {
|
||||
"LOG_DANSWER_MODEL_INTERACTIONS": "True",
|
||||
"LOG_LEVEL": "DEBUG",
|
||||
"PYTHONUNBUFFERED": "1",
|
||||
"PYTHONPATH": "."
|
||||
}
|
||||
},
|
||||
"stopAll": true
|
||||
}
|
||||
],
|
||||
"configurations": [
|
||||
{
|
||||
// Dummy entry used to label the group
|
||||
"name": "--- Individual ---",
|
||||
"type": "node",
|
||||
"request": "launch",
|
||||
"presentation": {
|
||||
"group": "2",
|
||||
"order": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Web Server",
|
||||
"type": "node",
|
||||
"request": "launch",
|
||||
"cwd": "${workspaceRoot}/web",
|
||||
"runtimeExecutable": "npm",
|
||||
"envFile": "${workspaceFolder}/.vscode/.env",
|
||||
"runtimeArgs": ["run", "dev"],
|
||||
"presentation": {
|
||||
"group": "2"
|
||||
{
|
||||
"name": "Install Python Requirements",
|
||||
"type": "node",
|
||||
"request": "launch",
|
||||
"runtimeExecutable": "bash",
|
||||
"runtimeArgs": [
|
||||
"-c",
|
||||
"pip install -r backend/requirements/default.txt && pip install -r backend/requirements/dev.txt && pip install -r backend/requirements/ee.txt && pip install -r backend/requirements/model_server.txt"
|
||||
],
|
||||
"cwd": "${workspaceFolder}",
|
||||
"console": "integratedTerminal",
|
||||
"presentation": {
|
||||
"group": "3"
|
||||
}
|
||||
},
|
||||
"console": "integratedTerminal",
|
||||
"consoleTitle": "Web Server Console"
|
||||
},
|
||||
{
|
||||
"name": "Model Server",
|
||||
"consoleName": "Model Server",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"module": "uvicorn",
|
||||
"cwd": "${workspaceFolder}/backend",
|
||||
"envFile": "${workspaceFolder}/.vscode/.env",
|
||||
"env": {
|
||||
"LOG_LEVEL": "DEBUG",
|
||||
"PYTHONUNBUFFERED": "1"
|
||||
},
|
||||
"args": ["model_server.main:app", "--reload", "--port", "9000"],
|
||||
"presentation": {
|
||||
"group": "2"
|
||||
},
|
||||
"consoleTitle": "Model Server Console"
|
||||
},
|
||||
{
|
||||
"name": "API Server",
|
||||
"consoleName": "API Server",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"module": "uvicorn",
|
||||
"cwd": "${workspaceFolder}/backend",
|
||||
"envFile": "${workspaceFolder}/.vscode/.env",
|
||||
"env": {
|
||||
"LOG_ONYX_MODEL_INTERACTIONS": "True",
|
||||
"LOG_LEVEL": "DEBUG",
|
||||
"PYTHONUNBUFFERED": "1"
|
||||
},
|
||||
"args": ["onyx.main:app", "--reload", "--port", "8080"],
|
||||
"presentation": {
|
||||
"group": "2"
|
||||
},
|
||||
"consoleTitle": "API Server Console"
|
||||
},
|
||||
// For the listener to access the Slack API,
|
||||
// ONYX_BOT_SLACK_APP_TOKEN & ONYX_BOT_SLACK_BOT_TOKEN need to be set in .env file located in the root of the project
|
||||
{
|
||||
"name": "Slack Bot",
|
||||
"consoleName": "Slack Bot",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"program": "onyx/onyxbot/slack/listener.py",
|
||||
"cwd": "${workspaceFolder}/backend",
|
||||
"envFile": "${workspaceFolder}/.vscode/.env",
|
||||
"env": {
|
||||
"LOG_LEVEL": "DEBUG",
|
||||
"PYTHONUNBUFFERED": "1",
|
||||
"PYTHONPATH": "."
|
||||
},
|
||||
"presentation": {
|
||||
"group": "2"
|
||||
},
|
||||
"consoleTitle": "Slack Bot Console"
|
||||
},
|
||||
{
|
||||
"name": "Celery primary",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"module": "celery",
|
||||
"cwd": "${workspaceFolder}/backend",
|
||||
"envFile": "${workspaceFolder}/.vscode/.env",
|
||||
"env": {
|
||||
"LOG_LEVEL": "INFO",
|
||||
"PYTHONUNBUFFERED": "1",
|
||||
"PYTHONPATH": "."
|
||||
},
|
||||
"args": [
|
||||
"-A",
|
||||
"onyx.background.celery.versioned_apps.primary",
|
||||
"worker",
|
||||
"--pool=threads",
|
||||
"--concurrency=4",
|
||||
"--prefetch-multiplier=1",
|
||||
"--loglevel=INFO",
|
||||
"--hostname=primary@%n",
|
||||
"-Q",
|
||||
"celery"
|
||||
],
|
||||
"presentation": {
|
||||
"group": "2"
|
||||
},
|
||||
"consoleTitle": "Celery primary Console"
|
||||
},
|
||||
{
|
||||
"name": "Celery light",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"module": "celery",
|
||||
"cwd": "${workspaceFolder}/backend",
|
||||
"envFile": "${workspaceFolder}/.vscode/.env",
|
||||
"env": {
|
||||
"LOG_LEVEL": "INFO",
|
||||
"PYTHONUNBUFFERED": "1",
|
||||
"PYTHONPATH": "."
|
||||
},
|
||||
"args": [
|
||||
"-A",
|
||||
"onyx.background.celery.versioned_apps.light",
|
||||
"worker",
|
||||
"--pool=threads",
|
||||
"--concurrency=64",
|
||||
"--prefetch-multiplier=8",
|
||||
"--loglevel=INFO",
|
||||
"--hostname=light@%n",
|
||||
"-Q",
|
||||
"vespa_metadata_sync,connector_deletion,doc_permissions_upsert,index_attempt_cleanup"
|
||||
],
|
||||
"presentation": {
|
||||
"group": "2"
|
||||
},
|
||||
"consoleTitle": "Celery light Console"
|
||||
},
|
||||
{
|
||||
"name": "Celery heavy",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"module": "celery",
|
||||
"cwd": "${workspaceFolder}/backend",
|
||||
"envFile": "${workspaceFolder}/.vscode/.env",
|
||||
"env": {
|
||||
"LOG_LEVEL": "INFO",
|
||||
"PYTHONUNBUFFERED": "1",
|
||||
"PYTHONPATH": "."
|
||||
},
|
||||
"args": [
|
||||
"-A",
|
||||
"onyx.background.celery.versioned_apps.heavy",
|
||||
"worker",
|
||||
"--pool=threads",
|
||||
"--concurrency=4",
|
||||
"--prefetch-multiplier=1",
|
||||
"--loglevel=INFO",
|
||||
"--hostname=heavy@%n",
|
||||
"-Q",
|
||||
"connector_pruning,connector_doc_permissions_sync,connector_external_group_sync"
|
||||
],
|
||||
"presentation": {
|
||||
"group": "2"
|
||||
},
|
||||
"consoleTitle": "Celery heavy Console"
|
||||
},
|
||||
{
|
||||
"name": "Celery docfetching",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"module": "celery",
|
||||
"cwd": "${workspaceFolder}/backend",
|
||||
"envFile": "${workspaceFolder}/.vscode/.env",
|
||||
"env": {
|
||||
"LOG_LEVEL": "DEBUG",
|
||||
"PYTHONUNBUFFERED": "1",
|
||||
"PYTHONPATH": "."
|
||||
},
|
||||
"args": [
|
||||
"-A",
|
||||
"onyx.background.celery.versioned_apps.docfetching",
|
||||
"worker",
|
||||
"--pool=threads",
|
||||
"--concurrency=1",
|
||||
"--prefetch-multiplier=1",
|
||||
"--loglevel=INFO",
|
||||
"--hostname=docfetching@%n",
|
||||
"-Q",
|
||||
"connector_doc_fetching,user_files_indexing"
|
||||
],
|
||||
"presentation": {
|
||||
"group": "2"
|
||||
},
|
||||
"consoleTitle": "Celery docfetching Console",
|
||||
"justMyCode": false
|
||||
},
|
||||
{
|
||||
"name": "Celery docprocessing",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"module": "celery",
|
||||
"cwd": "${workspaceFolder}/backend",
|
||||
"envFile": "${workspaceFolder}/.vscode/.env",
|
||||
"env": {
|
||||
"ENABLE_MULTIPASS_INDEXING": "false",
|
||||
"LOG_LEVEL": "DEBUG",
|
||||
"PYTHONUNBUFFERED": "1",
|
||||
"PYTHONPATH": "."
|
||||
},
|
||||
"args": [
|
||||
"-A",
|
||||
"onyx.background.celery.versioned_apps.docprocessing",
|
||||
"worker",
|
||||
"--pool=threads",
|
||||
"--concurrency=6",
|
||||
"--prefetch-multiplier=1",
|
||||
"--loglevel=INFO",
|
||||
"--hostname=docprocessing@%n",
|
||||
"-Q",
|
||||
"docprocessing"
|
||||
],
|
||||
"presentation": {
|
||||
"group": "2"
|
||||
},
|
||||
"consoleTitle": "Celery docprocessing Console"
|
||||
},
|
||||
{
|
||||
"name": "Celery beat",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"module": "celery",
|
||||
"cwd": "${workspaceFolder}/backend",
|
||||
"envFile": "${workspaceFolder}/.vscode/.env",
|
||||
"env": {
|
||||
"LOG_LEVEL": "DEBUG",
|
||||
"PYTHONUNBUFFERED": "1",
|
||||
"PYTHONPATH": "."
|
||||
},
|
||||
"args": [
|
||||
"-A",
|
||||
"onyx.background.celery.versioned_apps.beat",
|
||||
"beat",
|
||||
"--loglevel=INFO"
|
||||
],
|
||||
"presentation": {
|
||||
"group": "2"
|
||||
},
|
||||
"consoleTitle": "Celery beat Console"
|
||||
},
|
||||
{
|
||||
"name": "Celery monitoring",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"module": "celery",
|
||||
"cwd": "${workspaceFolder}/backend",
|
||||
"envFile": "${workspaceFolder}/.vscode/.env",
|
||||
"env": {},
|
||||
"args": [
|
||||
"-A",
|
||||
"onyx.background.celery.versioned_apps.monitoring",
|
||||
"worker",
|
||||
"--pool=solo",
|
||||
"--concurrency=1",
|
||||
"--prefetch-multiplier=1",
|
||||
"--loglevel=INFO",
|
||||
"--hostname=monitoring@%n",
|
||||
"-Q",
|
||||
"monitoring"
|
||||
],
|
||||
"presentation": {
|
||||
"group": "2"
|
||||
},
|
||||
"consoleTitle": "Celery monitoring Console"
|
||||
},
|
||||
{
|
||||
"name": "Celery user file processing",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"module": "celery",
|
||||
"args": [
|
||||
"-A",
|
||||
"onyx.background.celery.versioned_apps.user_file_processing",
|
||||
"worker",
|
||||
"--loglevel=INFO",
|
||||
"--hostname=user_file_processing@%n",
|
||||
"--pool=threads",
|
||||
"-Q",
|
||||
"user_file_processing,user_file_project_sync"
|
||||
],
|
||||
"cwd": "${workspaceFolder}/backend",
|
||||
"envFile": "${workspaceFolder}/.vscode/.env",
|
||||
"env": {
|
||||
"LOG_LEVEL": "DEBUG",
|
||||
"PYTHONUNBUFFERED": "1",
|
||||
"PYTHONPATH": "."
|
||||
},
|
||||
"presentation": {
|
||||
"group": "2"
|
||||
},
|
||||
"consoleTitle": "Celery user file processing Console"
|
||||
},
|
||||
{
|
||||
"name": "Pytest",
|
||||
"consoleName": "Pytest",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"module": "pytest",
|
||||
"cwd": "${workspaceFolder}/backend",
|
||||
"envFile": "${workspaceFolder}/.vscode/.env",
|
||||
"env": {
|
||||
"LOG_LEVEL": "DEBUG",
|
||||
"PYTHONUNBUFFERED": "1",
|
||||
"PYTHONPATH": "."
|
||||
},
|
||||
"args": [
|
||||
"-v"
|
||||
// Specify a specific module/test to run or provide nothing to run all tests
|
||||
// "tests/unit/onyx/llm/answering/test_prune_and_merge.py"
|
||||
],
|
||||
"presentation": {
|
||||
"group": "2"
|
||||
},
|
||||
"consoleTitle": "Pytest Console"
|
||||
},
|
||||
{
|
||||
// Dummy entry used to label the group
|
||||
"name": "--- Tasks ---",
|
||||
"type": "node",
|
||||
"request": "launch",
|
||||
"presentation": {
|
||||
"group": "3",
|
||||
"order": 0
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Clear and Restart External Volumes and Containers",
|
||||
"type": "node",
|
||||
"request": "launch",
|
||||
"runtimeExecutable": "bash",
|
||||
"runtimeArgs": [
|
||||
"${workspaceFolder}/backend/scripts/restart_containers.sh"
|
||||
],
|
||||
"cwd": "${workspaceFolder}",
|
||||
"console": "integratedTerminal",
|
||||
"stopOnEntry": true,
|
||||
"presentation": {
|
||||
"group": "3"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Eval CLI",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"program": "${workspaceFolder}/backend/onyx/evals/eval_cli.py",
|
||||
"cwd": "${workspaceFolder}/backend",
|
||||
"console": "integratedTerminal",
|
||||
"justMyCode": false,
|
||||
"envFile": "${workspaceFolder}/.vscode/.env",
|
||||
"presentation": {
|
||||
"group": "3"
|
||||
},
|
||||
"env": {
|
||||
"LOG_LEVEL": "INFO",
|
||||
"PYTHONUNBUFFERED": "1",
|
||||
"PYTHONPATH": "."
|
||||
},
|
||||
"args": ["--verbose"],
|
||||
"consoleTitle": "Eval CLI Console"
|
||||
},
|
||||
{
|
||||
// Celery jobs launched through a single background script (legacy)
|
||||
// Recommend using the "Celery (all)" compound launch instead.
|
||||
"name": "Background Jobs",
|
||||
"consoleName": "Background Jobs",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"program": "scripts/dev_run_background_jobs.py",
|
||||
"cwd": "${workspaceFolder}/backend",
|
||||
"envFile": "${workspaceFolder}/.vscode/.env",
|
||||
"env": {
|
||||
"LOG_ONYX_MODEL_INTERACTIONS": "True",
|
||||
"LOG_LEVEL": "DEBUG",
|
||||
"PYTHONUNBUFFERED": "1",
|
||||
"PYTHONPATH": "."
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Install Python Requirements",
|
||||
"type": "node",
|
||||
"request": "launch",
|
||||
"runtimeExecutable": "bash",
|
||||
"runtimeArgs": [
|
||||
"-c",
|
||||
"pip install -r backend/requirements/default.txt && pip install -r backend/requirements/dev.txt && pip install -r backend/requirements/ee.txt && pip install -r backend/requirements/model_server.txt"
|
||||
],
|
||||
"cwd": "${workspaceFolder}",
|
||||
"console": "integratedTerminal",
|
||||
"presentation": {
|
||||
"group": "3"
|
||||
}
|
||||
},
|
||||
{
|
||||
// script to generate the openapi schema
|
||||
"name": "Onyx OpenAPI Schema Generator",
|
||||
@@ -475,7 +451,10 @@
|
||||
"PYTHONUNBUFFERED": "1",
|
||||
"PYTHONPATH": "."
|
||||
},
|
||||
"args": ["--filename", "generated/openapi.json"]
|
||||
"args": [
|
||||
"--filename",
|
||||
"generated/openapi.json"
|
||||
]
|
||||
},
|
||||
{
|
||||
// script to debug multi tenant db issues
|
||||
@@ -500,12 +479,13 @@
|
||||
"generated/tenants_by_num_docs.csv"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Debug React Web App in Chrome",
|
||||
"type": "chrome",
|
||||
"request": "launch",
|
||||
"url": "http://localhost:3000",
|
||||
"webRoot": "${workspaceFolder}/web"
|
||||
}
|
||||
]
|
||||
}
|
||||
{
|
||||
"name": "Debug React Web App in Chrome",
|
||||
"type": "chrome",
|
||||
"request": "launch",
|
||||
"url": "http://localhost:3000",
|
||||
"webRoot": "${workspaceFolder}/web"
|
||||
}
|
||||
]
|
||||
}
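
The "Background Jobs" entry above launches `scripts/dev_run_background_jobs.py`, which the comments describe as a legacy way to start every Celery worker from a single process (the "Celery (all)" compound is the recommended path). The script itself is not part of this diff, so the following is only a rough sketch of that idea; the worker apps, queues, and pool settings are copied from the launch configurations above, while the tuple layout and the plain `celery` executable invocation are assumptions for illustration.

```python
# Rough sketch of a "run all background jobs" style launcher (the real
# scripts/dev_run_background_jobs.py is not shown in this diff). Apps, queues,
# and concurrency values are copied from the launch configs above.
import subprocess

# (app module, hostname prefix, queues, concurrency, prefetch multiplier)
WORKERS = [
    ("onyx.background.celery.versioned_apps.primary", "primary", "celery", 4, 1),
    ("onyx.background.celery.versioned_apps.light", "light",
     "vespa_metadata_sync,connector_deletion,doc_permissions_upsert,index_attempt_cleanup", 64, 8),
    ("onyx.background.celery.versioned_apps.heavy", "heavy",
     "connector_pruning,connector_doc_permissions_sync,connector_external_group_sync", 4, 1),
    ("onyx.background.celery.versioned_apps.docfetching", "docfetching",
     "connector_doc_fetching,user_files_indexing", 1, 1),
    ("onyx.background.celery.versioned_apps.docprocessing", "docprocessing",
     "docprocessing", 6, 1),
]

procs = []
for app, name, queues, concurrency, prefetch in WORKERS:
    procs.append(subprocess.Popen(
        [
            "celery", "-A", app, "worker",
            "--pool=threads",
            f"--concurrency={concurrency}",
            f"--prefetch-multiplier={prefetch}",
            "--loglevel=INFO",
            f"--hostname={name}@%n",
            "-Q", queues,
        ],
        cwd="backend",  # the launch configs all run from ${workspaceFolder}/backend
    ))

# Beat is the scheduler and takes no queues
procs.append(subprocess.Popen(
    ["celery", "-A", "onyx.background.celery.versioned_apps.beat", "beat", "--loglevel=INFO"],
    cwd="backend",
))

for proc in procs:
    proc.wait()
```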
|
||||
|
||||
@@ -4,14 +4,14 @@ This file provides guidance to Codex when working with code in this repository.
|
||||
|
||||
## KEY NOTES
|
||||
|
||||
- If you run into any missing python dependency errors, try running your command with `source backend/.venv/bin/activate` \
|
||||
- If you run into any missing python dependency errors, try running your command with `workon onyx &&` in front
|
||||
to assume the python venv.
|
||||
- To make tests work, check the `.env` file at the root of the project to find an OpenAI key.
|
||||
- If using `playwright` to explore the frontend, you can usually log in with username `a@test.com` and password
|
||||
`a`. The app can be accessed at `http://localhost:3000`.
|
||||
- You should assume that all Onyx services are running. To verify, you can check the `backend/log` directory to
|
||||
make sure we see logs coming out from the relevant service.
|
||||
- To connect to the Postgres database, use: `docker exec -it onyx-relational_db-1 psql -U postgres -c "<SQL>"`
|
||||
- To connect to the Postgres database, use: `docker exec -it onyx-stack-relational_db-1 psql -U postgres -c "<SQL>"`
|
||||
- When making calls to the backend, always go through the frontend. E.g. make a call to `http://localhost:3000/api/persona` not `http://localhost:8080/api/persona`
|
||||
- Put ALL db operations under the `backend/onyx/db` / `backend/ee/onyx/db` directories. Don't run queries
|
||||
outside of those directories.
|
||||
|
||||
@@ -4,14 +4,14 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
|
||||
|
||||
## KEY NOTES
|
||||
|
||||
- If you run into any missing python dependency errors, try running your command with `source backend/.venv/bin/activate` \
|
||||
- If you run into any missing python dependency errors, try running your command with `workon onyx &&` in front
|
||||
to assume the python venv.
|
||||
- To make tests work, check the `.env` file at the root of the project to find an OpenAI key.
|
||||
- If using `playwright` to explore the frontend, you can usually log in with username `a@test.com` and password
|
||||
`a`. The app can be accessed at `http://localhost:3000`.
|
||||
- You should assume that all Onyx services are running. To verify, you can check the `backend/log` directory to
|
||||
make sure we see logs coming out from the relevant service.
|
||||
- To connect to the Postgres database, use: `docker exec -it onyx-relational_db-1 psql -U postgres -c "<SQL>"`
|
||||
- To connect to the Postgres database, use: `docker exec -it onyx-stack-relational_db-1 psql -U postgres -c "<SQL>"`
|
||||
- When making calls to the backend, always go through the frontend. E.g. make a call to `http://localhost:3000/api/persona` not `http://localhost:8080/api/persona` (see the sketch after this list)
|
||||
- Put ALL db operations under the `backend/onyx/db` / `backend/ee/onyx/db` directories. Don't run queries
|
||||
outside of those directories.
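
A minimal sketch of the "go through the frontend" rule above, assuming the `requests` package is available and that you already hold an authenticated session cookie; the cookie name and value are placeholders, not taken from this repo.

```python
# Minimal sketch: call the backend through the Next.js frontend proxy rather
# than hitting the API server directly. The cookie name/value below are
# placeholders; obtain a real session by logging in first.
import requests

FRONTEND = "http://localhost:3000"
session_cookies = {"<auth-cookie-name>": "<auth-cookie-value>"}

resp = requests.get(f"{FRONTEND}/api/persona", cookies=session_cookies)
resp.raise_for_status()
print(resp.json())

# Avoid calling http://localhost:8080/api/persona directly.
```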
|
||||
|
||||
@@ -84,6 +84,10 @@ python -m venv .venv
|
||||
source .venv/bin/activate
|
||||
```
|
||||
|
||||
> **Note:**
|
||||
> This virtual environment MUST NOT be set up WITHIN the onyx directory if you plan on using mypy within certain IDEs.
|
||||
> For simplicity, we recommend setting up the virtual environment outside of the onyx directory.
|
||||
|
||||
_For Windows, activate the virtual environment using Command Prompt:_
|
||||
|
||||
```bash
|
||||
@@ -105,11 +109,6 @@ pip install -r backend/requirements/ee.txt
|
||||
pip install -r backend/requirements/model_server.txt
|
||||
```
|
||||
|
||||
Fix vscode/cursor auto-imports:
|
||||
```bash
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
Install Playwright for Python (headless browser required by the Web Connector)
|
||||
|
||||
In the activated Python virtualenv, install Playwright for Python by running:
|
||||
@@ -176,7 +175,7 @@ You will need Docker installed to run these containers.
|
||||
First navigate to `onyx/deployment/docker_compose`, then start up Postgres/Vespa/Redis/MinIO with:
|
||||
|
||||
```bash
|
||||
docker compose up -d index relational_db cache minio
|
||||
docker compose -f docker-compose.dev.yml -p onyx-stack up -d index relational_db cache minio
|
||||
```
|
||||
|
||||
(index refers to Vespa, relational_db refers to Postgres, and cache refers to Redis)
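
Once the containers are up, a quick way to sanity-check them is to probe their ports. The port numbers below are assumptions based on each service's usual defaults rather than values taken from this diff; check `docker-compose.dev.yml` if anything differs.

```python
# Minimal readiness check for the dev dependency containers. Port numbers are
# assumptions (typical defaults); adjust to match docker-compose.dev.yml.
import socket

SERVICES = {
    "relational_db (Postgres)": ("localhost", 5432),
    "cache (Redis)": ("localhost", 6379),
    "index (Vespa)": ("localhost", 8081),
    "minio": ("localhost", 9000),
}

for name, (host, port) in SERVICES.items():
    try:
        with socket.create_connection((host, port), timeout=2):
            print(f"{name}: up on {host}:{port}")
    except OSError as exc:
        print(f"{name}: not reachable on {host}:{port} ({exc})")
```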
|
||||
@@ -258,7 +257,7 @@ You can run the full Onyx application stack from pre-built images including all
|
||||
Navigate to `onyx/deployment/docker_compose` and run:
|
||||
|
||||
```bash
|
||||
docker compose up -d
|
||||
docker compose -f docker-compose.dev.yml -p onyx-stack up -d
|
||||
```
|
||||
|
||||
After Docker pulls and starts these containers, navigate to `http://localhost:3000` to use Onyx.
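
If you prefer to script the "wait until it is up" step instead of refreshing the browser, a small poll against the same URL works; the retry count and sleep interval below are arbitrary.

```python
# Minimal sketch: poll the web UI until it responds, then open a browser tab.
# The URL comes from the text above; retries and timeouts are arbitrary.
import time
import urllib.request
import webbrowser

URL = "http://localhost:3000"

for attempt in range(60):
    try:
        with urllib.request.urlopen(URL, timeout=3) as resp:
            print(f"Onyx is up (HTTP {resp.status}) after {attempt + 1} attempt(s)")
            webbrowser.open(URL)
            break
    except OSError:
        pass
    time.sleep(5)
else:
    print("Onyx did not come up within the polling window")
```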
|
||||
@@ -266,7 +265,7 @@ After Docker pulls and starts these containers, navigate to `http://localhost:30
|
||||
If you want to make changes to Onyx and run those changes in Docker, you can also build a local version of the Onyx container images that incorporates your changes like so:
|
||||
|
||||
```bash
|
||||
docker compose up -d --build
|
||||
docker compose -f docker-compose.dev.yml -p onyx-stack up -d --build
|
||||
```
|
||||
|
||||
|
||||
|
||||
135 README.md
@@ -1,103 +1,116 @@
|
||||
<!-- ONYX_METADATA={"link": "https://github.com/onyx-dot-app/onyx/blob/main/README.md"} -->
|
||||
|
||||
<a name="readme-top"></a>
|
||||
|
||||
<h2 align="center">
|
||||
<a href="https://www.onyx.app/"> <img width="50%" src="https://github.com/onyx-dot-app/onyx/blob/logo/OnyxLogoCropped.jpg?raw=true)" /></a>
|
||||
<a href="https://www.onyx.app/"> <img width="50%" src="https://github.com/onyx-dot-app/onyx/blob/logo/OnyxLogoCropped.jpg?raw=true)" /></a>
|
||||
</h2>
|
||||
|
||||
<p align="center">Open Source AI Platform</p>
|
||||
<p align="center">
|
||||
<p align="center">Open Source Gen-AI + Enterprise Search.</p>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://discord.gg/TDJ59cGV2X" target="_blank">
|
||||
<img src="https://img.shields.io/badge/discord-join-blue.svg?logo=discord&logoColor=white" alt="Discord">
|
||||
</a>
|
||||
<a href="https://docs.onyx.app/" target="_blank">
|
||||
<img src="https://img.shields.io/badge/docs-view-blue" alt="Documentation">
|
||||
</a>
|
||||
<a href="https://docs.onyx.app/" target="_blank">
|
||||
<img src="https://img.shields.io/website?url=https://www.onyx.app&up_message=visit&up_color=blue" alt="Documentation">
|
||||
</a>
|
||||
<a href="https://github.com/onyx-dot-app/onyx/blob/main/LICENSE" target="_blank">
|
||||
<img src="https://img.shields.io/static/v1?label=license&message=MIT&color=blue" alt="License">
|
||||
</a>
|
||||
<a href="https://docs.onyx.app/" target="_blank">
|
||||
<img src="https://img.shields.io/badge/docs-view-blue" alt="Documentation">
|
||||
</a>
|
||||
<a href="https://join.slack.com/t/onyx-dot-app/shared_invite/zt-34lu4m7xg-TsKGO6h8PDvR5W27zTdyhA" target="_blank">
|
||||
<img src="https://img.shields.io/badge/slack-join-blue.svg?logo=slack" alt="Slack">
|
||||
</a>
|
||||
<a href="https://discord.gg/TDJ59cGV2X" target="_blank">
|
||||
<img src="https://img.shields.io/badge/discord-join-blue.svg?logo=discord&logoColor=white" alt="Discord">
|
||||
</a>
|
||||
<a href="https://github.com/onyx-dot-app/onyx/blob/main/README.md" target="_blank">
|
||||
<img src="https://img.shields.io/static/v1?label=license&message=MIT&color=blue" alt="License">
|
||||
</a>
|
||||
</p>
|
||||
|
||||
<strong>[Onyx](https://www.onyx.app/)</strong> (formerly Danswer) is the AI platform connected to your company's docs, apps, and people.
|
||||
Onyx provides a feature rich Chat interface and plugs into any LLM of your choice.
|
||||
Keep knowledge and access controls synced across over 40 connectors like Google Drive, Slack, Confluence, Salesforce, etc.
|
||||
Create custom AI agents with unique prompts, knowledge, and actions that the agents can take.
|
||||
Onyx can be deployed securely anywhere and for any scale - on a laptop, on-premise, or to cloud.
|
||||
|
||||
|
||||
**[Onyx](https://www.onyx.app/)** is a feature-rich, self-hostable Chat UI that works with any LLM. It is easy to deploy and can run in a completely airgapped environment.
|
||||
<h3>Feature Highlights</h3>
|
||||
|
||||
Onyx comes loaded with advanced features like Agents, Web Search, RAG, MCP, Deep Research, Connectors to 40+ knowledge sources, and more.
|
||||
**Deep research over your team's knowledge:**
|
||||
|
||||
> [!TIP]
|
||||
> Run Onyx with one command (or see deployment section below):
|
||||
> ```
|
||||
> curl -fsSL https://raw.githubusercontent.com/onyx-dot-app/onyx/main/deployment/docker_compose/install.sh > install.sh && chmod +x install.sh && ./install.sh
|
||||
> ```
|
||||
https://private-user-images.githubusercontent.com/32520769/414509312-48392e83-95d0-4fb5-8650-a396e05e0a32.mp4?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3Mzk5Mjg2MzYsIm5iZiI6MTczOTkyODMzNiwicGF0aCI6Ii8zMjUyMDc2OS80MTQ1MDkzMTItNDgzOTJlODMtOTVkMC00ZmI1LTg2NTAtYTM5NmUwNWUwYTMyLm1wND9YLUFtei1BbGdvcml0aG09QVdTNC1ITUFDLVNIQTI1NiZYLUFtei1DcmVkZW50aWFsPUFLSUFWQ09EWUxTQTUzUFFLNFpBJTJGMjAyNTAyMTklMkZ1cy1lYXN0LTElMkZzMyUyRmF3czRfcmVxdWVzdCZYLUFtei1EYXRlPTIwMjUwMjE5VDAxMjUzNlomWC1BbXotRXhwaXJlcz0zMDAmWC1BbXotU2lnbmF0dXJlPWFhMzk5Njg2Y2Y5YjFmNDNiYTQ2YzM5ZTg5YWJiYTU2NWMyY2YwNmUyODE2NWUxMDRiMWQxZWJmODI4YTA0MTUmWC1BbXotU2lnbmVkSGVhZGVycz1ob3N0In0.a9D8A0sgKE9AoaoE-mfFbJ6_OKYeqaf7TZ4Han2JfW8
|
||||
|
||||
|
||||
|
||||
**Use Onyx as a secure AI Chat with any LLM:**
|
||||
|
||||

|
||||
|
||||
|
||||
**Easily set up connectors to your apps:**
|
||||
|
||||
## ⭐ Features
|
||||
- **🤖 Custom Agents:** Build AI Agents with unique instructions, knowledge and actions.
|
||||
- **🌍 Web Search:** Browse the web with Google PSE, Exa, and Serper as well as an in-house scraper or Firecrawl.
|
||||
- **🔍 RAG:** Best in class hybrid-search + knowledge graph for uploaded files and ingested documents from connectors.
|
||||
- **🔄 Connectors:** Pull knowledge, metadata, and access information from over 40 applications.
|
||||
- **🔬 Deep Research:** Get in depth answers with an agentic multi-step search.
|
||||
- **▶️ Actions & MCP:** Give AI Agents the ability to interact with external systems.
|
||||
- **💻 Code Interpreter:** Execute code to analyze data, render graphs and create files.
|
||||
- **🎨 Image Generation:** Generate images based on user prompts.
|
||||
- **👥 Collaboration:** Chat sharing, feedback gathering, user management, usage analytics, and more.
|
||||
|
||||
Onyx works with all LLMs (like OpenAI, Anthropic, Gemini, etc.) and self-hosted LLMs (like Ollama, vLLM, etc.)
|
||||
|
||||
To learn more about the features, check out our [documentation](https://docs.onyx.app/welcome)!
|
||||

|
||||
|
||||
|
||||
**Access Onyx where your team already works:**
|
||||
|
||||
## 🚀 Deployment
|
||||
Onyx supports deployments with Docker, Kubernetes, and Terraform, along with guides for major cloud providers.
|
||||
|
||||
See guides below:
|
||||
- [Docker](https://docs.onyx.app/deployment/local/docker) or [Quickstart](https://docs.onyx.app/deployment/getting_started/quickstart) (best for most users)
|
||||
- [Kubernetes](https://docs.onyx.app/deployment/local/kubernetes) (best for large teams)
|
||||
- [Terraform](https://docs.onyx.app/deployment/local/terraform) (best for teams already using Terraform)
|
||||
- Cloud specific guides (best if specifically using [AWS EKS](https://docs.onyx.app/deployment/cloud/aws/eks), [Azure VMs](https://docs.onyx.app/deployment/cloud/azure), etc.)
|
||||
|
||||
> [!TIP]
|
||||
> **To try Onyx for free without deploying, check out [Onyx Cloud](https://cloud.onyx.app/signup)**.
|
||||

|
||||
|
||||
|
||||
## Deployment
|
||||
**To try it out for free and get started in seconds, check out [Onyx Cloud](https://cloud.onyx.app/signup)**.
|
||||
|
||||
## 🔍 Other Notable Benefits
|
||||
Onyx is built for teams of all sizes, from individual users to the largest global enterprises.
|
||||
Onyx can also be run locally (even on a laptop) or deployed on a virtual machine with a single
|
||||
`docker compose` command. Check out our [docs](https://docs.onyx.app/deployment/getting_started/quickstart) to learn more.
|
||||
|
||||
- **Enterprise Search**: far more than simple RAG, Onyx has custom indexing and retrieval that remains performant and accurate for scales of up to tens of millions of documents.
|
||||
- **Security**: SSO (OIDC/SAML/OAuth2), RBAC, encryption of credentials, etc.
|
||||
- **Management UI**: different user roles such as basic, curator, and admin.
|
||||
- **Document Permissioning**: mirrors user access from external apps for RAG use cases.
|
||||
We also have built-in support for high-availability/scalable deployment on Kubernetes.
|
||||
References [here](https://github.com/onyx-dot-app/onyx/tree/main/deployment).
|
||||
|
||||
|
||||
## 🔍 Other Notable Benefits of Onyx
|
||||
- Custom deep learning models for indexing and inference time, only through Onyx + learning from user feedback.
|
||||
- Flexible security features like SSO (OIDC/SAML/OAuth2), RBAC, encryption of credentials, etc.
|
||||
- Knowledge curation features like document-sets, query history, usage analytics, etc.
|
||||
- Scalable deployment options tested up to many tens of thousands users and hundreds of millions of documents.
|
||||
|
||||
|
||||
## 🚧 Roadmap
|
||||
To see ongoing and upcoming projects, check out our [roadmap](https://github.com/orgs/onyx-dot-app/projects/2)!
|
||||
- New methods in information retrieval (StructRAG, LightGraphRAG, etc.)
|
||||
- Personalized Search
|
||||
- Organizational understanding and ability to locate and suggest experts from your team.
|
||||
- Code Search
|
||||
- SQL and Structured Query Language
|
||||
|
||||
|
||||
## 🔌 Connectors
|
||||
Keep knowledge and access in sync across 40+ connectors:
|
||||
|
||||
- Google Drive
|
||||
- Confluence
|
||||
- Slack
|
||||
- Gmail
|
||||
- Salesforce
|
||||
- Microsoft Sharepoint
|
||||
- Github
|
||||
- Jira
|
||||
- Zendesk
|
||||
- Gong
|
||||
- Microsoft Teams
|
||||
- Dropbox
|
||||
- Local Files
|
||||
- Websites
|
||||
- And more ...
|
||||
|
||||
See the full list [here](https://docs.onyx.app/admin/connectors/overview).
|
||||
|
||||
|
||||
## 📚 Licensing
|
||||
There are two editions of Onyx:
|
||||
|
||||
- Onyx Community Edition (CE) is available freely under the MIT license.
|
||||
- Onyx Community Edition (CE) is available freely under the MIT Expat license. Simply follow the Deployment guide above.
|
||||
- Onyx Enterprise Edition (EE) includes extra features that are primarily useful for larger organizations.
|
||||
For feature details, check out [our website](https://www.onyx.app/pricing).
|
||||
|
||||
|
||||
|
||||
## 👪 Community
|
||||
Join our open source community on **[Discord](https://discord.gg/TDJ59cGV2X)**!
|
||||
|
||||
To try the Onyx Enterprise Edition:
|
||||
1. Checkout [Onyx Cloud](https://cloud.onyx.app/signup).
|
||||
2. For self-hosting the Enterprise Edition, contact us at [founders@onyx.app](mailto:founders@onyx.app) or book a call with us on our [Cal](https://cal.com/team/onyx/founders).
|
||||
|
||||
|
||||
## 💡 Contributing
|
||||
Looking to contribute? Please check out the [Contribution Guide](CONTRIBUTING.md) for more details.
|
||||
@@ -1,389 +0,0 @@
|
||||
"""Migration 2: User file data preparation and backfill
|
||||
|
||||
Revision ID: 0cd424f32b1d
|
||||
Revises: 9b66d3156fc6
|
||||
Create Date: 2025-09-22 09:44:42.727034
|
||||
|
||||
This migration populates the new columns added in migration 1.
|
||||
It prepares data for the UUID transition and relationship migration.
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy import text
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger("alembic.runtime.migration")
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "0cd424f32b1d"
|
||||
down_revision = "9b66d3156fc6"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
"""Populate new columns with data."""
|
||||
|
||||
bind = op.get_bind()
|
||||
inspector = sa.inspect(bind)
|
||||
|
||||
# === Step 1: Populate user_file.new_id ===
|
||||
user_file_columns = [col["name"] for col in inspector.get_columns("user_file")]
|
||||
has_new_id = "new_id" in user_file_columns
|
||||
|
||||
if has_new_id:
|
||||
logger.info("Populating user_file.new_id with UUIDs...")
|
||||
|
||||
# Count rows needing UUIDs
|
||||
null_count = bind.execute(
|
||||
text("SELECT COUNT(*) FROM user_file WHERE new_id IS NULL")
|
||||
).scalar_one()
|
||||
|
||||
if null_count > 0:
|
||||
logger.info(f"Generating UUIDs for {null_count} user_file records...")
|
||||
|
||||
# Populate in batches to avoid long locks
|
||||
batch_size = 10000
|
||||
total_updated = 0
|
||||
|
||||
while True:
|
||||
result = bind.execute(
|
||||
text(
|
||||
"""
|
||||
UPDATE user_file
|
||||
SET new_id = gen_random_uuid()
|
||||
WHERE new_id IS NULL
|
||||
AND id IN (
|
||||
SELECT id FROM user_file
|
||||
WHERE new_id IS NULL
|
||||
LIMIT :batch_size
|
||||
)
|
||||
"""
|
||||
),
|
||||
{"batch_size": batch_size},
|
||||
)
|
||||
|
||||
updated = result.rowcount
|
||||
total_updated += updated
|
||||
|
||||
if updated < batch_size:
|
||||
break
|
||||
|
||||
logger.info(f" Updated {total_updated}/{null_count} records...")
|
||||
|
||||
logger.info(f"Generated UUIDs for {total_updated} user_file records")
|
||||
|
||||
# Verify all records have UUIDs
|
||||
remaining_null = bind.execute(
|
||||
text("SELECT COUNT(*) FROM user_file WHERE new_id IS NULL")
|
||||
).scalar_one()
|
||||
|
||||
if remaining_null > 0:
|
||||
raise Exception(
|
||||
f"Failed to populate all user_file.new_id values ({remaining_null} NULL)"
|
||||
)
|
||||
|
||||
# Lock down the column
|
||||
op.alter_column("user_file", "new_id", nullable=False)
|
||||
op.alter_column("user_file", "new_id", server_default=None)
|
||||
logger.info("Locked down user_file.new_id column")
|
||||
|
||||
# === Step 2: Populate persona__user_file.user_file_id_uuid ===
|
||||
persona_user_file_columns = [
|
||||
col["name"] for col in inspector.get_columns("persona__user_file")
|
||||
]
|
||||
|
||||
if has_new_id and "user_file_id_uuid" in persona_user_file_columns:
|
||||
logger.info("Populating persona__user_file.user_file_id_uuid...")
|
||||
|
||||
# Count rows needing update
|
||||
null_count = bind.execute(
|
||||
text(
|
||||
"""
|
||||
SELECT COUNT(*) FROM persona__user_file
|
||||
WHERE user_file_id IS NOT NULL AND user_file_id_uuid IS NULL
|
||||
"""
|
||||
)
|
||||
).scalar_one()
|
||||
|
||||
if null_count > 0:
|
||||
logger.info(f"Updating {null_count} persona__user_file records...")
|
||||
|
||||
# Update in batches
|
||||
batch_size = 10000
|
||||
total_updated = 0
|
||||
|
||||
while True:
|
||||
result = bind.execute(
|
||||
text(
|
||||
"""
|
||||
UPDATE persona__user_file p
|
||||
SET user_file_id_uuid = uf.new_id
|
||||
FROM user_file uf
|
||||
WHERE p.user_file_id = uf.id
|
||||
AND p.user_file_id_uuid IS NULL
|
||||
AND p.persona_id IN (
|
||||
SELECT persona_id
|
||||
FROM persona__user_file
|
||||
WHERE user_file_id_uuid IS NULL
|
||||
LIMIT :batch_size
|
||||
)
|
||||
"""
|
||||
),
|
||||
{"batch_size": batch_size},
|
||||
)
|
||||
|
||||
updated = result.rowcount
|
||||
total_updated += updated
|
||||
|
||||
if updated < batch_size:
|
||||
break
|
||||
|
||||
logger.info(f" Updated {total_updated}/{null_count} records...")
|
||||
|
||||
logger.info(f"Updated {total_updated} persona__user_file records")
|
||||
|
||||
# Verify all records are populated
|
||||
remaining_null = bind.execute(
|
||||
text(
|
||||
"""
|
||||
SELECT COUNT(*) FROM persona__user_file
|
||||
WHERE user_file_id IS NOT NULL AND user_file_id_uuid IS NULL
|
||||
"""
|
||||
)
|
||||
).scalar_one()
|
||||
|
||||
if remaining_null > 0:
|
||||
raise Exception(
|
||||
f"Failed to populate all persona__user_file.user_file_id_uuid values ({remaining_null} NULL)"
|
||||
)
|
||||
|
||||
op.alter_column("persona__user_file", "user_file_id_uuid", nullable=False)
|
||||
logger.info("Locked down persona__user_file.user_file_id_uuid column")
|
||||
|
||||
# === Step 3: Create user_project records from chat_folder ===
|
||||
if "chat_folder" in inspector.get_table_names():
|
||||
logger.info("Creating user_project records from chat_folder...")
|
||||
|
||||
result = bind.execute(
|
||||
text(
|
||||
"""
|
||||
INSERT INTO user_project (user_id, name)
|
||||
SELECT cf.user_id, cf.name
|
||||
FROM chat_folder cf
|
||||
WHERE NOT EXISTS (
|
||||
SELECT 1
|
||||
FROM user_project up
|
||||
WHERE up.user_id = cf.user_id AND up.name = cf.name
|
||||
)
|
||||
"""
|
||||
)
|
||||
)
|
||||
|
||||
logger.info(f"Created {result.rowcount} user_project records from chat_folder")
|
||||
|
||||
# === Step 4: Populate chat_session.project_id ===
|
||||
chat_session_columns = [
|
||||
col["name"] for col in inspector.get_columns("chat_session")
|
||||
]
|
||||
|
||||
if "folder_id" in chat_session_columns and "project_id" in chat_session_columns:
|
||||
logger.info("Populating chat_session.project_id...")
|
||||
|
||||
# Count sessions needing update
|
||||
null_count = bind.execute(
|
||||
text(
|
||||
"""
|
||||
SELECT COUNT(*) FROM chat_session
|
||||
WHERE project_id IS NULL AND folder_id IS NOT NULL
|
||||
"""
|
||||
)
|
||||
).scalar_one()
|
||||
|
||||
if null_count > 0:
|
||||
logger.info(f"Updating {null_count} chat_session records...")
|
||||
|
||||
result = bind.execute(
|
||||
text(
|
||||
"""
|
||||
UPDATE chat_session cs
|
||||
SET project_id = up.id
|
||||
FROM chat_folder cf
|
||||
JOIN user_project up ON up.user_id = cf.user_id AND up.name = cf.name
|
||||
WHERE cs.folder_id = cf.id AND cs.project_id IS NULL
|
||||
"""
|
||||
)
|
||||
)
|
||||
|
||||
logger.info(f"Updated {result.rowcount} chat_session records")
|
||||
|
||||
# Verify all records are populated
|
||||
remaining_null = bind.execute(
|
||||
text(
|
||||
"""
|
||||
SELECT COUNT(*) FROM chat_session
|
||||
WHERE project_id IS NULL AND folder_id IS NOT NULL
|
||||
"""
|
||||
)
|
||||
).scalar_one()
|
||||
|
||||
if remaining_null > 0:
|
||||
logger.warning(
|
||||
f"Warning: {remaining_null} chat_session records could not be mapped to projects"
|
||||
)
|
||||
|
||||
# === Step 5: Update plaintext FileRecord IDs/display names to UUID scheme ===
|
||||
# Prior to UUID migration, plaintext cache files were stored with file_id like 'plain_text_<int_id>'.
|
||||
# After migration, we use 'plaintext_<uuid>' (note the name change to 'plaintext_').
|
||||
# This step remaps existing FileRecord rows to the new naming while preserving object_key/bucket.
|
||||
logger.info("Updating plaintext FileRecord ids and display names to UUID scheme...")
|
||||
|
||||
# Count legacy plaintext records that can be mapped to UUID user_file ids
|
||||
count_query = text(
|
||||
"""
|
||||
SELECT COUNT(*)
|
||||
FROM file_record fr
|
||||
JOIN user_file uf ON fr.file_id = CONCAT('plaintext_', uf.id::text)
|
||||
WHERE LOWER(fr.file_origin::text) = 'plaintext_cache'
|
||||
"""
|
||||
)
|
||||
legacy_count = bind.execute(count_query).scalar_one()
|
||||
|
||||
if legacy_count and legacy_count > 0:
|
||||
logger.info(f"Found {legacy_count} legacy plaintext file records to update")
|
||||
|
||||
# Update display_name first for readability (safe regardless of rename)
|
||||
bind.execute(
|
||||
text(
|
||||
"""
|
||||
UPDATE file_record fr
|
||||
SET display_name = CONCAT('Plaintext for user file ', uf.new_id::text)
|
||||
FROM user_file uf
|
||||
WHERE LOWER(fr.file_origin::text) = 'plaintext_cache'
|
||||
AND fr.file_id = CONCAT('plaintext_', uf.id::text)
|
||||
"""
|
||||
)
|
||||
)
|
||||
|
||||
# Remap file_id from 'plaintext_<int>' -> 'plaintext_<uuid>' using transitional new_id
|
||||
# Use a single UPDATE joined against user_file (no pattern scan needed)
|
||||
# and ensure it aligns to existing user_file ids to avoid renaming unrelated rows
|
||||
result = bind.execute(
|
||||
text(
|
||||
"""
|
||||
UPDATE file_record fr
|
||||
SET file_id = CONCAT('plaintext_', uf.new_id::text)
|
||||
FROM user_file uf
|
||||
WHERE LOWER(fr.file_origin::text) = 'plaintext_cache'
|
||||
AND fr.file_id = CONCAT('plaintext_', uf.id::text)
|
||||
"""
|
||||
)
|
||||
)
|
||||
logger.info(
|
||||
f"Updated {result.rowcount} plaintext file_record ids to UUID scheme"
|
||||
)
|
||||
|
||||
# === Step 6: Ensure document_id_migrated default TRUE and backfill existing FALSE ===
|
||||
# New records should default to migrated=True so the migration task won't run for them.
|
||||
# Existing rows that had a legacy document_id should be marked as not migrated to be processed.
|
||||
|
||||
# Backfill existing records: if document_id is not null, set to FALSE
|
||||
bind.execute(
|
||||
text(
|
||||
"""
|
||||
UPDATE user_file
|
||||
SET document_id_migrated = FALSE
|
||||
WHERE document_id IS NOT NULL
|
||||
"""
|
||||
)
|
||||
)
|
||||
|
||||
# === Step 7: Backfill user_file.status from index_attempt ===
|
||||
logger.info("Backfilling user_file.status from index_attempt...")
|
||||
|
||||
# Update user_file status based on latest index attempt
|
||||
# Using CTEs instead of temp tables for asyncpg compatibility
|
||||
result = bind.execute(
|
||||
text(
|
||||
"""
|
||||
WITH latest_attempt AS (
|
||||
SELECT DISTINCT ON (ia.connector_credential_pair_id)
|
||||
ia.connector_credential_pair_id,
|
||||
ia.status
|
||||
FROM index_attempt ia
|
||||
ORDER BY ia.connector_credential_pair_id, ia.time_updated DESC
|
||||
),
|
||||
uf_to_ccp AS (
|
||||
SELECT DISTINCT uf.id AS uf_id, ccp.id AS cc_pair_id
|
||||
FROM user_file uf
|
||||
JOIN document_by_connector_credential_pair dcc
|
||||
ON dcc.id = REPLACE(uf.document_id, 'USER_FILE_CONNECTOR__', 'FILE_CONNECTOR__')
|
||||
JOIN connector_credential_pair ccp
|
||||
ON ccp.connector_id = dcc.connector_id
|
||||
AND ccp.credential_id = dcc.credential_id
|
||||
)
|
||||
UPDATE user_file uf
|
||||
SET status = CASE
|
||||
WHEN la.status IN ('NOT_STARTED', 'IN_PROGRESS') THEN 'PROCESSING'
|
||||
WHEN la.status = 'SUCCESS' THEN 'COMPLETED'
|
||||
ELSE 'FAILED'
|
||||
END
|
||||
FROM uf_to_ccp ufc
|
||||
LEFT JOIN latest_attempt la
|
||||
ON la.connector_credential_pair_id = ufc.cc_pair_id
|
||||
WHERE uf.id = ufc.uf_id
|
||||
AND uf.status = 'PROCESSING'
|
||||
"""
|
||||
)
|
||||
)
|
||||
|
||||
logger.info(f"Updated status for {result.rowcount} user_file records")
|
||||
|
||||
logger.info("Migration 2 (data preparation) completed successfully")
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
"""Reset populated data to allow clean downgrade of schema."""
|
||||
|
||||
bind = op.get_bind()
|
||||
inspector = sa.inspect(bind)
|
||||
|
||||
logger.info("Starting downgrade of data preparation...")
|
||||
|
||||
# Reset user_file columns to allow nulls before data removal
|
||||
if "user_file" in inspector.get_table_names():
|
||||
columns = [col["name"] for col in inspector.get_columns("user_file")]
|
||||
|
||||
if "new_id" in columns:
|
||||
op.alter_column(
|
||||
"user_file",
|
||||
"new_id",
|
||||
nullable=True,
|
||||
server_default=sa.text("gen_random_uuid()"),
|
||||
)
|
||||
# Optionally clear the data
|
||||
# bind.execute(text("UPDATE user_file SET new_id = NULL"))
|
||||
logger.info("Reset user_file.new_id to nullable")
|
||||
|
||||
# Reset persona__user_file.user_file_id_uuid
|
||||
if "persona__user_file" in inspector.get_table_names():
|
||||
columns = [col["name"] for col in inspector.get_columns("persona__user_file")]
|
||||
|
||||
if "user_file_id_uuid" in columns:
|
||||
op.alter_column("persona__user_file", "user_file_id_uuid", nullable=True)
|
||||
# Optionally clear the data
|
||||
# bind.execute(text("UPDATE persona__user_file SET user_file_id_uuid = NULL"))
|
||||
logger.info("Reset persona__user_file.user_file_id_uuid to nullable")
|
||||
|
||||
# Note: We don't delete user_project records or reset chat_session.project_id
|
||||
# as these might be in use and can be handled by the schema downgrade
|
||||
|
||||
# Reset user_file.status to default
|
||||
if "user_file" in inspector.get_table_names():
|
||||
columns = [col["name"] for col in inspector.get_columns("user_file")]
|
||||
if "status" in columns:
|
||||
bind.execute(text("UPDATE user_file SET status = 'PROCESSING'"))
|
||||
logger.info("Reset user_file.status to default")
|
||||
|
||||
logger.info("Downgrade completed successfully")
|
||||
@@ -1,261 +0,0 @@
|
||||
"""Migration 3: User file relationship migration
|
||||
|
||||
Revision ID: 16c37a30adf2
|
||||
Revises: 0cd424f32b1d
|
||||
Create Date: 2025-09-22 09:47:34.175596
|
||||
|
||||
This migration converts folder-based relationships to project-based relationships.
|
||||
It migrates persona__user_folder to persona__user_file and populates project__user_file.
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy import text
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger("alembic.runtime.migration")
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "16c37a30adf2"
|
||||
down_revision = "0cd424f32b1d"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
"""Migrate folder-based relationships to project-based relationships."""
|
||||
|
||||
bind = op.get_bind()
|
||||
inspector = sa.inspect(bind)
|
||||
|
||||
# === Step 1: Migrate persona__user_folder to persona__user_file ===
|
||||
table_names = inspector.get_table_names()
|
||||
|
||||
if "persona__user_folder" in table_names and "user_file" in table_names:
|
||||
user_file_columns = [col["name"] for col in inspector.get_columns("user_file")]
|
||||
has_new_id = "new_id" in user_file_columns
|
||||
|
||||
if has_new_id and "folder_id" in user_file_columns:
|
||||
logger.info(
|
||||
"Migrating persona__user_folder relationships to persona__user_file..."
|
||||
)
|
||||
|
||||
# Count relationships to migrate (asyncpg-compatible)
|
||||
count_query = text(
|
||||
"""
|
||||
SELECT COUNT(*)
|
||||
FROM (
|
||||
SELECT DISTINCT puf.persona_id, uf.id
|
||||
FROM persona__user_folder puf
|
||||
JOIN user_file uf ON uf.folder_id = puf.user_folder_id
|
||||
WHERE NOT EXISTS (
|
||||
SELECT 1
|
||||
FROM persona__user_file p2
|
||||
WHERE p2.persona_id = puf.persona_id
|
||||
AND p2.user_file_id = uf.id
|
||||
)
|
||||
) AS distinct_pairs
|
||||
"""
|
||||
)
|
||||
to_migrate = bind.execute(count_query).scalar_one()
|
||||
|
||||
if to_migrate > 0:
|
||||
logger.info(f"Creating {to_migrate} persona-file relationships...")
|
||||
|
||||
# Migrate in batches to avoid memory issues
|
||||
batch_size = 10000
|
||||
total_inserted = 0
|
||||
|
||||
while True:
|
||||
# Insert batch directly using subquery (asyncpg compatible)
|
||||
result = bind.execute(
|
||||
text(
|
||||
"""
|
||||
INSERT INTO persona__user_file (persona_id, user_file_id, user_file_id_uuid)
|
||||
SELECT DISTINCT puf.persona_id, uf.id as file_id, uf.new_id
|
||||
FROM persona__user_folder puf
|
||||
JOIN user_file uf ON uf.folder_id = puf.user_folder_id
|
||||
WHERE NOT EXISTS (
|
||||
SELECT 1
|
||||
FROM persona__user_file p2
|
||||
WHERE p2.persona_id = puf.persona_id
|
||||
AND p2.user_file_id = uf.id
|
||||
)
|
||||
LIMIT :batch_size
|
||||
"""
|
||||
),
|
||||
{"batch_size": batch_size},
|
||||
)
|
||||
|
||||
inserted = result.rowcount
|
||||
total_inserted += inserted
|
||||
|
||||
if inserted < batch_size:
|
||||
break
|
||||
|
||||
logger.info(
|
||||
f" Migrated {total_inserted}/{to_migrate} relationships..."
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"Created {total_inserted} persona__user_file relationships"
|
||||
)
|
||||
|
||||
# === Step 2: Add foreign key for chat_session.project_id ===
|
||||
chat_session_fks = inspector.get_foreign_keys("chat_session")
|
||||
fk_exists = any(
|
||||
fk["name"] == "fk_chat_session_project_id" for fk in chat_session_fks
|
||||
)
|
||||
|
||||
if not fk_exists:
|
||||
logger.info("Adding foreign key constraint for chat_session.project_id...")
|
||||
op.create_foreign_key(
|
||||
"fk_chat_session_project_id",
|
||||
"chat_session",
|
||||
"user_project",
|
||||
["project_id"],
|
||||
["id"],
|
||||
)
|
||||
logger.info("Added foreign key constraint")
|
||||
|
||||
# === Step 3: Populate project__user_file from user_file.folder_id ===
|
||||
user_file_columns = [col["name"] for col in inspector.get_columns("user_file")]
|
||||
has_new_id = "new_id" in user_file_columns
|
||||
|
||||
if has_new_id and "folder_id" in user_file_columns:
|
||||
logger.info("Populating project__user_file from folder relationships...")
|
||||
|
||||
# Count relationships to create
|
||||
count_query = text(
|
||||
"""
|
||||
SELECT COUNT(*)
|
||||
FROM user_file uf
|
||||
WHERE uf.folder_id IS NOT NULL
|
||||
AND NOT EXISTS (
|
||||
SELECT 1
|
||||
FROM project__user_file puf
|
||||
WHERE puf.project_id = uf.folder_id
|
||||
AND puf.user_file_id = uf.new_id
|
||||
)
|
||||
"""
|
||||
)
|
||||
to_create = bind.execute(count_query).scalar_one()
|
||||
|
||||
if to_create > 0:
|
||||
logger.info(f"Creating {to_create} project-file relationships...")
|
||||
|
||||
# Insert in batches
|
||||
batch_size = 10000
|
||||
total_inserted = 0
|
||||
|
||||
while True:
|
||||
result = bind.execute(
|
||||
text(
|
||||
"""
|
||||
INSERT INTO project__user_file (project_id, user_file_id)
|
||||
SELECT uf.folder_id, uf.new_id
|
||||
FROM user_file uf
|
||||
WHERE uf.folder_id IS NOT NULL
|
||||
AND NOT EXISTS (
|
||||
SELECT 1
|
||||
FROM project__user_file puf
|
||||
WHERE puf.project_id = uf.folder_id
|
||||
AND puf.user_file_id = uf.new_id
|
||||
)
|
||||
LIMIT :batch_size
|
||||
ON CONFLICT (project_id, user_file_id) DO NOTHING
|
||||
"""
|
||||
),
|
||||
{"batch_size": batch_size},
|
||||
)
|
||||
|
||||
inserted = result.rowcount
|
||||
total_inserted += inserted
|
||||
|
||||
if inserted < batch_size:
|
||||
break
|
||||
|
||||
logger.info(f" Created {total_inserted}/{to_create} relationships...")
|
||||
|
||||
logger.info(f"Created {total_inserted} project__user_file relationships")
|
||||
|
||||
# === Step 4: Create index on chat_session.project_id ===
|
||||
try:
|
||||
indexes = [ix.get("name") for ix in inspector.get_indexes("chat_session")]
|
||||
except Exception:
|
||||
indexes = []
|
||||
|
||||
if "ix_chat_session_project_id" not in indexes:
|
||||
logger.info("Creating index on chat_session.project_id...")
|
||||
op.create_index(
|
||||
"ix_chat_session_project_id", "chat_session", ["project_id"], unique=False
|
||||
)
|
||||
logger.info("Created index")
|
||||
|
||||
logger.info("Migration 3 (relationship migration) completed successfully")
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
"""Remove migrated relationships and constraints."""
|
||||
|
||||
bind = op.get_bind()
|
||||
inspector = sa.inspect(bind)
|
||||
|
||||
logger.info("Starting downgrade of relationship migration...")
|
||||
|
||||
# Drop index on chat_session.project_id
|
||||
try:
|
||||
indexes = [ix.get("name") for ix in inspector.get_indexes("chat_session")]
|
||||
if "ix_chat_session_project_id" in indexes:
|
||||
op.drop_index("ix_chat_session_project_id", "chat_session")
|
||||
logger.info("Dropped index on chat_session.project_id")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Drop foreign key constraint
|
||||
try:
|
||||
chat_session_fks = inspector.get_foreign_keys("chat_session")
|
||||
fk_exists = any(
|
||||
fk["name"] == "fk_chat_session_project_id" for fk in chat_session_fks
|
||||
)
|
||||
if fk_exists:
|
||||
op.drop_constraint(
|
||||
"fk_chat_session_project_id", "chat_session", type_="foreignkey"
|
||||
)
|
||||
logger.info("Dropped foreign key constraint on chat_session.project_id")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Clear project__user_file relationships (but keep the table for migration 1 to handle)
|
||||
if "project__user_file" in inspector.get_table_names():
|
||||
result = bind.execute(text("DELETE FROM project__user_file"))
|
||||
logger.info(f"Cleared {result.rowcount} records from project__user_file")
|
||||
|
||||
# Remove migrated persona__user_file relationships
|
||||
# Only remove those that came from folder relationships
|
||||
if all(
|
||||
table in inspector.get_table_names()
|
||||
for table in ["persona__user_file", "persona__user_folder", "user_file"]
|
||||
):
|
||||
user_file_columns = [col["name"] for col in inspector.get_columns("user_file")]
|
||||
if "folder_id" in user_file_columns:
|
||||
result = bind.execute(
|
||||
text(
|
||||
"""
|
||||
DELETE FROM persona__user_file puf
|
||||
WHERE EXISTS (
|
||||
SELECT 1
|
||||
FROM user_file uf
|
||||
JOIN persona__user_folder puf2
|
||||
ON puf2.user_folder_id = uf.folder_id
|
||||
WHERE puf.persona_id = puf2.persona_id
|
||||
AND puf.user_file_id = uf.id
|
||||
)
|
||||
"""
|
||||
)
|
||||
)
|
||||
logger.info(
|
||||
f"Removed {result.rowcount} migrated persona__user_file relationships"
|
||||
)
|
||||
|
||||
logger.info("Downgrade completed successfully")
|
||||
73 backend/alembic/versions/1c3f8a7b5d4e_add_python_tool.py (new file)
@@ -0,0 +1,73 @@
|
||||
"""add_python_tool
|
||||
|
||||
Revision ID: 1c3f8a7b5d4e
|
||||
Revises: 505c488f6662
|
||||
Create Date: 2025-02-14 00:00:00
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "1c3f8a7b5d4e"
|
||||
down_revision = "505c488f6662"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
PYTHON_TOOL = {
|
||||
"name": "PythonTool",
|
||||
"display_name": "Code Interpreter",
|
||||
"description": (
|
||||
"The Code Interpreter Action lets assistants execute Python in an isolated runtime. "
|
||||
"It can process staged files, read and write artifacts, stream stdout and stderr, "
|
||||
"and return generated outputs for the chat session."
|
||||
),
|
||||
"in_code_tool_id": "PythonTool",
|
||||
}
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
conn = op.get_bind()
|
||||
conn.execute(sa.text("BEGIN"))
|
||||
try:
|
||||
existing = conn.execute(
|
||||
sa.text("SELECT id FROM tool WHERE in_code_tool_id = :in_code_tool_id"),
|
||||
PYTHON_TOOL,
|
||||
).fetchone()
|
||||
|
||||
if existing:
|
||||
conn.execute(
|
||||
sa.text(
|
||||
"""
|
||||
UPDATE tool
|
||||
SET name = :name,
|
||||
display_name = :display_name,
|
||||
description = :description
|
||||
WHERE in_code_tool_id = :in_code_tool_id
|
||||
"""
|
||||
),
|
||||
PYTHON_TOOL,
|
||||
)
|
||||
else:
|
||||
conn.execute(
|
||||
sa.text(
|
||||
"""
|
||||
INSERT INTO tool (name, display_name, description, in_code_tool_id)
|
||||
VALUES (:name, :display_name, :description, :in_code_tool_id)
|
||||
"""
|
||||
),
|
||||
PYTHON_TOOL,
|
||||
)
|
||||
|
||||
conn.execute(sa.text("COMMIT"))
|
||||
except Exception:
|
||||
conn.execute(sa.text("ROLLBACK"))
|
||||
raise
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
# Do not delete the tool entry on downgrade; leaving it is safe and keeps migrations idempotent.
|
||||
pass
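

# For illustration only (not part of this migration): if a destructive
# downgrade were ever wanted instead of the no-op above, it would be a
# guarded delete keyed on the same in_code_tool_id, e.g.:
#
#     def downgrade() -> None:
#         conn = op.get_bind()
#         conn.execute(
#             sa.text("DELETE FROM tool WHERE in_code_tool_id = :in_code_tool_id"),
#             {"in_code_tool_id": PYTHON_TOOL["in_code_tool_id"]},
#         )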
|
||||
@@ -1,218 +0,0 @@
|
||||
"""Migration 6: User file schema cleanup
|
||||
|
||||
Revision ID: 2b75d0a8ffcb
|
||||
Revises: 3a78dba1080a
|
||||
Create Date: 2025-09-22 10:09:26.375377
|
||||
|
||||
This migration removes legacy columns and tables after data migration is complete.
|
||||
It should only be run after verifying all data has been successfully migrated.
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy import text
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger("alembic.runtime.migration")
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "2b75d0a8ffcb"
|
||||
down_revision = "3a78dba1080a"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
"""Remove legacy columns and tables."""
|
||||
|
||||
bind = op.get_bind()
|
||||
inspector = sa.inspect(bind)
|
||||
|
||||
logger.info("Starting schema cleanup...")
|
||||
|
||||
# === Step 1: Verify data migration is complete ===
|
||||
logger.info("Verifying data migration completion...")
|
||||
|
||||
# Check if any chat sessions still have folder_id references
|
||||
chat_session_columns = [
|
||||
col["name"] for col in inspector.get_columns("chat_session")
|
||||
]
|
||||
if "folder_id" in chat_session_columns:
|
||||
orphaned_count = bind.execute(
|
||||
text(
|
||||
"""
|
||||
SELECT COUNT(*) FROM chat_session
|
||||
WHERE folder_id IS NOT NULL AND project_id IS NULL
|
||||
"""
|
||||
)
|
||||
).scalar_one()
|
||||
|
||||
if orphaned_count > 0:
|
||||
logger.warning(
|
||||
f"WARNING: {orphaned_count} chat_session records still have "
|
||||
f"folder_id without project_id. Proceeding anyway."
|
||||
)
|
||||
|
||||
# === Step 2: Drop chat_session.folder_id ===
|
||||
if "folder_id" in chat_session_columns:
|
||||
logger.info("Dropping chat_session.folder_id...")
|
||||
|
||||
# Drop foreign key constraint first
|
||||
op.execute(
|
||||
"ALTER TABLE chat_session DROP CONSTRAINT IF EXISTS chat_session_folder_fk"
|
||||
)
|
||||
|
||||
# Drop the column
|
||||
op.drop_column("chat_session", "folder_id")
|
||||
logger.info("Dropped chat_session.folder_id")
|
||||
|
||||
# === Step 3: Drop persona__user_folder table ===
|
||||
if "persona__user_folder" in inspector.get_table_names():
|
||||
logger.info("Dropping persona__user_folder table...")
|
||||
|
||||
        # Check for any remaining data
        remaining = bind.execute(
            text("SELECT COUNT(*) FROM persona__user_folder")
        ).scalar_one()

        if remaining > 0:
            logger.warning(
                f"WARNING: Dropping persona__user_folder with {remaining} records"
            )

        op.drop_table("persona__user_folder")
        logger.info("Dropped persona__user_folder table")

    # === Step 4: Drop chat_folder table ===
    if "chat_folder" in inspector.get_table_names():
        logger.info("Dropping chat_folder table...")

        # Check for any remaining data
        remaining = bind.execute(text("SELECT COUNT(*) FROM chat_folder")).scalar_one()

        if remaining > 0:
            logger.warning(f"WARNING: Dropping chat_folder with {remaining} records")

        op.drop_table("chat_folder")
        logger.info("Dropped chat_folder table")

    # === Step 5: Drop user_file legacy columns ===
    user_file_columns = [col["name"] for col in inspector.get_columns("user_file")]

    # Drop folder_id
    if "folder_id" in user_file_columns:
        logger.info("Dropping user_file.folder_id...")
        op.drop_column("user_file", "folder_id")
        logger.info("Dropped user_file.folder_id")

    # Drop cc_pair_id (already handled in migration 5, but be sure)
    if "cc_pair_id" in user_file_columns:
        logger.info("Dropping user_file.cc_pair_id...")

        # Drop any remaining foreign key constraints
        bind.execute(
            text(
                """
                DO $$
                DECLARE r RECORD;
                BEGIN
                    FOR r IN (
                        SELECT conname
                        FROM pg_constraint c
                        JOIN pg_class t ON c.conrelid = t.oid
                        WHERE c.contype = 'f'
                          AND t.relname = 'user_file'
                          AND EXISTS (
                              SELECT 1 FROM pg_attribute a
                              WHERE a.attrelid = t.oid
                              AND a.attname = 'cc_pair_id'
                          )
                    ) LOOP
                        EXECUTE format('ALTER TABLE user_file DROP CONSTRAINT IF EXISTS %I', r.conname);
                    END LOOP;
                END$$;
                """
            )
        )

        op.drop_column("user_file", "cc_pair_id")
        logger.info("Dropped user_file.cc_pair_id")

    # === Step 6: Clean up any remaining constraints ===
    logger.info("Cleaning up remaining constraints...")

    # Drop any unique constraints on removed columns
    op.execute(
        "ALTER TABLE user_file DROP CONSTRAINT IF EXISTS user_file_cc_pair_id_key"
    )

    logger.info("Migration 6 (schema cleanup) completed successfully")
    logger.info("Legacy schema has been fully removed")


def downgrade() -> None:
    """Recreate dropped columns and tables (structure only, no data)."""

    bind = op.get_bind()
    inspector = sa.inspect(bind)

    logger.warning("Downgrading schema cleanup - recreating structure only, no data!")

    # Recreate user_file columns
    if "user_file" in inspector.get_table_names():
        columns = [col["name"] for col in inspector.get_columns("user_file")]

        if "cc_pair_id" not in columns:
            op.add_column(
                "user_file", sa.Column("cc_pair_id", sa.Integer(), nullable=True)
            )

        if "folder_id" not in columns:
            op.add_column(
                "user_file", sa.Column("folder_id", sa.Integer(), nullable=True)
            )

    # Recreate chat_folder table
    if "chat_folder" not in inspector.get_table_names():
        op.create_table(
            "chat_folder",
            sa.Column("id", sa.Integer(), nullable=False),
            sa.Column("user_id", sa.UUID(), nullable=False),
            sa.Column("name", sa.String(), nullable=False),
            sa.Column("created_at", sa.DateTime(timezone=True), nullable=False),
            sa.PrimaryKeyConstraint("id"),
            sa.ForeignKeyConstraint(
                ["user_id"], ["user.id"], name="chat_folder_user_fk"
            ),
        )

    # Recreate persona__user_folder table
    if "persona__user_folder" not in inspector.get_table_names():
        op.create_table(
            "persona__user_folder",
            sa.Column("persona_id", sa.Integer(), nullable=False),
            sa.Column("user_folder_id", sa.Integer(), nullable=False),
            sa.PrimaryKeyConstraint("persona_id", "user_folder_id"),
            sa.ForeignKeyConstraint(["persona_id"], ["persona.id"]),
            sa.ForeignKeyConstraint(["user_folder_id"], ["user_project.id"]),
        )

    # Add folder_id back to chat_session
    if "chat_session" in inspector.get_table_names():
        columns = [col["name"] for col in inspector.get_columns("chat_session")]
        if "folder_id" not in columns:
            op.add_column(
                "chat_session", sa.Column("folder_id", sa.Integer(), nullable=True)
            )

            # Add foreign key if chat_folder exists
            if "chat_folder" in inspector.get_table_names():
                op.create_foreign_key(
                    "chat_session_folder_fk",
                    "chat_session",
                    "chat_folder",
                    ["folder_id"],
                    ["id"],
                )

    logger.info("Downgrade completed - structure recreated but data is lost")
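The drop logic above only touches tables that the inspector still reports and logs a warning whenever rows would be lost. A minimal standalone sketch of that guard pattern, assuming a plain SQLAlchemy connection (the helper name and the use of raw SQL instead of Alembic ops are illustrative, not part of the migration):

    import sqlalchemy as sa
    from sqlalchemy import text

    def drop_table_if_present(bind: sa.engine.Connection, table_name: str) -> None:
        # Only drop when the table is actually present
        if table_name not in sa.inspect(bind).get_table_names():
            return
        # Warn if data would be lost by the drop
        remaining = bind.execute(text(f"SELECT COUNT(*) FROM {table_name}")).scalar_one()
        if remaining > 0:
            print(f"WARNING: dropping {table_name} with {remaining} rows")
        bind.execute(text(f"DROP TABLE {table_name}"))
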
@@ -1,298 +0,0 @@
"""Migration 5: User file legacy data cleanup

Revision ID: 3a78dba1080a
Revises: 7cc3fcc116c1
Create Date: 2025-09-22 10:04:27.986294

This migration removes legacy user-file documents and connector_credential_pairs.
It performs bulk deletions of obsolete data after the UUID migration.
"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql as psql
from sqlalchemy import text
import logging
from typing import List
import uuid

logger = logging.getLogger("alembic.runtime.migration")

# revision identifiers, used by Alembic.
revision = "3a78dba1080a"
down_revision = "7cc3fcc116c1"
branch_labels = None
depends_on = None


def batch_delete(
    bind: sa.engine.Connection,
    table_name: str,
    id_column: str,
    ids: List[str | int | uuid.UUID],
    batch_size: int = 1000,
    id_type: str = "int",
) -> int:
    """Delete records in batches to avoid memory issues and timeouts."""
    total_count = len(ids)
    if total_count == 0:
        return 0

    logger.info(
        f"Starting batch deletion of {total_count} records from {table_name}..."
    )

    # Determine appropriate ARRAY type
    if id_type == "uuid":
        array_type = psql.ARRAY(psql.UUID(as_uuid=True))
    elif id_type == "int":
        array_type = psql.ARRAY(sa.Integer())
    else:
        array_type = psql.ARRAY(sa.String())

    total_deleted = 0
    failed_batches = []

    for i in range(0, total_count, batch_size):
        batch_ids = ids[i : i + batch_size]
        try:
            stmt = text(
                f"DELETE FROM {table_name} WHERE {id_column} = ANY(:ids)"
            ).bindparams(sa.bindparam("ids", value=batch_ids, type_=array_type))
            result = bind.execute(stmt)
            total_deleted += result.rowcount

            # Log progress every 10 batches or at completion
            batch_num = (i // batch_size) + 1
            if batch_num % 10 == 0 or i + batch_size >= total_count:
                logger.info(
                    f" Deleted {min(i + batch_size, total_count)}/{total_count} records "
                    f"({total_deleted} actual) from {table_name}"
                )
        except Exception as e:
            logger.error(f"Failed to delete batch {(i // batch_size) + 1}: {e}")
            failed_batches.append((i, min(i + batch_size, total_count)))

    if failed_batches:
        logger.warning(
            f"Failed to delete {len(failed_batches)} batches from {table_name}. "
            f"Total deleted: {total_deleted}/{total_count}"
        )
        # Fail the migration to avoid silently succeeding on partial cleanup
        raise RuntimeError(
            f"Batch deletion failed for {table_name}: "
            f"{len(failed_batches)} failed batches out of "
            f"{(total_count + batch_size - 1) // batch_size}."
        )

    return total_deleted


def upgrade() -> None:
    """Remove legacy user-file documents and connector_credential_pairs."""

    bind = op.get_bind()
    inspector = sa.inspect(bind)

    logger.info("Starting legacy data cleanup...")

    # === Step 1: Identify and delete user-file documents ===
    logger.info("Identifying user-file documents to delete...")

    # Get document IDs to delete
    doc_rows = bind.execute(
        text(
            """
            SELECT DISTINCT dcc.id AS document_id
            FROM document_by_connector_credential_pair dcc
            JOIN connector_credential_pair u
                ON u.connector_id = dcc.connector_id
                AND u.credential_id = dcc.credential_id
            WHERE u.is_user_file IS TRUE
            """
        )
    ).fetchall()

    doc_ids = [r[0] for r in doc_rows]

    if doc_ids:
        logger.info(f"Found {len(doc_ids)} user-file documents to delete")

        # Delete dependent rows first
        tables_to_clean = [
            ("document_retrieval_feedback", "document_id"),
            ("document__tag", "document_id"),
            ("chunk_stats", "document_id"),
        ]

        for table_name, column_name in tables_to_clean:
            if table_name in inspector.get_table_names():
                # document_id is a string in these tables
                deleted = batch_delete(
                    bind, table_name, column_name, doc_ids, id_type="str"
                )
                logger.info(f"Deleted {deleted} records from {table_name}")

        # Delete document_by_connector_credential_pair entries
        deleted = batch_delete(
            bind, "document_by_connector_credential_pair", "id", doc_ids, id_type="str"
        )
        logger.info(f"Deleted {deleted} document_by_connector_credential_pair records")

        # Delete documents themselves
        deleted = batch_delete(bind, "document", "id", doc_ids, id_type="str")
        logger.info(f"Deleted {deleted} document records")
    else:
        logger.info("No user-file documents found to delete")

    # === Step 2: Clean up user-file connector_credential_pairs ===
    logger.info("Cleaning up user-file connector_credential_pairs...")

    # Get cc_pair IDs
    cc_pair_rows = bind.execute(
        text(
            """
            SELECT id AS cc_pair_id
            FROM connector_credential_pair
            WHERE is_user_file IS TRUE
            """
        )
    ).fetchall()

    cc_pair_ids = [r[0] for r in cc_pair_rows]

    if cc_pair_ids:
        logger.info(
            f"Found {len(cc_pair_ids)} user-file connector_credential_pairs to clean up"
        )

        # Delete related records
        # Clean child tables first to satisfy foreign key constraints,
        # then the parent tables
        tables_to_clean = [
            ("index_attempt_errors", "connector_credential_pair_id"),
            ("index_attempt", "connector_credential_pair_id"),
            ("background_error", "cc_pair_id"),
            ("document_set__connector_credential_pair", "connector_credential_pair_id"),
            ("user_group__connector_credential_pair", "cc_pair_id"),
        ]

        for table_name, column_name in tables_to_clean:
            if table_name in inspector.get_table_names():
                deleted = batch_delete(
                    bind, table_name, column_name, cc_pair_ids, id_type="int"
                )
                logger.info(f"Deleted {deleted} records from {table_name}")

    # === Step 3: Identify connectors and credentials to delete ===
    logger.info("Identifying orphaned connectors and credentials...")

    # Get connectors used only by user-file cc_pairs
    connector_rows = bind.execute(
        text(
            """
            SELECT DISTINCT ccp.connector_id
            FROM connector_credential_pair ccp
            WHERE ccp.is_user_file IS TRUE
            AND ccp.connector_id != 0 -- Exclude system default
            AND NOT EXISTS (
                SELECT 1
                FROM connector_credential_pair c2
                WHERE c2.connector_id = ccp.connector_id
                AND c2.is_user_file IS NOT TRUE
            )
            """
        )
    ).fetchall()

    userfile_only_connector_ids = [r[0] for r in connector_rows]

    # Get credentials used only by user-file cc_pairs
    credential_rows = bind.execute(
        text(
            """
            SELECT DISTINCT ccp.credential_id
            FROM connector_credential_pair ccp
            WHERE ccp.is_user_file IS TRUE
            AND ccp.credential_id != 0 -- Exclude public/default
            AND NOT EXISTS (
                SELECT 1
                FROM connector_credential_pair c2
                WHERE c2.credential_id = ccp.credential_id
                AND c2.is_user_file IS NOT TRUE
            )
            """
        )
    ).fetchall()

    userfile_only_credential_ids = [r[0] for r in credential_rows]

    # === Step 4: Delete the cc_pairs themselves ===
    if cc_pair_ids:
        # Remove FK dependency from user_file first
        bind.execute(
            text(
                """
                DO $$
                DECLARE r RECORD;
                BEGIN
                    FOR r IN (
                        SELECT conname
                        FROM pg_constraint c
                        JOIN pg_class t ON c.conrelid = t.oid
                        JOIN pg_class ft ON c.confrelid = ft.oid
                        WHERE c.contype = 'f'
                          AND t.relname = 'user_file'
                          AND ft.relname = 'connector_credential_pair'
                    ) LOOP
                        EXECUTE format('ALTER TABLE user_file DROP CONSTRAINT IF EXISTS %I', r.conname);
                    END LOOP;
                END$$;
                """
            )
        )

        # Delete cc_pairs
        deleted = batch_delete(
            bind, "connector_credential_pair", "id", cc_pair_ids, id_type="int"
        )
        logger.info(f"Deleted {deleted} connector_credential_pair records")

    # === Step 5: Delete orphaned connectors ===
    if userfile_only_connector_ids:
        deleted = batch_delete(
            bind, "connector", "id", userfile_only_connector_ids, id_type="int"
        )
        logger.info(f"Deleted {deleted} orphaned connector records")

    # === Step 6: Delete orphaned credentials ===
    if userfile_only_credential_ids:
        # Clean up credential__user_group mappings first
        deleted = batch_delete(
            bind,
            "credential__user_group",
            "credential_id",
            userfile_only_credential_ids,
            id_type="int",
        )
        logger.info(f"Deleted {deleted} credential__user_group records")

        # Delete credentials
        deleted = batch_delete(
            bind, "credential", "id", userfile_only_credential_ids, id_type="int"
        )
        logger.info(f"Deleted {deleted} orphaned credential records")

    logger.info("Migration 5 (legacy data cleanup) completed successfully")


def downgrade() -> None:
    """Cannot restore deleted data - requires backup restoration."""

    logger.error("CRITICAL: Downgrading data cleanup cannot restore deleted data!")
    logger.error("Data restoration requires backup files or database backup.")

    raise NotImplementedError(
        "Downgrade of legacy data cleanup is not supported. "
        "Deleted data must be restored from backups."
    )
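The batch_delete helper above chunks the ID list and binds each chunk as a Postgres array parameter, so a single DELETE never has to carry the whole ID set. A hedged usage sketch (the connection comes from Alembic as in the migration; the table name and IDs are assumed purely for illustration):

    from alembic import op

    # Assumed example: delete three documents by string ID in batches of 2
    bind = op.get_bind()
    deleted = batch_delete(
        bind,
        table_name="document",
        id_column="id",
        ids=["doc-a", "doc-b", "doc-c"],
        batch_size=2,
        id_type="str",
    )
    # With three IDs and batch_size=2 this issues two
    # "DELETE FROM document WHERE id = ANY(:ids)" statements
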
@@ -1,37 +0,0 @@
"""Add image input support to model config

Revision ID: 64bd5677aeb6
Revises: b30353be4eec
Create Date: 2025-09-28 15:48:12.003612

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "64bd5677aeb6"
down_revision = "b30353be4eec"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.add_column(
        "model_configuration",
        sa.Column("supports_image_input", sa.Boolean(), nullable=True),
    )

    # Seems to be left over from when model visibility was introduced as a nullable field.
    # Set any null is_visible values to False
    connection = op.get_bind()
    connection.execute(
        sa.text(
            "UPDATE model_configuration SET is_visible = false WHERE is_visible IS NULL"
        )
    )


def downgrade() -> None:
    op.drop_column("model_configuration", "supports_image_input")
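Because the migration above leaves supports_image_input nullable, readers of the column still have to decide what NULL means. A hedged sketch of one way to treat NULL as "not image capable" until the flag is backfilled (the connection variable mirrors the migration; the query itself is an assumption, not existing application code):

    from sqlalchemy import text

    # Assumed example: treat NULL as false when reading the new flag
    rows = connection.execute(
        text(
            "SELECT id, COALESCE(supports_image_input, false) AS supports_image_input "
            "FROM model_configuration"
        )
    )
    image_capable_ids = [row.id for row in rows if row.supports_image_input]
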
@@ -1,193 +0,0 @@
"""Migration 4: User file UUID primary key swap

Revision ID: 7cc3fcc116c1
Revises: 16c37a30adf2
Create Date: 2025-09-22 09:54:38.292952

This migration performs the critical UUID primary key swap on user_file table.
It updates all foreign key references to use UUIDs instead of integers.
"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql as psql
import logging

logger = logging.getLogger("alembic.runtime.migration")

# revision identifiers, used by Alembic.
revision = "7cc3fcc116c1"
down_revision = "16c37a30adf2"
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Swap user_file primary key from integer to UUID."""

    bind = op.get_bind()
    inspector = sa.inspect(bind)

    # Verify we're in the expected state
    user_file_columns = [col["name"] for col in inspector.get_columns("user_file")]
    if "new_id" not in user_file_columns:
        logger.warning(
            "user_file.new_id not found - migration may have already been applied"
        )
        return

    logger.info("Starting UUID primary key swap...")

    # === Step 1: Update persona__user_file foreign key to UUID ===
    logger.info("Updating persona__user_file foreign key...")

    # Drop existing foreign key constraints
    op.execute(
        "ALTER TABLE persona__user_file DROP CONSTRAINT IF EXISTS persona__user_file_user_file_id_uuid_fkey"
    )
    op.execute(
        "ALTER TABLE persona__user_file DROP CONSTRAINT IF EXISTS persona__user_file_user_file_id_fkey"
    )

    # Create new foreign key to user_file.new_id
    op.create_foreign_key(
        "persona__user_file_user_file_id_fkey",
        "persona__user_file",
        "user_file",
        local_cols=["user_file_id_uuid"],
        remote_cols=["new_id"],
    )

    # Drop the old integer column and rename UUID column
    op.execute("ALTER TABLE persona__user_file DROP COLUMN IF EXISTS user_file_id")
    op.alter_column(
        "persona__user_file",
        "user_file_id_uuid",
        new_column_name="user_file_id",
        existing_type=psql.UUID(as_uuid=True),
        nullable=False,
    )

    # Recreate composite primary key
    op.execute(
        "ALTER TABLE persona__user_file DROP CONSTRAINT IF EXISTS persona__user_file_pkey"
    )
    op.execute(
        "ALTER TABLE persona__user_file ADD PRIMARY KEY (persona_id, user_file_id)"
    )

    logger.info("Updated persona__user_file to use UUID foreign key")

    # === Step 2: Perform the primary key swap on user_file ===
    logger.info("Swapping user_file primary key to UUID...")

    # Drop the primary key constraint
    op.execute("ALTER TABLE user_file DROP CONSTRAINT IF EXISTS user_file_pkey")

    # Drop the old id column and rename new_id to id
    op.execute("ALTER TABLE user_file DROP COLUMN IF EXISTS id")
    op.alter_column(
        "user_file",
        "new_id",
        new_column_name="id",
        existing_type=psql.UUID(as_uuid=True),
        nullable=False,
    )

    # Set default for new inserts
    op.alter_column(
        "user_file",
        "id",
        existing_type=psql.UUID(as_uuid=True),
        server_default=sa.text("gen_random_uuid()"),
    )

    # Create new primary key
    op.execute("ALTER TABLE user_file ADD PRIMARY KEY (id)")

    logger.info("Swapped user_file primary key to UUID")

    # === Step 3: Update foreign key constraints ===
    logger.info("Updating foreign key constraints...")

    # Recreate persona__user_file foreign key to point to user_file.id
    # Drop existing FK first to break dependency on the unique constraint
    op.execute(
        "ALTER TABLE persona__user_file DROP CONSTRAINT IF EXISTS persona__user_file_user_file_id_fkey"
    )
    # Drop the unique constraint on (formerly) new_id BEFORE recreating the FK,
    # so the FK will bind to the primary key instead of the unique index.
    op.execute("ALTER TABLE user_file DROP CONSTRAINT IF EXISTS uq_user_file_new_id")
    # Now recreate FK to the primary key column
    op.create_foreign_key(
        "persona__user_file_user_file_id_fkey",
        "persona__user_file",
        "user_file",
        local_cols=["user_file_id"],
        remote_cols=["id"],
    )

    # Add foreign keys for project__user_file
    existing_fks = inspector.get_foreign_keys("project__user_file")

    has_user_file_fk = any(
        fk.get("referred_table") == "user_file"
        and fk.get("constrained_columns") == ["user_file_id"]
        for fk in existing_fks
    )

    if not has_user_file_fk:
        op.create_foreign_key(
            "fk_project__user_file_user_file_id",
            "project__user_file",
            "user_file",
            ["user_file_id"],
            ["id"],
        )
        logger.info("Added project__user_file -> user_file foreign key")

    has_project_fk = any(
        fk.get("referred_table") == "user_project"
        and fk.get("constrained_columns") == ["project_id"]
        for fk in existing_fks
    )

    if not has_project_fk:
        op.create_foreign_key(
            "fk_project__user_file_project_id",
            "project__user_file",
            "user_project",
            ["project_id"],
            ["id"],
        )
        logger.info("Added project__user_file -> user_project foreign key")

    # === Step 4: Mark files for document_id migration ===
    logger.info("Marking files for background document_id migration...")

    logger.info("Migration 4 (UUID primary key swap) completed successfully")
    logger.info(
        "NOTE: Background task will update document IDs in Vespa and search_doc"
    )


def downgrade() -> None:
    """Revert UUID primary key back to integer (data destructive!)."""

    logger.error("CRITICAL: Downgrading UUID primary key swap is data destructive!")
    logger.error(
        "This will break all UUID-based references created after the migration."
    )
    logger.error("Only proceed if absolutely necessary and have backups.")

    # The downgrade would need to:
    # 1. Add back integer columns
    # 2. Generate new sequential IDs
    # 3. Update all foreign key references
    # 4. Swap primary keys back
    # This is complex and risky, so we raise an error instead

    raise NotImplementedError(
        "Downgrade of UUID primary key swap is not supported due to data loss risk. "
        "Manual intervention with data backup/restore is required."
    )
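The swap above follows a common expand/contract sequence: a UUID column with a default is added in an earlier migration, foreign keys are repointed at it, and only then is it promoted to the primary key. A condensed, hedged sketch of the same order of operations in raw SQL (table and column names mirror the migration, but this is an illustration only and drops the guards the real migration keeps around each step; gen_random_uuid() assumes pgcrypto, which Migration 1 enables):

    from alembic import op

    # Condensed illustration of the swap order on user_file
    op.execute("ALTER TABLE user_file DROP CONSTRAINT IF EXISTS user_file_pkey")
    op.execute("ALTER TABLE user_file DROP COLUMN IF EXISTS id")
    op.execute("ALTER TABLE user_file RENAME COLUMN new_id TO id")
    op.execute("ALTER TABLE user_file ALTER COLUMN id SET DEFAULT gen_random_uuid()")
    op.execute("ALTER TABLE user_file ADD PRIMARY KEY (id)")
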
@@ -1,257 +0,0 @@
"""Migration 1: User file schema additions

Revision ID: 9b66d3156fc6
Revises: b4ef3ae0bf6e
Create Date: 2025-09-22 09:42:06.086732

This migration adds new columns and tables without modifying existing data.
It is safe to run and can be easily rolled back.
"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql as psql
import logging

logger = logging.getLogger("alembic.runtime.migration")

# revision identifiers, used by Alembic.
revision = "9b66d3156fc6"
down_revision = "b4ef3ae0bf6e"
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Add new columns and tables without modifying existing data."""

    # Enable pgcrypto for UUID generation
    op.execute("CREATE EXTENSION IF NOT EXISTS pgcrypto")

    bind = op.get_bind()
    inspector = sa.inspect(bind)

    # === USER_FILE: Add new columns ===
    logger.info("Adding new columns to user_file table...")

    user_file_columns = [col["name"] for col in inspector.get_columns("user_file")]

    # Check if ID is already UUID (in case of re-run after partial migration)
    id_is_uuid = any(
        col["name"] == "id" and "uuid" in str(col["type"]).lower()
        for col in inspector.get_columns("user_file")
    )

    # Add transitional UUID column only if ID is not already UUID
    if "new_id" not in user_file_columns and not id_is_uuid:
        op.add_column(
            "user_file",
            sa.Column(
                "new_id",
                psql.UUID(as_uuid=True),
                nullable=True,
                server_default=sa.text("gen_random_uuid()"),
            ),
        )
        op.create_unique_constraint("uq_user_file_new_id", "user_file", ["new_id"])
        logger.info("Added new_id column to user_file")

    # Add status column
    if "status" not in user_file_columns:
        op.add_column(
            "user_file",
            sa.Column(
                "status",
                sa.Enum(
                    "PROCESSING",
                    "COMPLETED",
                    "FAILED",
                    "CANCELED",
                    name="userfilestatus",
                    native_enum=False,
                ),
                nullable=False,
                server_default="PROCESSING",
            ),
        )
        logger.info("Added status column to user_file")

    # Add other tracking columns
    if "chunk_count" not in user_file_columns:
        op.add_column(
            "user_file", sa.Column("chunk_count", sa.Integer(), nullable=True)
        )
        logger.info("Added chunk_count column to user_file")

    if "last_accessed_at" not in user_file_columns:
        op.add_column(
            "user_file",
            sa.Column("last_accessed_at", sa.DateTime(timezone=True), nullable=True),
        )
        logger.info("Added last_accessed_at column to user_file")

    if "needs_project_sync" not in user_file_columns:
        op.add_column(
            "user_file",
            sa.Column(
                "needs_project_sync",
                sa.Boolean(),
                nullable=False,
                server_default=sa.text("false"),
            ),
        )
        logger.info("Added needs_project_sync column to user_file")

    if "last_project_sync_at" not in user_file_columns:
        op.add_column(
            "user_file",
            sa.Column(
                "last_project_sync_at", sa.DateTime(timezone=True), nullable=True
            ),
        )
        logger.info("Added last_project_sync_at column to user_file")

    if "document_id_migrated" not in user_file_columns:
        op.add_column(
            "user_file",
            sa.Column(
                "document_id_migrated",
                sa.Boolean(),
                nullable=False,
                server_default=sa.text("true"),
            ),
        )
        logger.info("Added document_id_migrated column to user_file")

    # === USER_FOLDER -> USER_PROJECT rename ===
    table_names = set(inspector.get_table_names())

    if "user_folder" in table_names:
        logger.info("Updating user_folder table...")
        # Make description nullable first
        op.alter_column("user_folder", "description", nullable=True)

        # Rename table if user_project doesn't exist
        if "user_project" not in table_names:
            op.execute("ALTER TABLE user_folder RENAME TO user_project")
            logger.info("Renamed user_folder to user_project")
    elif "user_project" in table_names:
        # If already renamed, ensure column nullability
        project_cols = [col["name"] for col in inspector.get_columns("user_project")]
        if "description" in project_cols:
            op.alter_column("user_project", "description", nullable=True)

    # Add instructions column to user_project
    inspector = sa.inspect(bind)  # Refresh after rename
    if "user_project" in inspector.get_table_names():
        project_columns = [col["name"] for col in inspector.get_columns("user_project")]
        if "instructions" not in project_columns:
            op.add_column(
                "user_project",
                sa.Column("instructions", sa.String(), nullable=True),
            )
            logger.info("Added instructions column to user_project")

    # === CHAT_SESSION: Add project_id ===
    chat_session_columns = [
        col["name"] for col in inspector.get_columns("chat_session")
    ]
    if "project_id" not in chat_session_columns:
        op.add_column(
            "chat_session",
            sa.Column("project_id", sa.Integer(), nullable=True),
        )
        logger.info("Added project_id column to chat_session")

    # === PERSONA__USER_FILE: Add UUID column ===
    persona_user_file_columns = [
        col["name"] for col in inspector.get_columns("persona__user_file")
    ]
    if "user_file_id_uuid" not in persona_user_file_columns:
        op.add_column(
            "persona__user_file",
            sa.Column("user_file_id_uuid", psql.UUID(as_uuid=True), nullable=True),
        )
        logger.info("Added user_file_id_uuid column to persona__user_file")

    # === PROJECT__USER_FILE: Create new table ===
    if "project__user_file" not in inspector.get_table_names():
        op.create_table(
            "project__user_file",
            sa.Column("project_id", sa.Integer(), nullable=False),
            sa.Column("user_file_id", psql.UUID(as_uuid=True), nullable=False),
            sa.PrimaryKeyConstraint("project_id", "user_file_id"),
        )
        op.create_index(
            "idx_project__user_file_user_file_id",
            "project__user_file",
            ["user_file_id"],
        )
        logger.info("Created project__user_file table")

    logger.info("Migration 1 (schema additions) completed successfully")


def downgrade() -> None:
    """Remove added columns and tables."""

    bind = op.get_bind()
    inspector = sa.inspect(bind)

    logger.info("Starting downgrade of schema additions...")

    # Drop project__user_file table
    if "project__user_file" in inspector.get_table_names():
        op.drop_index("idx_project__user_file_user_file_id", "project__user_file")
        op.drop_table("project__user_file")
        logger.info("Dropped project__user_file table")

    # Remove columns from persona__user_file
    if "persona__user_file" in inspector.get_table_names():
        columns = [col["name"] for col in inspector.get_columns("persona__user_file")]
        if "user_file_id_uuid" in columns:
            op.drop_column("persona__user_file", "user_file_id_uuid")
            logger.info("Dropped user_file_id_uuid from persona__user_file")

    # Remove columns from chat_session
    if "chat_session" in inspector.get_table_names():
        columns = [col["name"] for col in inspector.get_columns("chat_session")]
        if "project_id" in columns:
            op.drop_column("chat_session", "project_id")
            logger.info("Dropped project_id from chat_session")

    # Rename user_project back to user_folder and remove instructions
    if "user_project" in inspector.get_table_names():
        columns = [col["name"] for col in inspector.get_columns("user_project")]
        if "instructions" in columns:
            op.drop_column("user_project", "instructions")
        op.execute("ALTER TABLE user_project RENAME TO user_folder")
        op.alter_column("user_folder", "description", nullable=False)
        logger.info("Renamed user_project back to user_folder")

    # Remove columns from user_file
    if "user_file" in inspector.get_table_names():
        columns = [col["name"] for col in inspector.get_columns("user_file")]

        columns_to_drop = [
            "document_id_migrated",
            "last_project_sync_at",
            "needs_project_sync",
            "last_accessed_at",
            "chunk_count",
            "status",
        ]

        for col in columns_to_drop:
            if col in columns:
                op.drop_column("user_file", col)
                logger.info(f"Dropped {col} from user_file")

        if "new_id" in columns:
            op.drop_constraint("uq_user_file_new_id", "user_file", type_="unique")
            op.drop_column("user_file", "new_id")
            logger.info("Dropped new_id from user_file")

    # Drop enum type if no columns use it
    bind.execute(sa.text("DROP TYPE IF EXISTS userfilestatus"))

    logger.info("Downgrade completed successfully")
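Migration 1 guards every change with an inspector lookup so it can be re-run safely after a partial failure. A minimal sketch of that idempotent add-column pattern, assuming an Alembic context (the helper name and the specific table/column are illustrative):

    import sqlalchemy as sa
    from alembic import op

    def add_column_if_missing(table: str, column: sa.Column) -> None:
        # Re-inspect on every call so a partially applied migration can be retried
        inspector = sa.inspect(op.get_bind())
        existing = {col["name"] for col in inspector.get_columns(table)}
        if column.name not in existing:
            op.add_column(table, column)

    # Example call, mirroring one of the additions above
    add_column_if_missing("user_file", sa.Column("chunk_count", sa.Integer(), nullable=True))
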
@@ -1,123 +0,0 @@
"""add_mcp_auth_performer

Revision ID: b30353be4eec
Revises: 2b75d0a8ffcb
Create Date: 2025-09-13 14:58:08.413534

"""

from alembic import op
import sqlalchemy as sa
from onyx.db.enums import MCPAuthenticationPerformer, MCPTransport


# revision identifiers, used by Alembic.
revision = "b30353be4eec"
down_revision = "2b75d0a8ffcb"
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Moving to a better way of handling auth performer and transport."""
    # Add nullable column first for backward compatibility
    op.add_column(
        "mcp_server",
        sa.Column(
            "auth_performer",
            sa.Enum(MCPAuthenticationPerformer, native_enum=False),
            nullable=True,
        ),
    )

    op.add_column(
        "mcp_server",
        sa.Column(
            "transport",
            sa.Enum(MCPTransport, native_enum=False),
            nullable=True,
        ),
    )

    # Backfill values using existing data and inference rules
    bind = op.get_bind()

    # 1) OAUTH servers are always PER_USER
    bind.execute(
        sa.text(
            """
            UPDATE mcp_server
            SET auth_performer = 'PER_USER'
            WHERE auth_type = 'OAUTH'
            """
        )
    )

    # 2) If there is no admin connection config, mark as ADMIN (and not set yet)
    bind.execute(
        sa.text(
            """
            UPDATE mcp_server
            SET auth_performer = 'ADMIN'
            WHERE admin_connection_config_id IS NULL
            AND auth_performer IS NULL
            """
        )
    )

    # 3) If there exists any user-specific connection config (user_email != ''), mark as PER_USER
    bind.execute(
        sa.text(
            """
            UPDATE mcp_server AS ms
            SET auth_performer = 'PER_USER'
            FROM mcp_connection_config AS mcc
            WHERE mcc.mcp_server_id = ms.id
            AND COALESCE(mcc.user_email, '') <> ''
            AND ms.auth_performer IS NULL
            """
        )
    )

    # 4) Default any remaining nulls to ADMIN (covers API_TOKEN admin-managed and NONE)
    bind.execute(
        sa.text(
            """
            UPDATE mcp_server
            SET auth_performer = 'ADMIN'
            WHERE auth_performer IS NULL
            """
        )
    )

    # Finally, make the column non-nullable
    op.alter_column(
        "mcp_server",
        "auth_performer",
        existing_type=sa.Enum(MCPAuthenticationPerformer, native_enum=False),
        nullable=False,
    )

    # Backfill transport for existing rows to STREAMABLE_HTTP, then make non-nullable
    bind.execute(
        sa.text(
            """
            UPDATE mcp_server
            SET transport = 'STREAMABLE_HTTP'
            WHERE transport IS NULL
            """
        )
    )

    op.alter_column(
        "mcp_server",
        "transport",
        existing_type=sa.Enum(MCPTransport, native_enum=False),
        nullable=False,
    )


def downgrade() -> None:
    """Remove the auth_performer and transport columns."""
    op.drop_column("mcp_server", "transport")
    op.drop_column("mcp_server", "auth_performer")
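The MCP migration above uses the standard three-step sequence for introducing a required column on a live table: add it as nullable, backfill every row, then tighten it to NOT NULL. A condensed, hedged restatement of that sequence for the transport column (this compresses the migration's own steps; the column type is simplified to a plain string for illustration):

    import sqlalchemy as sa
    from alembic import op

    # 1) add as nullable so existing rows remain valid
    op.add_column("mcp_server", sa.Column("transport", sa.String(), nullable=True))
    # 2) backfill every existing row
    op.get_bind().execute(
        sa.text("UPDATE mcp_server SET transport = 'STREAMABLE_HTTP' WHERE transport IS NULL")
    )
    # 3) only now enforce NOT NULL
    op.alter_column("mcp_server", "transport", nullable=False)
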
@@ -1,27 +0,0 @@
"""add_user_oauth_token_to_slack_bot

Revision ID: b4ef3ae0bf6e
Revises: 505c488f6662
Create Date: 2025-08-26 17:47:41.788462

"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "b4ef3ae0bf6e"
down_revision = "505c488f6662"
branch_labels = None
depends_on = None


def upgrade() -> None:
    # Add user_token column to slack_bot table
    op.add_column("slack_bot", sa.Column("user_token", sa.LargeBinary(), nullable=True))


def downgrade() -> None:
    # Remove user_token column from slack_bot table
    op.drop_column("slack_bot", "user_token")
@@ -93,7 +93,7 @@ def _is_external_group_sync_due(cc_pair: ConnectorCredentialPair) -> bool:

if cc_pair.access_type != AccessType.SYNC:
task_logger.error(
f"Received non-sync CC Pair {cc_pair.id} for external "
f"Recieved non-sync CC Pair {cc_pair.id} for external "
f"group sync. Actual access type: {cc_pair.access_type}"
)
return False

@@ -124,9 +124,9 @@ def get_space_permission(
and not space_permissions.external_user_group_ids
):
logger.warning(
f"No permissions found for space '{space_key}'. This is very unlikely "
"to be correct and is more likely caused by an access token with "
"insufficient permissions. Make sure that the access token has Admin "
f"No permissions found for space '{space_key}'. This is very unlikely"
"to be correct and is more likely caused by an access token with"
"insufficient permissions. Make sure that the access token has Admin"
f"permissions for space '{space_key}'"
)


@@ -26,7 +26,7 @@ def _get_slim_doc_generator(
else 0.0
)

return gmail_connector.retrieve_all_slim_docs_perm_sync(
return gmail_connector.retrieve_all_slim_documents(
start=start_time,
end=current_time.timestamp(),
callback=callback,

@@ -34,7 +34,7 @@ def _get_slim_doc_generator(
else 0.0
)

return google_drive_connector.retrieve_all_slim_docs_perm_sync(
return google_drive_connector.retrieve_all_slim_documents(
start=start_time,
end=current_time.timestamp(),
callback=callback,

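Both connectors above return a generator of slim-document batches rather than a flat list. A hedged sketch of how a caller might drain such a generator while reporting progress (the method name and arguments follow the diff; the loop body, the doc_ids set, and the callback.progress call are assumptions for illustration):

    # Assumed consumption pattern for the slim-document generators shown above
    doc_ids: set[str] = set()
    for batch in gmail_connector.retrieve_all_slim_documents(
        start=start_time,
        end=current_time.timestamp(),
        callback=callback,
    ):
        for slim_doc in batch:
            doc_ids.add(slim_doc.id)
        if callback:
            # keep the indexing heartbeat alive between batches (assumed API)
            callback.progress("slim_doc_fetch", len(batch))
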
@@ -59,7 +59,7 @@ def _build_holder_map(permissions: list[dict]) -> dict[str, list[Holder]]:

for raw_perm in permissions:
if not hasattr(raw_perm, "raw"):
logger.warning(f"Expected a 'raw' field, but none was found: {raw_perm=}")
logger.warn(f"Expected a 'raw' field, but none was found: {raw_perm=}")
continue

permission = Permission(**raw_perm.raw)

@@ -71,14 +71,14 @@ def _build_holder_map(permissions: list[dict]) -> dict[str, list[Holder]]:
# In order to associate this permission to some Atlassian entity, we need the "Holder".
# If this doesn't exist, then we cannot associate this permission to anyone; just skip.
if not permission.holder:
logger.warning(
logger.warn(
f"Expected to find a permission holder, but none was found: {permission=}"
)
continue

type = permission.holder.get("type")
if not type:
logger.warning(
logger.warn(
f"Expected to find the type of permission holder, but none was found: {permission=}"
)
continue

@@ -105,9 +105,7 @@ def _get_slack_document_access(
channel_permissions: dict[str, ExternalAccess],
callback: IndexingHeartbeatInterface | None,
) -> Generator[DocExternalAccess, None, None]:
slim_doc_generator = slack_connector.retrieve_all_slim_docs_perm_sync(
callback=callback
)
slim_doc_generator = slack_connector.retrieve_all_slim_documents(callback=callback)

for doc_metadata_batch in slim_doc_generator:
for doc_metadata in doc_metadata_batch:

@@ -4,7 +4,7 @@ from ee.onyx.external_permissions.perm_sync_types import FetchAllDocumentsIdsFun
from onyx.access.models import DocExternalAccess
from onyx.access.models import ExternalAccess
from onyx.configs.constants import DocumentSource
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.interfaces import SlimConnector
from onyx.db.models import ConnectorCredentialPair
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.utils.logger import setup_logger
@@ -17,7 +17,7 @@ def generic_doc_sync(
fetch_all_existing_docs_ids_fn: FetchAllDocumentsIdsFunction,
callback: IndexingHeartbeatInterface | None,
doc_source: DocumentSource,
slim_connector: SlimConnectorWithPermSync,
slim_connector: SlimConnector,
label: str,
) -> Generator[DocExternalAccess, None, None]:
"""
@@ -40,7 +40,7 @@ def generic_doc_sync(
newly_fetched_doc_ids: set[str] = set()

logger.info(f"Fetching all slim documents from {doc_source}")
for doc_batch in slim_connector.retrieve_all_slim_docs_perm_sync(callback=callback):
for doc_batch in slim_connector.retrieve_all_slim_documents(callback=callback):
logger.info(f"Got {len(doc_batch)} slim documents from {doc_source}")

if callback:

@@ -8,7 +8,7 @@ from sqlalchemy.orm import Session
from ee.onyx.db.standard_answer import fetch_standard_answer_categories_by_names
from ee.onyx.db.standard_answer import find_matching_standard_answers
from onyx.configs.constants import MessageType
from onyx.configs.onyxbot_configs import ONYX_BOT_REACT_EMOJI
from onyx.configs.onyxbot_configs import DANSWER_REACT_EMOJI
from onyx.db.chat import create_chat_session
from onyx.db.chat import create_new_chat_message
from onyx.db.chat import get_chat_messages_by_sessions
@@ -193,7 +193,7 @@ def _handle_standard_answers(
db_session.commit()

update_emote_react(
emoji=ONYX_BOT_REACT_EMOJI,
emoji=DANSWER_REACT_EMOJI,
channel=message_info.channel_to_respond,
message_ts=message_info.msg_to_respond,
remove=True,

@@ -16,7 +16,6 @@ EE_PUBLIC_ENDPOINT_SPECS = PUBLIC_ENDPOINT_SPECS + [
# saml
("/auth/saml/authorize", {"GET"}),
("/auth/saml/callback", {"POST"}),
("/auth/saml/callback", {"GET"}),
("/auth/saml/logout", {"POST"}),
]


@@ -182,6 +182,7 @@ def admin_get_chat_sessions(
time_created=chat.time_created.isoformat(),
time_updated=chat.time_updated.isoformat(),
shared_status=chat.shared_status,
folder_id=chat.folder_id,
current_alternate_model=chat.current_alternate_model,
)
for chat in chat_sessions

@@ -110,6 +110,7 @@ async def upsert_saml_user(email: str) -> User:


async def prepare_from_fastapi_request(request: Request) -> dict[str, Any]:
form_data = await request.form()
if request.client is None:
raise ValueError("Invalid request for SAML")

@@ -124,27 +125,14 @@ async def prepare_from_fastapi_request(request: Request) -> dict[str, Any]:
"post_data": {},
"get_data": {},
}

# Handle query parameters (for GET requests)
if request.query_params:
rv["get_data"] = dict(request.query_params)

# Handle form data (for POST requests)
if request.method == "POST":
form_data = await request.form()
if "SAMLResponse" in form_data:
SAMLResponse = form_data["SAMLResponse"]
rv["post_data"]["SAMLResponse"] = SAMLResponse
if "RelayState" in form_data:
RelayState = form_data["RelayState"]
rv["post_data"]["RelayState"] = RelayState
else:
# For GET requests, check if SAMLResponse is in query params
if "SAMLResponse" in request.query_params:
rv["get_data"]["SAMLResponse"] = request.query_params["SAMLResponse"]
if "RelayState" in request.query_params:
rv["get_data"]["RelayState"] = request.query_params["RelayState"]

rv["get_data"] = (request.query_params,)
if "SAMLResponse" in form_data:
SAMLResponse = form_data["SAMLResponse"]
rv["post_data"]["SAMLResponse"] = SAMLResponse
if "RelayState" in form_data:
RelayState = form_data["RelayState"]
rv["post_data"]["RelayState"] = RelayState
return rv

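The prepare_from_fastapi_request change above separates the SAML HTTP-POST binding (SAMLResponse arrives in form data) from the HTTP-Redirect binding (SAMLResponse arrives in query parameters). A hedged, simplified sketch of just that branching, assuming FastAPI; the function name is illustrative and the dictionary shape follows the diff, but this is not the full implementation:

    from fastapi import Request

    async def extract_saml_payload(request: Request) -> dict[str, dict[str, str]]:
        payload: dict[str, dict[str, str]] = {"post_data": {}, "get_data": {}}
        if request.method == "POST":
            # HTTP-POST binding: values come from the submitted form
            form_data = await request.form()
            for key in ("SAMLResponse", "RelayState"):
                if key in form_data:
                    payload["post_data"][key] = str(form_data[key])
        else:
            # HTTP-Redirect binding: values come from the query string
            for key in ("SAMLResponse", "RelayState"):
                if key in request.query_params:
                    payload["get_data"][key] = request.query_params[key]
        return payload
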
@@ -160,27 +148,10 @@ async def saml_login(request: Request) -> SAMLAuthorizeResponse:
return SAMLAuthorizeResponse(authorization_url=callback_url)


@router.get("/callback")
async def saml_login_callback_get(
request: Request,
db_session: Session = Depends(get_session),
) -> Response:
"""Handle SAML callback via HTTP-Redirect binding (GET request)"""
return await _process_saml_callback(request, db_session)


@router.post("/callback")
async def saml_login_callback(
request: Request,
db_session: Session = Depends(get_session),
) -> Response:
"""Handle SAML callback via HTTP-POST binding (POST request)"""
return await _process_saml_callback(request, db_session)


async def _process_saml_callback(
request: Request,
db_session: Session,
) -> Response:
req = await prepare_from_fastapi_request(request)
auth = OneLogin_Saml2_Auth(req, custom_base_path=SAML_CONF_DIR)

@@ -6,6 +6,7 @@ from typing import Optional
from fastapi import APIRouter
from fastapi import HTTPException
from fastapi import Request
from litellm.exceptions import RateLimitError
from sentence_transformers import CrossEncoder  # type: ignore
from sentence_transformers import SentenceTransformer  # type: ignore

@@ -206,8 +207,6 @@ async def route_bi_encoder_embed(
async def process_embed_request(
embed_request: EmbedRequest, gpu_type: str = "UNKNOWN"
) -> EmbedResponse:
from litellm.exceptions import RateLimitError

# Only local models should use this endpoint - API providers should make direct API calls
if embed_request.provider_type is not None:
raise ValueError(

@@ -1,7 +1,6 @@
from collections.abc import Callable
from typing import cast

from sqlalchemy.orm import joinedload
from sqlalchemy.orm import Session

from onyx.access.models import DocumentAccess
@@ -11,7 +10,6 @@ from onyx.configs.constants import PUBLIC_DOC_PAT
from onyx.db.document import get_access_info_for_document
from onyx.db.document import get_access_info_for_documents
from onyx.db.models import User
from onyx.db.models import UserFile
from onyx.utils.variable_functionality import fetch_ee_implementation_or_noop
from onyx.utils.variable_functionality import fetch_versioned_implementation

@@ -126,25 +124,3 @@ def source_should_fetch_permissions_during_indexing(source: DocumentSource) -> b
),
)
return _source_should_fetch_permissions_during_indexing_func(source)


def get_access_for_user_files(
user_file_ids: list[str],
db_session: Session,
) -> dict[str, DocumentAccess]:
user_files = (
db_session.query(UserFile)
.options(joinedload(UserFile.user))  # Eager load the user relationship
.filter(UserFile.id.in_(user_file_ids))
.all()
)
return {
str(user_file.id): DocumentAccess.build(
user_emails=[user_file.user.email] if user_file.user else [],
user_groups=[],
is_public=True if user_file.user is None else False,
external_user_emails=[],
external_user_group_ids=[],
)
for user_file in user_files
}

@@ -24,8 +24,6 @@ def decision_router(state: MainState) -> list[Send | Hashable] | DRPath | str:
return END
elif next_tool_name == DRPath.LOGGER.value:
return DRPath.LOGGER
elif next_tool_name == DRPath.CLOSER.value:
return DRPath.CLOSER
else:
return DRPath.ORCHESTRATOR

@@ -41,6 +39,7 @@ def decision_router(state: MainState) -> list[Send | Hashable] | DRPath | str:
DRPath.WEB_SEARCH,
DRPath.KNOWLEDGE_GRAPH,
DRPath.IMAGE_GENERATION,
DRPath.PYTHON_TOOL,
)
and len(state.query_list) == 0
):

@@ -21,6 +21,7 @@ AVERAGE_TOOL_COSTS: dict[DRPath, float] = {
DRPath.WEB_SEARCH: 1.5,
DRPath.IMAGE_GENERATION: 3.0,
DRPath.GENERIC_TOOL: 1.5,  # TODO: see todo in OrchestratorTool
DRPath.PYTHON_TOOL: 2.0,
DRPath.CLOSER: 0.0,
}


@@ -27,6 +27,7 @@ class DRPath(str, Enum):
WEB_SEARCH = "Web Search"
IMAGE_GENERATION = "Image Generation"
GENERIC_INTERNAL_TOOL = "Generic Internal Tool"
PYTHON_TOOL = "Python"
CLOSER = "Closer"
LOGGER = "Logger"
END = "End"

@@ -26,6 +26,9 @@ from onyx.agents.agent_search.dr.sub_agents.image_generation.dr_image_generation
from onyx.agents.agent_search.dr.sub_agents.kg_search.dr_kg_search_graph_builder import (
dr_kg_search_graph_builder,
)
from onyx.agents.agent_search.dr.sub_agents.python_tool.dr_python_tool_graph_builder import (
dr_python_tool_graph_builder,
)
from onyx.agents.agent_search.dr.sub_agents.web_search.dr_ws_graph_builder import (
dr_ws_graph_builder,
)
@@ -58,12 +61,15 @@ def dr_graph_builder() -> StateGraph:
image_generation_graph = dr_image_generation_graph_builder().compile()
graph.add_node(DRPath.IMAGE_GENERATION, image_generation_graph)

custom_tool_graph = dr_custom_tool_graph_builder().compile()
graph.add_node(DRPath.GENERIC_TOOL, custom_tool_graph)

generic_internal_tool_graph = dr_generic_internal_tool_graph_builder().compile()
graph.add_node(DRPath.GENERIC_INTERNAL_TOOL, generic_internal_tool_graph)

python_tool_graph = dr_python_tool_graph_builder().compile()
graph.add_node(DRPath.PYTHON_TOOL, python_tool_graph)

custom_tool_graph = dr_custom_tool_graph_builder().compile()
graph.add_node(DRPath.GENERIC_TOOL, custom_tool_graph)

graph.add_node(DRPath.CLOSER, closer)
graph.add_node(DRPath.LOGGER, logging)

@@ -81,6 +87,7 @@ def dr_graph_builder() -> StateGraph:
graph.add_edge(start_key=DRPath.IMAGE_GENERATION, end_key=DRPath.ORCHESTRATOR)
graph.add_edge(start_key=DRPath.GENERIC_TOOL, end_key=DRPath.ORCHESTRATOR)
graph.add_edge(start_key=DRPath.GENERIC_INTERNAL_TOOL, end_key=DRPath.ORCHESTRATOR)
graph.add_edge(start_key=DRPath.PYTHON_TOOL, end_key=DRPath.ORCHESTRATOR)

graph.add_conditional_edges(DRPath.CLOSER, completeness_router)
graph.add_edge(start_key=DRPath.LOGGER, end_key=END)

@@ -35,24 +35,14 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
from onyx.agents.agent_search.shared_graph_utils.utils import run_with_timeout
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.agents.agent_search.utils import create_question_prompt
from onyx.chat.chat_utils import build_citation_map_from_numbers
from onyx.chat.chat_utils import saved_search_docs_from_llm_docs
from onyx.chat.models import PromptConfig
from onyx.chat.prompt_builder.citations_prompt import build_citations_system_message
from onyx.chat.prompt_builder.citations_prompt import build_citations_user_message
from onyx.chat.stream_processing.citation_processing import (
normalize_square_bracket_citations_to_double_with_links,
)
from onyx.configs.agent_configs import TF_DR_TIMEOUT_LONG
from onyx.configs.agent_configs import TF_DR_TIMEOUT_SHORT
from onyx.configs.constants import DocumentSource
from onyx.configs.constants import DocumentSourceDescription
from onyx.configs.constants import TMP_DRALPHA_PERSONA_NAME
from onyx.db.chat import create_search_doc_from_saved_search_doc
from onyx.db.chat import update_db_session_with_messages
from onyx.db.connector import fetch_unique_document_sources
from onyx.db.kg_config import get_kg_config_settings
from onyx.db.models import SearchDoc
from onyx.db.models import Tool
from onyx.db.tools import get_tools
from onyx.file_store.models import ChatFileType
@@ -62,7 +52,6 @@ from onyx.kg.utils.extraction_utils import get_relationship_types_str
from onyx.llm.utils import check_number_of_tokens
from onyx.llm.utils import get_max_input_tokens
from onyx.natural_language_processing.utils import get_tokenizer
from onyx.prompts.chat_prompts import PROJECT_INSTRUCTIONS_SEPARATOR
from onyx.prompts.dr_prompts import ANSWER_PROMPT_WO_TOOL_CALLING
from onyx.prompts.dr_prompts import DECISION_PROMPT_W_TOOL_CALLING
from onyx.prompts.dr_prompts import DECISION_PROMPT_WO_TOOL_CALLING
@@ -80,6 +69,7 @@ from onyx.tools.tool_implementations.images.image_generation_tool import (
from onyx.tools.tool_implementations.knowledge_graph.knowledge_graph_tool import (
KnowledgeGraphTool,
)
from onyx.tools.tool_implementations.python.python_tool import PythonTool
from onyx.tools.tool_implementations.search.search_tool import SearchTool
from onyx.tools.tool_implementations.web_search.web_search_tool import (
WebSearchTool,
@@ -145,6 +135,9 @@ def _get_available_tools(
continue
llm_path = DRPath.KNOWLEDGE_GRAPH.value
path = DRPath.KNOWLEDGE_GRAPH
elif isinstance(tool, PythonTool):
llm_path = DRPath.PYTHON_TOOL.value
path = DRPath.PYTHON_TOOL
elif isinstance(tool, ImageGenerationTool):
llm_path = DRPath.IMAGE_GENERATION.value
path = DRPath.IMAGE_GENERATION
@@ -321,52 +314,6 @@ def _get_existing_clarification_request(
return clarification, original_question, chat_history_string


def _persist_final_docs_and_citations(
db_session: Session,
context_llm_docs: list[Any] | None,
full_answer: str | None,
) -> tuple[list[SearchDoc], dict[int, int] | None]:
"""Persist final documents from in-context docs and derive citation mapping.

Returns the list of persisted `SearchDoc` records and an optional
citation map translating inline [[n]] references to DB doc indices.
"""
final_documents_db: list[SearchDoc] = []
citations_map: dict[int, int] | None = None

if not context_llm_docs:
return final_documents_db, citations_map

saved_search_docs = saved_search_docs_from_llm_docs(context_llm_docs)
for saved_doc in saved_search_docs:
db_doc = create_search_doc_from_saved_search_doc(saved_doc)
db_session.add(db_doc)
final_documents_db.append(db_doc)
db_session.flush()

cited_numbers: set[int] = set()
try:
# Match [[1]] or [[1, 2]] optionally followed by a link like ([[1]](http...))
matches = re.findall(
r"\[\[(\d+(?:,\s*\d+)*)\]\](?:\([^)]*\))?", full_answer or ""
)
for match in matches:
for num_str in match.split(","):
num = int(num_str.strip())
cited_numbers.add(num)
except Exception:
cited_numbers = set()

if cited_numbers and final_documents_db:
translations = build_citation_map_from_numbers(
cited_numbers=cited_numbers,
db_docs=final_documents_db,
)
citations_map = translations or None

return final_documents_db, citations_map


_ARTIFICIAL_ALL_ENCOMPASSING_TOOL = {
"type": "function",
"function": {
@@ -478,13 +425,6 @@ def clarifier(
assistant_system_prompt = PromptTemplate(DEFAULT_DR_SYSTEM_PROMPT).build()
assistant_task_prompt = ""

if graph_config.inputs.project_instructions:
assistant_system_prompt = (
assistant_system_prompt
+ PROJECT_INSTRUCTIONS_SEPARATOR
+ graph_config.inputs.project_instructions
)

chat_history_string = (
get_chat_history_string(
graph_config.inputs.prompt_builder.message_history,
@@ -513,11 +453,6 @@ def clarifier(
graph_config.inputs.files
)

# Use project/search context docs if available to enable citation mapping
context_llm_docs = getattr(
graph_config.inputs.prompt_builder, "context_llm_docs", None
)

if not (force_use_tool and force_use_tool.force_use):

if not use_tool_calling_llm or len(available_tools) == 1:
@@ -632,44 +567,10 @@ def clarifier(
active_source_type_descriptions_str=active_source_type_descriptions_str,
)

if context_llm_docs:
persona = graph_config.inputs.persona
if persona is not None:
prompt_config = PromptConfig.from_model(persona)
else:
prompt_config = PromptConfig(
system_prompt=assistant_system_prompt,
task_prompt="",
datetime_aware=True,
)

system_prompt_to_use_content = build_citations_system_message(
prompt_config
).content
system_prompt_to_use: str = cast(str, system_prompt_to_use_content)
if graph_config.inputs.project_instructions:
system_prompt_to_use = (
system_prompt_to_use
+ PROJECT_INSTRUCTIONS_SEPARATOR
+ graph_config.inputs.project_instructions
)
user_prompt_to_use = build_citations_user_message(
user_query=original_question,
files=[],
prompt_config=prompt_config,
context_docs=context_llm_docs,
all_doc_useful=False,
history_message=chat_history_string,
context_type="user files",
).content
else:
system_prompt_to_use = assistant_system_prompt
user_prompt_to_use = decision_prompt + assistant_task_prompt

stream = graph_config.tooling.primary_llm.stream(
prompt=create_question_prompt(
cast(str, system_prompt_to_use),
cast(str, user_prompt_to_use),
assistant_system_prompt,
decision_prompt + assistant_task_prompt,
uploaded_image_context=uploaded_image_context,
),
tools=([_ARTIFICIAL_ALL_ENCOMPASSING_TOOL]),
@@ -682,8 +583,6 @@ def clarifier(
should_stream_answer=True,
writer=writer,
ind=0,
final_search_results=context_llm_docs,
displayed_search_results=context_llm_docs,
generate_final_answer=True,
chat_message_id=str(graph_config.persistence.chat_session_id),
)
@@ -691,32 +590,19 @@ def clarifier(
if len(full_response.ai_message_chunk.tool_calls) == 0:

if isinstance(full_response.full_answer, str):
full_answer = (
normalize_square_bracket_citations_to_double_with_links(
full_response.full_answer
)
)
full_answer = full_response.full_answer
else:
full_answer = None

# Persist final documents and derive citations when using in-context docs
final_documents_db, citations_map = _persist_final_docs_and_citations(
db_session=db_session,
context_llm_docs=context_llm_docs,
full_answer=full_answer,
)

update_db_session_with_messages(
db_session=db_session,
chat_message_id=message_id,
chat_session_id=graph_config.persistence.chat_session_id,
is_agentic=graph_config.behavior.use_agentic_search,
message=full_answer,
token_count=len(llm_tokenizer.encode(full_answer or "")),
citations=citations_map,
final_documents=final_documents_db or None,
update_parent_message=True,
research_answer_purpose=ResearchAnswerPurpose.ANSWER,
token_count=len(llm_tokenizer.encode(full_answer or "")),
)

db_session.commit()
@@ -896,6 +782,6 @@ def clarifier(
active_source_types_descriptions="\n".join(active_source_types_descriptions),
assistant_system_prompt=assistant_system_prompt,
assistant_task_prompt=assistant_task_prompt,
uploaded_test_context=uploaded_text_context,
uploaded_text_context=uploaded_text_context,
uploaded_image_context=uploaded_image_context,
)

@@ -140,7 +140,7 @@ def orchestrator(

available_tools = state.available_tools or {}

uploaded_context = state.uploaded_test_context or ""
uploaded_context = state.uploaded_text_context or ""
uploaded_image_context = state.uploaded_image_context or []

questions = [
@@ -181,15 +181,6 @@ def orchestrator(
remaining_time_budget = DR_TIME_BUDGET_BY_TYPE[research_type]

elif remaining_time_budget <= 0:

write_custom_event(
current_step_nr,
SectionEnd(),
writer,
)

current_step_nr += 1

return OrchestrationUpdate(
tools_used=[DRPath.CLOSER.value],
current_step_nr=current_step_nr,

@@ -42,7 +42,6 @@ from onyx.db.models import ResearchAgentIteration
from onyx.db.models import ResearchAgentIterationSubStep
from onyx.db.models import SearchDoc as DbSearchDoc
from onyx.llm.utils import check_number_of_tokens
from onyx.prompts.chat_prompts import PROJECT_INSTRUCTIONS_SEPARATOR
from onyx.prompts.dr_prompts import FINAL_ANSWER_PROMPT_W_SUB_ANSWERS
from onyx.prompts.dr_prompts import FINAL_ANSWER_PROMPT_WITHOUT_SUB_ANSWERS
from onyx.prompts.dr_prompts import TEST_INFO_COMPLETE_PROMPT
@@ -226,10 +225,10 @@ def closer(

research_type = graph_config.behavior.research_type

assistant_system_prompt: str = state.assistant_system_prompt or ""
assistant_system_prompt = state.assistant_system_prompt
assistant_task_prompt = state.assistant_task_prompt

uploaded_context = state.uploaded_test_context or ""
uploaded_context = state.uploaded_text_context or ""

clarification = state.clarification
prompt_question = get_prompt_question(base_question, clarification)
@@ -350,13 +349,6 @@ def closer(
uploaded_context=uploaded_context,
)

if graph_config.inputs.project_instructions:
assistant_system_prompt = (
assistant_system_prompt
+ PROJECT_INSTRUCTIONS_SEPARATOR
+ (graph_config.inputs.project_instructions or "")
)

all_context_llmdocs = [
llm_doc_from_inference_section(inference_section)
for inference_section in all_cited_documents

@@ -9,7 +9,6 @@ from pydantic import BaseModel
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.chat_utils import saved_search_docs_from_llm_docs
from onyx.chat.models import AgentAnswerPiece
from onyx.chat.models import CitationInfo
from onyx.chat.models import LlmDoc
from onyx.chat.models import OnyxAnswerPiece
from onyx.chat.stream_processing.answer_response_handler import AnswerResponseHandler
@@ -19,8 +18,6 @@ from onyx.chat.stream_processing.answer_response_handler import (
)
from onyx.chat.stream_processing.utils import map_document_id_order
from onyx.context.search.models import InferenceSection
from onyx.server.query_and_chat.streaming_models import CitationDelta
from onyx.server.query_and_chat.streaming_models import CitationStart
from onyx.server.query_and_chat.streaming_models import MessageDelta
from onyx.server.query_and_chat.streaming_models import MessageStart
from onyx.server.query_and_chat.streaming_models import SectionEnd
@@ -59,9 +56,6 @@ def process_llm_stream(

full_answer = ""
start_final_answer_streaming_set = False
# Accumulate citation infos if handler emits them
collected_citation_infos: list[CitationInfo] = []

# This stream will be the llm answer if no tool is chosen. When a tool is chosen,
# the stream will contain AIMessageChunks with tool call information.
for message in messages:
@@ -108,9 +102,6 @@ def process_llm_stream(
MessageDelta(content=response_part.answer_piece),
writer,
)
# collect citation info objects
elif isinstance(response_part, CitationInfo):
collected_citation_infos.append(response_part)

if generate_final_answer and start_final_answer_streaming_set:
# start_final_answer_streaming_set is only set if the answer is verbal and not a tool call
@@ -120,14 +111,6 @@ def process_llm_stream(
writer,
)

# Emit citations section if any were collected
if collected_citation_infos:
write_custom_event(ind, CitationStart(), writer)
write_custom_event(
ind, CitationDelta(citations=collected_citation_infos), writer
)
write_custom_event(ind, SectionEnd(), writer)

logger.debug(f"Full answer: {full_answer}")
return BasicSearchProcessedStreamResults(
ai_message_chunk=cast(AIMessageChunk, tool_call_chunk), full_answer=full_answer

@@ -46,7 +46,7 @@ class OrchestrationSetup(OrchestrationUpdate):
active_source_types_descriptions: str | None = None
assistant_system_prompt: str | None = None
assistant_task_prompt: str | None = None
uploaded_test_context: str | None = None
uploaded_text_context: str | None = None
uploaded_image_context: list[dict[str, Any]] | None = None

@@ -1,7 +1,6 @@
import re
from datetime import datetime
from typing import cast
from uuid import UUID

from langchain_core.runnables import RunnableConfig
from langgraph.types import StreamWriter
@@ -74,7 +73,6 @@ def basic_search(

search_tool_info = state.available_tools[state.tools_used[-1]]
search_tool = cast(SearchTool, search_tool_info.tool_object)
force_use_tool = graph_config.tooling.force_use_tool

# sanity check
if search_tool != graph_config.tooling.search_tool:
@@ -143,15 +141,6 @@ def basic_search(
retrieved_docs: list[InferenceSection] = []
callback_container: list[list[InferenceSection]] = []

user_file_ids: list[UUID] | None = None
project_id: int | None = None
if force_use_tool.override_kwargs and isinstance(
force_use_tool.override_kwargs, SearchToolOverrideKwargs
):
override_kwargs = force_use_tool.override_kwargs
user_file_ids = override_kwargs.user_file_ids
project_id = override_kwargs.project_id

# new db session to avoid concurrency issues
with get_session_with_current_tenant() as search_db_session:
for tool_response in search_tool.run(
@@ -164,8 +153,6 @@ def basic_search(
retrieved_sections_callback=callback_container.append,
skip_query_analysis=True,
original_query=rewritten_query,
user_file_ids=user_file_ids,
project_id=project_id,
),
):
# get retrieved docs to send to the rest of the graph

@@ -5,12 +5,12 @@ from langgraph.types import StreamWriter

from onyx.agents.agent_search.dr.sub_agents.states import SubAgentMainState
from onyx.agents.agent_search.dr.sub_agents.states import SubAgentUpdate
from onyx.agents.agent_search.dr.utils import chunks_or_sections_to_search_docs
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.context.search.models import SavedSearchDoc
from onyx.context.search.models import SearchDoc
from onyx.server.query_and_chat.streaming_models import SectionEnd
from onyx.utils.logger import setup_logger

@@ -47,7 +47,7 @@ def is_reducer(
doc_list.append(x)

# Convert InferenceSections to SavedSearchDocs
search_docs = SearchDoc.from_chunks_or_sections(doc_list)
search_docs = chunks_or_sections_to_search_docs(doc_list)
retrieved_saved_search_docs = [
SavedSearchDoc.from_search_doc(search_doc, db_doc_id=0)
for search_doc in search_docs

@@ -0,0 +1 @@
"""Python Tool sub-agent for deep research."""
@@ -0,0 +1,36 @@
from datetime import datetime

from langchain_core.runnables import RunnableConfig
from langgraph.types import StreamWriter

from onyx.agents.agent_search.dr.states import LoggerUpdate
from onyx.agents.agent_search.dr.sub_agents.states import SubAgentInput
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.utils.logger import setup_logger

logger = setup_logger()


def python_tool_branch(
state: SubAgentInput, config: RunnableConfig, writer: StreamWriter = lambda _: None
) -> LoggerUpdate:
"""Log the beginning of a Python Tool branch."""

node_start_time = datetime.now()
iteration_nr = state.iteration_nr

logger.debug(
f"Python Tool branch start for iteration {iteration_nr} at {datetime.now()}"
)

return LoggerUpdate(
log_messages=[
get_langgraph_node_log_string(
graph_component="python_tool",
node_name="branching",
node_start_time=node_start_time,
)
],
)
@@ -0,0 +1,257 @@
|
||||
import base64
|
||||
import json
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from typing import cast
|
||||
|
||||
from langchain_core.messages import AIMessage
|
||||
from langchain_core.runnables import RunnableConfig
|
||||
from langgraph.types import StreamWriter
|
||||
|
||||
from onyx.agents.agent_search.dr.sub_agents.states import BranchInput
|
||||
from onyx.agents.agent_search.dr.sub_agents.states import BranchUpdate
|
||||
from onyx.agents.agent_search.dr.sub_agents.states import IterationAnswer
|
||||
from onyx.agents.agent_search.models import GraphConfig
|
||||
from onyx.agents.agent_search.shared_graph_utils.utils import (
|
||||
get_langgraph_node_log_string,
|
||||
)
|
||||
from onyx.configs.agent_configs import TF_DR_TIMEOUT_SHORT
|
||||
from onyx.configs.constants import MessageType
|
||||
from onyx.file_store.models import ChatFileType
|
||||
from onyx.file_store.models import InMemoryChatFile
|
||||
from onyx.llm.utils import build_content_with_imgs
|
||||
from onyx.prompts.dr_prompts import CUSTOM_TOOL_PREP_PROMPT
|
||||
from onyx.prompts.dr_prompts import PYTHON_TOOL_USE_RESPONSE_PROMPT
|
||||
from onyx.tools.tool_implementations.python.python_tool import PythonTool
|
||||
from onyx.tools.tool_implementations.python.python_tool import PythonToolResult
|
||||
from onyx.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
def _serialize_chat_files(chat_files: list[InMemoryChatFile]) -> list[dict[str, Any]]:
|
||||
serialized_files: list[dict[str, Any]] = []
|
||||
for chat_file in chat_files:
|
||||
file_payload: dict[str, Any] = {
|
||||
"id": str(chat_file.file_id),
|
||||
"name": chat_file.filename,
|
||||
"type": chat_file.file_type.value,
|
||||
}
|
||||
if chat_file.file_type == ChatFileType.IMAGE:
|
||||
file_payload["content"] = chat_file.to_base64()
|
||||
file_payload["is_base64"] = True
|
||||
elif chat_file.file_type.is_text_file():
|
||||
file_payload["content"] = chat_file.content.decode(
|
||||
"utf-8", errors="replace"
|
||||
)
|
||||
file_payload["is_base64"] = False
|
||||
else:
|
||||
file_payload["content"] = base64.b64encode(chat_file.content).decode(
|
||||
"utf-8"
|
||||
)
|
||||
file_payload["is_base64"] = True
|
||||
serialized_files.append(file_payload)
|
||||
|
||||
return serialized_files
|
||||
|
||||
|
||||
def python_tool_act(
|
||||
state: BranchInput,
|
||||
config: RunnableConfig,
|
||||
writer: StreamWriter = lambda _: None,
|
||||
) -> BranchUpdate:
|
||||
"""Execute the Python Tool with any files supplied by the user."""
|
||||
|
||||
node_start_time = datetime.now()
|
||||
iteration_nr = state.iteration_nr
|
||||
parallelization_nr = state.parallelization_nr
|
||||
|
||||
if not state.available_tools:
|
||||
raise ValueError("available_tools is not set")
|
||||
|
||||
tool_key = state.tools_used[-1]
|
||||
python_tool_info = state.available_tools[tool_key]
|
||||
python_tool = cast(PythonTool | None, python_tool_info.tool_object)
|
||||
|
||||
if python_tool is None:
|
||||
raise ValueError("python_tool is not set")
|
||||
|
||||
branch_query = state.branch_question
|
||||
if not branch_query:
|
||||
raise ValueError("branch_query is not set")
|
||||
|
||||
graph_config = cast(GraphConfig, config["metadata"]["config"])
|
||||
base_question = graph_config.inputs.prompt_builder.raw_user_query
|
||||
files = graph_config.inputs.files
|
||||
|
||||
logger.debug(
|
||||
"Tool call start for %s %s.%s at %s",
|
||||
python_tool.llm_name,
|
||||
iteration_nr,
|
||||
parallelization_nr,
|
||||
datetime.now(),
|
||||
)
|
||||
|
||||
tool_args: dict[str, Any] | None = None
|
||||
if graph_config.tooling.using_tool_calling_llm:
|
||||
tool_use_prompt = CUSTOM_TOOL_PREP_PROMPT.build(
|
||||
query=branch_query,
|
||||
base_question=base_question,
|
||||
tool_description=python_tool_info.description,
|
||||
)
|
||||
|
||||
content_with_files = build_content_with_imgs(
|
||||
message=tool_use_prompt,
|
||||
files=files,
|
||||
message_type=MessageType.USER,
|
||||
)
|
||||
|
||||
tool_prompt_message: dict[str, Any] = {
|
||||
"role": "user",
|
||||
"content": content_with_files,
|
||||
}
|
||||
if files:
|
||||
tool_prompt_message["files"] = _serialize_chat_files(files)
|
||||
|
||||
tool_calling_msg = graph_config.tooling.primary_llm.invoke(
|
||||
[tool_prompt_message],
|
||||
tools=[python_tool.tool_definition()],
|
||||
tool_choice="required",
|
||||
timeout_override=TF_DR_TIMEOUT_SHORT,
|
||||
)
|
||||
|
||||
if isinstance(tool_calling_msg, AIMessage) and tool_calling_msg.tool_calls:
|
||||
tool_args = tool_calling_msg.tool_calls[0].get("args")
|
||||
else:
|
||||
logger.warning("Tool-calling LLM did not emit a tool call for Python Tool")
|
||||
|
||||
if tool_args is None:
|
||||
tool_args = python_tool.get_args_for_non_tool_calling_llm(
|
||||
query=branch_query,
|
||||
history=[],
|
||||
llm=graph_config.tooling.primary_llm,
|
||||
force_run=True,
|
||||
)
|
||||
|
||||
if tool_args is None:
|
||||
raise ValueError("Failed to obtain tool arguments from LLM")
|
||||
|
||||
if "files" in tool_args:
|
||||
tool_args = {key: value for key, value in tool_args.items() if key != "files"}
|
||||
|
||||
override_kwargs = {"files": files or []}
|
||||
|
||||
tool_responses = list(python_tool.run(override_kwargs=override_kwargs, **tool_args))
|
||||
|
||||
python_tool_result: PythonToolResult | None = None
|
||||
for response in tool_responses:
|
||||
if isinstance(response.response, PythonToolResult):
|
||||
python_tool_result = response.response
|
||||
break
|
||||
|
||||
if python_tool_result is None:
|
||||
raise ValueError("Python tool did not return a valid result")
|
||||
|
||||
final_result = python_tool.final_result(*tool_responses)
|
||||
tool_result_str = json.dumps(final_result, ensure_ascii=False)
|
||||
|
||||
tool_summary_prompt = PYTHON_TOOL_USE_RESPONSE_PROMPT.build(
|
||||
base_question=base_question,
|
||||
tool_response=tool_result_str,
|
||||
)
|
||||
|
||||
initial_files = list(files or [])
|
||||
generated_files: list[InMemoryChatFile] = []
|
||||
for artifact in python_tool_result.artifacts:
|
||||
if not artifact.file_id:
|
||||
continue
|
||||
|
||||
chat_file = python_tool._available_files.get(artifact.file_id)
|
||||
if not chat_file:
|
||||
logger.warning(
|
||||
"Generated artifact with id %s not found in available files",
|
||||
artifact.file_id,
|
||||
)
|
||||
continue
|
||||
|
||||
filename = (
|
||||
chat_file.filename
|
||||
or artifact.display_name
|
||||
or artifact.path
|
||||
or str(artifact.file_id)
|
||||
)
|
||||
filename = Path(filename).name or str(artifact.file_id)
|
||||
if not filename.startswith("generated_"):
|
||||
filename = f"generated_{filename}"
|
||||
|
||||
generated_files.append(
|
||||
InMemoryChatFile(
|
||||
file_id=chat_file.file_id,
|
||||
content=chat_file.content,
|
||||
file_type=chat_file.file_type,
|
||||
filename=filename,
|
||||
)
|
||||
)
|
||||
|
||||
summary_files = initial_files + generated_files
|
||||
summary_content = build_content_with_imgs(
|
||||
message=tool_summary_prompt,
|
||||
files=summary_files,
|
||||
message_type=MessageType.USER,
|
||||
)
|
||||
|
||||
summary_message: dict[str, Any] = {
|
||||
"role": "user",
|
||||
"content": summary_content,
|
||||
}
|
||||
if summary_files:
|
||||
summary_message["files"] = _serialize_chat_files(summary_files)
|
||||
|
||||
answer_string = str(
|
||||
graph_config.tooling.primary_llm.invoke(
|
||||
[summary_message],
|
||||
timeout_override=TF_DR_TIMEOUT_SHORT,
|
||||
).content
|
||||
).strip()
|
||||
|
||||
artifact_file_ids = [
|
||||
artifact.file_id
|
||||
for artifact in python_tool_result.artifacts
|
||||
if artifact.file_id
|
||||
]
|
||||
|
||||
logger.debug(
|
||||
"Tool call end for %s %s.%s at %s",
|
||||
python_tool.llm_name,
|
||||
iteration_nr,
|
||||
parallelization_nr,
|
||||
datetime.now(),
|
||||
)
|
||||
|
||||
return BranchUpdate(
|
||||
branch_iteration_responses=[
|
||||
IterationAnswer(
|
||||
tool=python_tool.llm_name,
|
||||
tool_id=python_tool_info.tool_id,
|
||||
iteration_nr=iteration_nr,
|
||||
parallelization_nr=parallelization_nr,
|
||||
question=branch_query,
|
||||
answer=answer_string,
|
||||
claims=[],
|
||||
cited_documents={},
|
||||
reasoning="",
|
||||
additional_data=None,
|
||||
response_type="json",
|
||||
data=final_result,
|
||||
file_ids=artifact_file_ids or None,
|
||||
)
|
||||
],
|
||||
log_messages=[
|
||||
get_langgraph_node_log_string(
|
||||
graph_component="python_tool",
|
||||
node_name="tool_calling",
|
||||
node_start_time=node_start_time,
|
||||
)
|
||||
],
|
||||
)
|
||||
@@ -0,0 +1,76 @@
|
||||
from datetime import datetime
|
||||
|
||||
from langchain_core.runnables import RunnableConfig
|
||||
from langgraph.types import StreamWriter
|
||||
|
||||
from onyx.agents.agent_search.dr.sub_agents.states import SubAgentMainState
|
||||
from onyx.agents.agent_search.dr.sub_agents.states import SubAgentUpdate
|
||||
from onyx.agents.agent_search.shared_graph_utils.utils import (
|
||||
get_langgraph_node_log_string,
|
||||
)
|
||||
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
|
||||
from onyx.server.query_and_chat.streaming_models import CustomToolDelta
|
||||
from onyx.server.query_and_chat.streaming_models import CustomToolStart
|
||||
from onyx.server.query_and_chat.streaming_models import SectionEnd
|
||||
from onyx.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
def python_tool_reducer(
|
||||
state: SubAgentMainState,
|
||||
config: RunnableConfig,
|
||||
writer: StreamWriter = lambda _: None,
|
||||
) -> SubAgentUpdate:
|
||||
"""Stream the Python Tool result back to the client."""
|
||||
|
||||
node_start_time = datetime.now()
|
||||
current_step_nr = state.current_step_nr
|
||||
branch_updates = state.branch_iteration_responses
|
||||
current_iteration = state.iteration_nr
|
||||
|
||||
new_updates = [
|
||||
update for update in branch_updates if update.iteration_nr == current_iteration
|
||||
]
|
||||
|
||||
for new_update in new_updates:
|
||||
if not new_update.response_type:
|
||||
raise ValueError("Response type is not returned.")
|
||||
|
||||
write_custom_event(
|
||||
current_step_nr,
|
||||
CustomToolStart(
|
||||
tool_name=new_update.tool,
|
||||
),
|
||||
writer,
|
||||
)
|
||||
|
||||
write_custom_event(
|
||||
current_step_nr,
|
||||
CustomToolDelta(
|
||||
tool_name=new_update.tool,
|
||||
response_type=new_update.response_type,
|
||||
data=new_update.data,
|
||||
file_ids=new_update.file_ids,
|
||||
),
|
||||
writer,
|
||||
)
|
||||
|
||||
write_custom_event(
|
||||
current_step_nr,
|
||||
SectionEnd(),
|
||||
writer,
|
||||
)
|
||||
|
||||
current_step_nr += 1
|
||||
|
||||
return SubAgentUpdate(
|
||||
iteration_responses=new_updates,
|
||||
log_messages=[
|
||||
get_langgraph_node_log_string(
|
||||
graph_component="python_tool",
|
||||
node_name="consolidation",
|
||||
node_start_time=node_start_time,
|
||||
)
|
||||
],
|
||||
)
|
||||
@@ -0,0 +1,26 @@
|
||||
from collections.abc import Hashable
|
||||
|
||||
from langgraph.types import Send
|
||||
|
||||
from onyx.agents.agent_search.dr.sub_agents.states import BranchInput
|
||||
from onyx.agents.agent_search.dr.sub_agents.states import SubAgentInput
|
||||
|
||||
|
||||
def branching_router(state: SubAgentInput) -> list[Send | Hashable]:
|
||||
"""Forward the current query to the Python Tool executor."""
|
||||
|
||||
return [
|
||||
Send(
|
||||
"act",
|
||||
BranchInput(
|
||||
iteration_nr=state.iteration_nr,
|
||||
parallelization_nr=parallelization_nr,
|
||||
branch_question=query,
|
||||
context="",
|
||||
active_source_types=state.active_source_types,
|
||||
tools_used=state.tools_used,
|
||||
available_tools=state.available_tools,
|
||||
),
|
||||
)
|
||||
for parallelization_nr, query in enumerate(state.query_list[:1])
|
||||
]
|
||||
@@ -0,0 +1,38 @@
|
||||
from langgraph.graph import END
|
||||
from langgraph.graph import START
|
||||
from langgraph.graph import StateGraph
|
||||
|
||||
from onyx.agents.agent_search.dr.sub_agents.python_tool.dr_python_tool_1_branch import (
|
||||
python_tool_branch,
|
||||
)
|
||||
from onyx.agents.agent_search.dr.sub_agents.python_tool.dr_python_tool_2_act import (
|
||||
python_tool_act,
|
||||
)
|
||||
from onyx.agents.agent_search.dr.sub_agents.python_tool.dr_python_tool_3_reduce import (
|
||||
python_tool_reducer,
|
||||
)
|
||||
from onyx.agents.agent_search.dr.sub_agents.python_tool.dr_python_tool_conditional_edges import (
|
||||
branching_router,
|
||||
)
|
||||
from onyx.agents.agent_search.dr.sub_agents.states import SubAgentInput
|
||||
from onyx.agents.agent_search.dr.sub_agents.states import SubAgentMainState
|
||||
from onyx.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
def dr_python_tool_graph_builder() -> StateGraph:
|
||||
"""LangGraph graph builder for the Python Tool sub-agent."""
|
||||
|
||||
graph = StateGraph(state_schema=SubAgentMainState, input=SubAgentInput)
|
||||
|
||||
graph.add_node("branch", python_tool_branch)
|
||||
graph.add_node("act", python_tool_act)
|
||||
graph.add_node("reducer", python_tool_reducer)
|
||||
|
||||
graph.add_edge(start_key=START, end_key="branch")
|
||||
graph.add_conditional_edges("branch", branching_router)
|
||||
graph.add_edge(start_key="act", end_key="reducer")
|
||||
graph.add_edge(start_key="reducer", end_key=END)
|
||||
|
||||
return graph
|
||||
@@ -1,147 +0,0 @@
|
||||
import json
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
import requests
|
||||
|
||||
from onyx.agents.agent_search.dr.sub_agents.web_search.models import (
|
||||
InternetContent,
|
||||
)
|
||||
from onyx.agents.agent_search.dr.sub_agents.web_search.models import (
|
||||
InternetSearchProvider,
|
||||
)
|
||||
from onyx.agents.agent_search.dr.sub_agents.web_search.models import (
|
||||
InternetSearchResult,
|
||||
)
|
||||
from onyx.configs.chat_configs import SERPER_API_KEY
|
||||
from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
|
||||
from onyx.utils.retry_wrapper import retry_builder
|
||||
|
||||
SERPER_SEARCH_URL = "https://google.serper.dev/search"
|
||||
SERPER_CONTENTS_URL = "https://scrape.serper.dev"
|
||||
|
||||
|
||||
class SerperClient(InternetSearchProvider):
|
||||
def __init__(self, api_key: str | None = SERPER_API_KEY) -> None:
|
||||
self.headers = {
|
||||
"X-API-KEY": api_key,
|
||||
"Content-Type": "application/json",
|
||||
}
|
||||
|
||||
@retry_builder(tries=3, delay=1, backoff=2)
|
||||
def search(self, query: str) -> list[InternetSearchResult]:
|
||||
payload = {
|
||||
"q": query,
|
||||
}
|
||||
|
||||
response = requests.post(
|
||||
SERPER_SEARCH_URL,
|
||||
headers=self.headers,
|
||||
data=json.dumps(payload),
|
||||
)
|
||||
|
||||
response.raise_for_status()
|
||||
|
||||
results = response.json()
|
||||
organic_results = results["organic"]
|
||||
|
||||
return [
|
||||
InternetSearchResult(
|
||||
title=result["title"],
|
||||
link=result["link"],
|
||||
snippet=result["snippet"],
|
||||
author=None,
|
||||
published_date=None,
|
||||
)
|
||||
for result in organic_results
|
||||
]
|
||||
|
||||
def contents(self, urls: list[str]) -> list[InternetContent]:
|
||||
if not urls:
|
||||
return []
|
||||
|
||||
# Serper can respond with 500s regularly. We want to retry,
# but in the event of failure, return an unsuccessful scrape.
|
||||
def safe_get_webpage_content(url: str) -> InternetContent:
|
||||
try:
|
||||
return self._get_webpage_content(url)
|
||||
except Exception:
|
||||
return InternetContent(
|
||||
title="",
|
||||
link=url,
|
||||
full_content="",
|
||||
published_date=None,
|
||||
scrape_successful=False,
|
||||
)
|
||||
|
||||
with ThreadPoolExecutor(max_workers=min(8, len(urls))) as e:
|
||||
return list(e.map(safe_get_webpage_content, urls))
|
||||
|
||||
@retry_builder(tries=3, delay=1, backoff=2)
|
||||
def _get_webpage_content(self, url: str) -> InternetContent:
|
||||
payload = {
|
||||
"url": url,
|
||||
}
|
||||
|
||||
response = requests.post(
|
||||
SERPER_CONTENTS_URL,
|
||||
headers=self.headers,
|
||||
data=json.dumps(payload),
|
||||
)
|
||||
|
||||
# 400 returned when serper cannot scrape
|
||||
if response.status_code == 400:
|
||||
return InternetContent(
|
||||
title="",
|
||||
link=url,
|
||||
full_content="",
|
||||
published_date=None,
|
||||
scrape_successful=False,
|
||||
)
|
||||
|
||||
response.raise_for_status()
|
||||
|
||||
response_json = response.json()
|
||||
|
||||
# Response only guarantees text
|
||||
text = response_json["text"]
|
||||
|
||||
# metadata & jsonld is not guaranteed to be present
|
||||
metadata = response_json.get("metadata", {})
|
||||
jsonld = response_json.get("jsonld", {})
|
||||
|
||||
title = extract_title_from_metadata(metadata)
|
||||
|
||||
# Serper does not provide a reliable mechanism to extract the url
|
||||
response_url = url
|
||||
published_date_str = extract_published_date_from_jsonld(jsonld)
|
||||
published_date = None
|
||||
|
||||
if published_date_str:
|
||||
try:
|
||||
published_date = time_str_to_utc(published_date_str)
|
||||
except Exception:
|
||||
published_date = None
|
||||
|
||||
return InternetContent(
|
||||
title=title or "",
|
||||
link=response_url,
|
||||
full_content=text or "",
|
||||
published_date=published_date,
|
||||
)
|
||||
|
||||
|
||||
def extract_title_from_metadata(metadata: dict[str, str]) -> str | None:
|
||||
keys = ["title", "og:title"]
|
||||
return extract_value_from_dict(metadata, keys)
|
||||
|
||||
|
||||
def extract_published_date_from_jsonld(jsonld: dict[str, str]) -> str | None:
|
||||
keys = ["dateModified"]
|
||||
return extract_value_from_dict(jsonld, keys)
|
||||
|
||||
|
||||
def extract_value_from_dict(data: dict[str, str], keys: list[str]) -> str | None:
|
||||
for key in keys:
|
||||
if key in data:
|
||||
return data[key]
|
||||
return None
|
||||
@@ -26,7 +26,6 @@ class InternetContent(BaseModel):
link: str
full_content: str
published_date: datetime | None = None
scrape_successful: bool = True


class InternetSearchProvider(ABC):

@@ -1,19 +1,13 @@
from onyx.agents.agent_search.dr.sub_agents.web_search.clients.exa_client import (
ExaClient,
)
from onyx.agents.agent_search.dr.sub_agents.web_search.clients.serper_client import (
SerperClient,
)
from onyx.agents.agent_search.dr.sub_agents.web_search.models import (
InternetSearchProvider,
)
from onyx.configs.chat_configs import EXA_API_KEY
from onyx.configs.chat_configs import SERPER_API_KEY


def get_default_provider() -> InternetSearchProvider | None:
if EXA_API_KEY:
return ExaClient()
if SERPER_API_KEY:
return SerperClient()
return None

@@ -34,7 +34,7 @@ def dummy_inference_section_from_internet_content(
boost=1,
recency_bias=1.0,
score=1.0,
hidden=(not result.scrape_successful),
hidden=False,
metadata={},
match_highlights=[],
doc_summary=truncated_content,

@@ -13,7 +13,7 @@ from onyx.agents.agent_search.shared_graph_utils.operators import (
|
||||
)
|
||||
from onyx.context.search.models import InferenceSection
|
||||
from onyx.context.search.models import SavedSearchDoc
|
||||
from onyx.context.search.models import SearchDoc
|
||||
from onyx.context.search.utils import chunks_or_sections_to_search_docs
|
||||
from onyx.tools.tool_implementations.web_search.web_search_tool import (
|
||||
WebSearchTool,
|
||||
)
|
||||
@@ -266,7 +266,7 @@ def convert_inference_sections_to_search_docs(
|
||||
is_internet: bool = False,
|
||||
) -> list[SavedSearchDoc]:
|
||||
# Convert InferenceSections to SavedSearchDocs
|
||||
search_docs = SearchDoc.from_chunks_or_sections(inference_sections)
|
||||
search_docs = chunks_or_sections_to_search_docs(inference_sections)
|
||||
for search_doc in search_docs:
|
||||
search_doc.is_internet = is_internet
|
||||
|
||||
|
||||
@@ -24,7 +24,6 @@ class GraphInputs(BaseModel):
prompt_builder: AnswerPromptBuilder
files: list[InMemoryChatFile] | None = None
structured_response_format: dict | None = None
project_instructions: str | None = None

class Config:
arbitrary_types_allowed = True

@@ -1,6 +1,6 @@
from pydantic import BaseModel

from onyx.chat.prompt_builder.schemas import PromptSnapshot
from onyx.chat.prompt_builder.answer_prompt_builder import PromptSnapshot
from onyx.tools.message import ToolCallSummary
from onyx.tools.models import SearchToolOverrideKwargs
from onyx.tools.models import ToolCallFinalResult

@@ -8,6 +8,8 @@ from typing import TypeVar
|
||||
from langchain.schema.language_model import LanguageModelInput
|
||||
from langchain_core.messages import HumanMessage
|
||||
from langgraph.types import StreamWriter
|
||||
from litellm import get_supported_openai_params
|
||||
from litellm import supports_response_schema
|
||||
from pydantic import BaseModel
|
||||
|
||||
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
|
||||
@@ -145,7 +147,6 @@ def invoke_llm_json(
|
||||
Invoke an LLM, forcing it to respond in a specified JSON format if possible,
|
||||
and return an object of that schema.
|
||||
"""
|
||||
from litellm.utils import get_supported_openai_params, supports_response_schema
|
||||
|
||||
# check if the model supports response_format: json_schema
|
||||
supports_json = "response_format" in (
|
||||
|
||||
@@ -115,6 +115,7 @@ celery_app.autodiscover_tasks(
"onyx.background.celery.tasks.vespa",
"onyx.background.celery.tasks.connector_deletion",
"onyx.background.celery.tasks.doc_permission_syncing",
"onyx.background.celery.tasks.user_file_folder_sync",
"onyx.background.celery.tasks.docprocessing",
]
)

@@ -323,6 +323,7 @@ celery_app.autodiscover_tasks(
"onyx.background.celery.tasks.shared",
"onyx.background.celery.tasks.vespa",
"onyx.background.celery.tasks.llm_model_update",
"onyx.background.celery.tasks.user_file_folder_sync",
"onyx.background.celery.tasks.kg_processing",
]
)

@@ -1,113 +0,0 @@
|
||||
from typing import Any
|
||||
from typing import cast
|
||||
|
||||
from celery import Celery
|
||||
from celery import signals
|
||||
from celery import Task
|
||||
from celery.apps.worker import Worker
|
||||
from celery.signals import celeryd_init
|
||||
from celery.signals import worker_init
|
||||
from celery.signals import worker_process_init
|
||||
from celery.signals import worker_ready
|
||||
from celery.signals import worker_shutdown
|
||||
|
||||
import onyx.background.celery.apps.app_base as app_base
|
||||
from onyx.configs.constants import POSTGRES_CELERY_WORKER_USER_FILE_PROCESSING_APP_NAME
|
||||
from onyx.db.engine.sql_engine import SqlEngine
|
||||
from onyx.utils.logger import setup_logger
|
||||
from shared_configs.configs import MULTI_TENANT
|
||||
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
celery_app = Celery(__name__)
|
||||
celery_app.config_from_object("onyx.background.celery.configs.user_file_processing")
|
||||
celery_app.Task = app_base.TenantAwareTask # type: ignore [misc]
|
||||
|
||||
|
||||
@signals.task_prerun.connect
|
||||
def on_task_prerun(
|
||||
sender: Any | None = None,
|
||||
task_id: str | None = None,
|
||||
task: Task | None = None,
|
||||
args: tuple | None = None,
|
||||
kwargs: dict | None = None,
|
||||
**kwds: Any,
|
||||
) -> None:
|
||||
app_base.on_task_prerun(sender, task_id, task, args, kwargs, **kwds)
|
||||
|
||||
|
||||
@signals.task_postrun.connect
|
||||
def on_task_postrun(
|
||||
sender: Any | None = None,
|
||||
task_id: str | None = None,
|
||||
task: Task | None = None,
|
||||
args: tuple | None = None,
|
||||
kwargs: dict | None = None,
|
||||
retval: Any | None = None,
|
||||
state: str | None = None,
|
||||
**kwds: Any,
|
||||
) -> None:
|
||||
app_base.on_task_postrun(sender, task_id, task, args, kwargs, retval, state, **kwds)
|
||||
|
||||
|
||||
@celeryd_init.connect
|
||||
def on_celeryd_init(sender: str, conf: Any = None, **kwargs: Any) -> None:
|
||||
app_base.on_celeryd_init(sender, conf, **kwargs)
|
||||
|
||||
|
||||
@worker_init.connect
|
||||
def on_worker_init(sender: Worker, **kwargs: Any) -> None:
|
||||
logger.info("worker_init signal received.")
|
||||
|
||||
SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_USER_FILE_PROCESSING_APP_NAME)
|
||||
|
||||
# rkuo: Transient errors keep happening in the indexing watchdog threads.
|
||||
# "SSL connection has been closed unexpectedly"
|
||||
# actually setting the spawn method in the cloud fixes 95% of these.
|
||||
# setting pre ping might help even more, but not worrying about that yet
|
||||
pool_size = cast(int, sender.concurrency) # type: ignore
|
||||
SqlEngine.init_engine(pool_size=pool_size, max_overflow=8)
|
||||
|
||||
app_base.wait_for_redis(sender, **kwargs)
|
||||
app_base.wait_for_db(sender, **kwargs)
|
||||
app_base.wait_for_vespa_or_shutdown(sender, **kwargs)
|
||||
|
||||
# Less startup checks in multi-tenant case
|
||||
if MULTI_TENANT:
|
||||
return
|
||||
|
||||
app_base.on_secondary_worker_init(sender, **kwargs)
|
||||
|
||||
|
||||
@worker_ready.connect
|
||||
def on_worker_ready(sender: Any, **kwargs: Any) -> None:
|
||||
app_base.on_worker_ready(sender, **kwargs)
|
||||
|
||||
|
||||
@worker_shutdown.connect
|
||||
def on_worker_shutdown(sender: Any, **kwargs: Any) -> None:
|
||||
app_base.on_worker_shutdown(sender, **kwargs)
|
||||
|
||||
|
||||
@worker_process_init.connect
|
||||
def init_worker(**kwargs: Any) -> None:
|
||||
SqlEngine.reset_engine()
|
||||
|
||||
|
||||
@signals.setup_logging.connect
|
||||
def on_setup_logging(
|
||||
loglevel: Any, logfile: Any, format: Any, colorize: Any, **kwargs: Any
|
||||
) -> None:
|
||||
app_base.on_setup_logging(loglevel, logfile, format, colorize, **kwargs)
|
||||
|
||||
|
||||
base_bootsteps = app_base.get_bootsteps()
|
||||
for bootstep in base_bootsteps:
|
||||
celery_app.steps["worker"].add(bootstep)
|
||||
|
||||
celery_app.autodiscover_tasks(
|
||||
[
|
||||
"onyx.background.celery.tasks.user_file_processing",
|
||||
]
|
||||
)
|
||||
@@ -19,9 +19,7 @@ from onyx.connectors.interfaces import CheckpointedConnector
|
||||
from onyx.connectors.interfaces import LoadConnector
|
||||
from onyx.connectors.interfaces import PollConnector
|
||||
from onyx.connectors.interfaces import SlimConnector
|
||||
from onyx.connectors.interfaces import SlimConnectorWithPermSync
|
||||
from onyx.connectors.models import Document
|
||||
from onyx.connectors.models import SlimDocument
|
||||
from onyx.httpx.httpx_pool import HttpxPool
|
||||
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
|
||||
from onyx.utils.logger import setup_logger
|
||||
@@ -32,7 +30,7 @@ PRUNING_CHECKPOINTED_BATCH_SIZE = 32
|
||||
|
||||
|
||||
def document_batch_to_ids(
|
||||
doc_batch: Iterator[list[Document]] | Iterator[list[SlimDocument]],
|
||||
doc_batch: Iterator[list[Document]],
|
||||
) -> Generator[set[str], None, None]:
|
||||
for doc_list in doc_batch:
|
||||
yield {doc.id for doc in doc_list}
|
||||
@@ -43,24 +41,20 @@ def extract_ids_from_runnable_connector(
|
||||
callback: IndexingHeartbeatInterface | None = None,
|
||||
) -> set[str]:
|
||||
"""
|
||||
If the given connector is neither a SlimConnector nor a SlimConnectorWithPermSync, just pull
|
||||
If the SlimConnector hasn't been implemented for the given connector, just pull
|
||||
all docs using the load_from_state and grab out the IDs.
|
||||
|
||||
Optionally, a callback can be passed to handle the length of each document batch.
|
||||
"""
|
||||
all_connector_doc_ids: set[str] = set()
|
||||
|
||||
doc_batch_id_generator = None
|
||||
if isinstance(runnable_connector, SlimConnector):
|
||||
doc_batch_id_generator = document_batch_to_ids(
|
||||
runnable_connector.retrieve_all_slim_docs()
|
||||
)
|
||||
elif isinstance(runnable_connector, SlimConnectorWithPermSync):
|
||||
doc_batch_id_generator = document_batch_to_ids(
|
||||
runnable_connector.retrieve_all_slim_docs_perm_sync()
|
||||
)
|
||||
# If the connector isn't slim, fall back to running it normally to get ids
|
||||
elif isinstance(runnable_connector, LoadConnector):
|
||||
for metadata_batch in runnable_connector.retrieve_all_slim_documents():
|
||||
all_connector_doc_ids.update({doc.id for doc in metadata_batch})
|
||||
|
||||
doc_batch_id_generator = None
|
||||
|
||||
if isinstance(runnable_connector, LoadConnector):
|
||||
doc_batch_id_generator = document_batch_to_ids(
|
||||
runnable_connector.load_from_state()
|
||||
)
|
||||
@@ -84,14 +78,13 @@ def extract_ids_from_runnable_connector(
|
||||
raise RuntimeError("Pruning job could not find a valid runnable_connector.")
|
||||
|
||||
# this function is called per batch for rate limiting
|
||||
doc_batch_processing_func = (
|
||||
rate_limit_builder(
|
||||
def doc_batch_processing_func(doc_batch_ids: set[str]) -> set[str]:
|
||||
return doc_batch_ids
|
||||
|
||||
if MAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE:
|
||||
doc_batch_processing_func = rate_limit_builder(
|
||||
max_calls=MAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE, period=60
|
||||
)(lambda x: x)
|
||||
if MAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE
|
||||
else lambda x: x
|
||||
)
|
||||
|
||||
for doc_batch_ids in doc_batch_id_generator:
|
||||
if callback:
|
||||
if callback.should_stop():
|
||||
|
||||
@@ -1,22 +0,0 @@
|
||||
import onyx.background.celery.configs.base as shared_config
|
||||
from onyx.configs.app_configs import CELERY_WORKER_USER_FILE_PROCESSING_CONCURRENCY
|
||||
|
||||
broker_url = shared_config.broker_url
|
||||
broker_connection_retry_on_startup = shared_config.broker_connection_retry_on_startup
|
||||
broker_pool_limit = shared_config.broker_pool_limit
|
||||
broker_transport_options = shared_config.broker_transport_options
|
||||
|
||||
redis_socket_keepalive = shared_config.redis_socket_keepalive
|
||||
redis_retry_on_timeout = shared_config.redis_retry_on_timeout
|
||||
redis_backend_health_check_interval = shared_config.redis_backend_health_check_interval
|
||||
|
||||
result_backend = shared_config.result_backend
|
||||
result_expires = shared_config.result_expires # 86400 seconds is the default
|
||||
|
||||
task_default_priority = shared_config.task_default_priority
|
||||
task_acks_late = shared_config.task_acks_late
|
||||
|
||||
# User file processing worker configuration
|
||||
worker_concurrency = CELERY_WORKER_USER_FILE_PROCESSING_CONCURRENCY
|
||||
worker_pool = "threads"
|
||||
worker_prefetch_multiplier = 1
|
||||
@@ -26,26 +26,6 @@ CLOUD_DOC_PERMISSION_SYNC_MULTIPLIER_DEFAULT = 1.0
|
||||
|
||||
# tasks that run in either self-hosted on cloud
|
||||
beat_task_templates: list[dict] = [
|
||||
{
|
||||
"name": "check-for-user-file-processing",
|
||||
"task": OnyxCeleryTask.CHECK_FOR_USER_FILE_PROCESSING,
|
||||
"schedule": timedelta(seconds=20),
|
||||
"options": {
|
||||
"priority": OnyxCeleryPriority.MEDIUM,
|
||||
"expires": BEAT_EXPIRES_DEFAULT,
|
||||
"queue": OnyxCeleryQueues.USER_FILE_PROCESSING,
|
||||
},
|
||||
},
|
||||
{
|
||||
"name": "user-file-docid-migration",
|
||||
"task": OnyxCeleryTask.USER_FILE_DOCID_MIGRATION,
|
||||
"schedule": timedelta(minutes=1),
|
||||
"options": {
|
||||
"priority": OnyxCeleryPriority.HIGH,
|
||||
"expires": BEAT_EXPIRES_DEFAULT,
|
||||
"queue": OnyxCeleryQueues.USER_FILE_PROCESSING,
|
||||
},
|
||||
},
|
||||
{
|
||||
"name": "check-for-kg-processing",
|
||||
"task": OnyxCeleryTask.CHECK_KG_PROCESSING,
|
||||
@@ -85,9 +65,9 @@ beat_task_templates: list[dict] = [
|
||||
{
|
||||
"name": "check-for-index-attempt-cleanup",
|
||||
"task": OnyxCeleryTask.CHECK_FOR_INDEX_ATTEMPT_CLEANUP,
|
||||
"schedule": timedelta(minutes=30),
|
||||
"schedule": timedelta(hours=1),
|
||||
"options": {
|
||||
"priority": OnyxCeleryPriority.MEDIUM,
|
||||
"priority": OnyxCeleryPriority.LOW,
|
||||
"expires": BEAT_EXPIRES_DEFAULT,
|
||||
},
|
||||
},
|
||||
@@ -109,6 +89,17 @@ beat_task_templates: list[dict] = [
|
||||
"expires": BEAT_EXPIRES_DEFAULT,
|
||||
},
|
||||
},
|
||||
{
|
||||
"name": "check-for-user-file-folder-sync",
|
||||
"task": OnyxCeleryTask.CHECK_FOR_USER_FILE_FOLDER_SYNC,
|
||||
"schedule": timedelta(
|
||||
days=1
|
||||
), # This should essentially always be triggered manually for user folder updates.
|
||||
"options": {
|
||||
"priority": OnyxCeleryPriority.MEDIUM,
|
||||
"expires": BEAT_EXPIRES_DEFAULT,
|
||||
},
|
||||
},
|
||||
{
|
||||
"name": "check-for-pruning",
|
||||
"task": OnyxCeleryTask.CHECK_FOR_PRUNING,
|
||||
|
||||
@@ -28,6 +28,9 @@ from onyx.db.connector_credential_pair import add_deletion_failure_message
|
||||
from onyx.db.connector_credential_pair import (
|
||||
delete_connector_credential_pair__no_commit,
|
||||
)
|
||||
from onyx.db.connector_credential_pair import (
|
||||
delete_userfiles_for_cc_pair__no_commit,
|
||||
)
|
||||
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
|
||||
from onyx.db.connector_credential_pair import get_connector_credential_pairs
|
||||
from onyx.db.document import (
|
||||
@@ -481,6 +484,12 @@ def monitor_connector_deletion_taskset(
|
||||
# related to the deleted DocumentByConnectorCredentialPair during commit
|
||||
db_session.expire(cc_pair)
|
||||
|
||||
# delete all userfiles for the cc_pair
|
||||
delete_userfiles_for_cc_pair__no_commit(
|
||||
db_session=db_session,
|
||||
cc_pair_id=cc_pair_id,
|
||||
)
|
||||
|
||||
# finally, delete the cc-pair
|
||||
delete_connector_credential_pair__no_commit(
|
||||
db_session=db_session,
|
||||
|
||||
@@ -85,9 +85,6 @@ from onyx.document_index.factory import get_default_document_index
|
||||
from onyx.file_store.document_batch_storage import DocumentBatchStorage
|
||||
from onyx.file_store.document_batch_storage import get_document_batch_storage
|
||||
from onyx.httpx.httpx_pool import HttpxPool
|
||||
from onyx.indexing.adapters.document_indexing_adapter import (
|
||||
DocumentIndexingBatchAdapter,
|
||||
)
|
||||
from onyx.indexing.embedder import DefaultIndexingEmbedder
|
||||
from onyx.indexing.indexing_pipeline import run_indexing_pipeline
|
||||
from onyx.natural_language_processing.search_nlp_models import EmbeddingModel
|
||||
@@ -1372,14 +1369,6 @@ def _docprocessing_task(
|
||||
f"Processing {len(documents)} documents through indexing pipeline"
|
||||
)
|
||||
|
||||
adapter = DocumentIndexingBatchAdapter(
|
||||
db_session=db_session,
|
||||
connector_id=index_attempt.connector_credential_pair.connector.id,
|
||||
credential_id=index_attempt.connector_credential_pair.credential.id,
|
||||
tenant_id=tenant_id,
|
||||
index_attempt_metadata=index_attempt_metadata,
|
||||
)
|
||||
|
||||
# real work happens here!
|
||||
index_pipeline_result = run_indexing_pipeline(
|
||||
embedder=embedding_model,
|
||||
@@ -1389,8 +1378,7 @@ def _docprocessing_task(
|
||||
db_session=db_session,
|
||||
tenant_id=tenant_id,
|
||||
document_batch=documents,
|
||||
request_id=index_attempt_metadata.request_id,
|
||||
adapter=adapter,
|
||||
index_attempt_metadata=index_attempt_metadata,
|
||||
)
|
||||
|
||||
# Update batch completion and document counts atomically using database coordination
|
||||
|
||||
@@ -889,12 +889,6 @@ def monitor_celery_queues_helper(
|
||||
n_user_files_indexing = celery_get_queue_length(
|
||||
OnyxCeleryQueues.USER_FILES_INDEXING, r_celery
|
||||
)
|
||||
n_user_file_processing = celery_get_queue_length(
|
||||
OnyxCeleryQueues.USER_FILE_PROCESSING, r_celery
|
||||
)
|
||||
n_user_file_project_sync = celery_get_queue_length(
|
||||
OnyxCeleryQueues.USER_FILE_PROJECT_SYNC, r_celery
|
||||
)
|
||||
n_sync = celery_get_queue_length(OnyxCeleryQueues.VESPA_METADATA_SYNC, r_celery)
|
||||
n_deletion = celery_get_queue_length(OnyxCeleryQueues.CONNECTOR_DELETION, r_celery)
|
||||
n_pruning = celery_get_queue_length(OnyxCeleryQueues.CONNECTOR_PRUNING, r_celery)
|
||||
@@ -922,8 +916,6 @@ def monitor_celery_queues_helper(
|
||||
f"docprocessing={n_docprocessing} "
|
||||
f"docprocessing_prefetched={len(n_docprocessing_prefetched)} "
|
||||
f"user_files_indexing={n_user_files_indexing} "
|
||||
f"user_file_processing={n_user_file_processing} "
|
||||
f"user_file_project_sync={n_user_file_project_sync} "
|
||||
f"sync={n_sync} "
|
||||
f"deletion={n_deletion} "
|
||||
f"pruning={n_pruning} "
|
||||
|
||||
@@ -0,0 +1,266 @@
|
||||
import time
|
||||
from typing import List
|
||||
|
||||
from celery import shared_task
|
||||
from celery import Task
|
||||
from celery.exceptions import SoftTimeLimitExceeded
|
||||
from redis.lock import Lock as RedisLock
|
||||
from sqlalchemy.orm import Session
|
||||
from tenacity import RetryError
|
||||
|
||||
from onyx.background.celery.apps.app_base import task_logger
|
||||
from onyx.background.celery.tasks.shared.RetryDocumentIndex import RetryDocumentIndex
|
||||
from onyx.background.celery.tasks.shared.tasks import LIGHT_SOFT_TIME_LIMIT
|
||||
from onyx.background.celery.tasks.shared.tasks import LIGHT_TIME_LIMIT
|
||||
from onyx.background.celery.tasks.shared.tasks import OnyxCeleryTaskCompletionStatus
|
||||
from onyx.configs.app_configs import JOB_TIMEOUT
|
||||
from onyx.configs.constants import CELERY_USER_FILE_FOLDER_SYNC_BEAT_LOCK_TIMEOUT
|
||||
from onyx.configs.constants import OnyxCeleryTask
|
||||
from onyx.configs.constants import OnyxRedisLocks
|
||||
from onyx.db.connector_credential_pair import (
|
||||
get_connector_credential_pairs_with_user_files,
|
||||
)
|
||||
from onyx.db.document import get_document
|
||||
from onyx.db.engine.sql_engine import get_session_with_current_tenant
|
||||
from onyx.db.models import ConnectorCredentialPair
|
||||
from onyx.db.models import Document
|
||||
from onyx.db.models import DocumentByConnectorCredentialPair
|
||||
from onyx.db.search_settings import get_active_search_settings
|
||||
from onyx.db.user_documents import fetch_user_files_for_documents
|
||||
from onyx.db.user_documents import fetch_user_folders_for_documents
|
||||
from onyx.document_index.factory import get_default_document_index
|
||||
from onyx.document_index.interfaces import VespaDocumentUserFields
|
||||
from onyx.httpx.httpx_pool import HttpxPool
|
||||
from onyx.redis.redis_pool import get_redis_client
|
||||
from onyx.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
@shared_task(
|
||||
name=OnyxCeleryTask.CHECK_FOR_USER_FILE_FOLDER_SYNC,
|
||||
ignore_result=True,
|
||||
soft_time_limit=JOB_TIMEOUT,
|
||||
trail=False,
|
||||
bind=True,
|
||||
)
|
||||
def check_for_user_file_folder_sync(self: Task, *, tenant_id: str) -> bool | None:
|
||||
"""Runs periodically to check for documents that need user file folder metadata updates.
|
||||
This task fetches all connector credential pairs with user files, gets the documents
|
||||
associated with them, and updates the user file and folder metadata in Vespa.
|
||||
"""
|
||||
|
||||
time_start = time.monotonic()
|
||||
|
||||
r = get_redis_client()
|
||||
|
||||
lock_beat: RedisLock = r.lock(
|
||||
OnyxRedisLocks.CHECK_USER_FILE_FOLDER_SYNC_BEAT_LOCK,
|
||||
timeout=CELERY_USER_FILE_FOLDER_SYNC_BEAT_LOCK_TIMEOUT,
|
||||
)
|
||||
|
||||
# these tasks should never overlap
|
||||
if not lock_beat.acquire(blocking=False):
|
||||
return None
|
||||
|
||||
try:
|
||||
with get_session_with_current_tenant() as db_session:
|
||||
# Get all connector credential pairs that have user files
|
||||
cc_pairs = get_connector_credential_pairs_with_user_files(db_session)
|
||||
|
||||
if not cc_pairs:
|
||||
task_logger.info("No connector credential pairs with user files found")
|
||||
return True
|
||||
|
||||
# Get all documents associated with these cc_pairs
|
||||
document_ids = get_documents_for_cc_pairs(cc_pairs, db_session)
|
||||
|
||||
if not document_ids:
|
||||
task_logger.info(
|
||||
"No documents found for connector credential pairs with user files"
|
||||
)
|
||||
return True
|
||||
|
||||
# Fetch current user file and folder IDs for these documents
|
||||
doc_id_to_user_file_id = fetch_user_files_for_documents(
|
||||
document_ids=document_ids, db_session=db_session
|
||||
)
|
||||
doc_id_to_user_folder_id = fetch_user_folders_for_documents(
|
||||
document_ids=document_ids, db_session=db_session
|
||||
)
|
||||
|
||||
# Update Vespa metadata for each document
|
||||
for doc_id in document_ids:
|
||||
user_file_id = doc_id_to_user_file_id.get(doc_id)
|
||||
user_folder_id = doc_id_to_user_folder_id.get(doc_id)
|
||||
|
||||
if user_file_id is not None or user_folder_id is not None:
|
||||
# Schedule a task to update the document metadata
|
||||
update_user_file_folder_metadata.apply_async(
|
||||
args=(doc_id,), # Use tuple instead of list for args
|
||||
kwargs={
|
||||
"tenant_id": tenant_id,
|
||||
"user_file_id": user_file_id,
|
||||
"user_folder_id": user_folder_id,
|
||||
},
|
||||
queue="vespa_metadata_sync",
|
||||
)
|
||||
|
||||
task_logger.info(
|
||||
f"Scheduled metadata updates for {len(document_ids)} documents. "
|
||||
f"Elapsed time: {time.monotonic() - time_start:.2f}s"
|
||||
)
|
||||
|
||||
return True
|
||||
except Exception as e:
|
||||
task_logger.exception(f"Error in check_for_user_file_folder_sync: {e}")
|
||||
return False
|
||||
finally:
|
||||
lock_beat.release()
|
||||
|
||||
|
||||
def get_documents_for_cc_pairs(
|
||||
cc_pairs: List[ConnectorCredentialPair], db_session: Session
|
||||
) -> List[str]:
|
||||
"""Get all document IDs associated with the given connector credential pairs."""
|
||||
if not cc_pairs:
|
||||
return []
|
||||
|
||||
cc_pair_ids = [cc_pair.id for cc_pair in cc_pairs]
|
||||
|
||||
# Query to get document IDs from DocumentByConnectorCredentialPair
|
||||
# Note: DocumentByConnectorCredentialPair uses connector_id and credential_id, not cc_pair_id
|
||||
doc_cc_pairs = (
|
||||
db_session.query(Document.id)
|
||||
.join(
|
||||
DocumentByConnectorCredentialPair,
|
||||
Document.id == DocumentByConnectorCredentialPair.id,
|
||||
)
|
||||
.filter(
|
||||
db_session.query(ConnectorCredentialPair)
|
||||
.filter(
|
||||
ConnectorCredentialPair.id.in_(cc_pair_ids),
|
||||
ConnectorCredentialPair.connector_id
|
||||
== DocumentByConnectorCredentialPair.connector_id,
|
||||
ConnectorCredentialPair.credential_id
|
||||
== DocumentByConnectorCredentialPair.credential_id,
|
||||
)
|
||||
.exists()
|
||||
)
|
||||
.all()
|
||||
)
|
||||
|
||||
return [doc_id for (doc_id,) in doc_cc_pairs]
|
||||
|
||||
|
||||
@shared_task(
|
||||
name=OnyxCeleryTask.UPDATE_USER_FILE_FOLDER_METADATA,
|
||||
bind=True,
|
||||
soft_time_limit=LIGHT_SOFT_TIME_LIMIT,
|
||||
time_limit=LIGHT_TIME_LIMIT,
|
||||
max_retries=3,
|
||||
)
|
||||
def update_user_file_folder_metadata(
|
||||
self: Task,
|
||||
document_id: str,
|
||||
*,
|
||||
tenant_id: str,
|
||||
user_file_id: int | None,
|
||||
user_folder_id: int | None,
|
||||
) -> bool:
|
||||
"""Updates the user file and folder metadata for a document in Vespa."""
|
||||
start = time.monotonic()
|
||||
completion_status = OnyxCeleryTaskCompletionStatus.UNDEFINED
|
||||
|
||||
try:
|
||||
with get_session_with_current_tenant() as db_session:
|
||||
active_search_settings = get_active_search_settings(db_session)
|
||||
doc_index = get_default_document_index(
|
||||
search_settings=active_search_settings.primary,
|
||||
secondary_search_settings=active_search_settings.secondary,
|
||||
httpx_client=HttpxPool.get("vespa"),
|
||||
)
|
||||
|
||||
retry_index = RetryDocumentIndex(doc_index)
|
||||
|
||||
doc = get_document(document_id, db_session)
|
||||
if not doc:
|
||||
elapsed = time.monotonic() - start
|
||||
task_logger.info(
|
||||
f"doc={document_id} "
|
||||
f"action=no_operation "
|
||||
f"elapsed={elapsed:.2f}"
|
||||
)
|
||||
completion_status = OnyxCeleryTaskCompletionStatus.SKIPPED
|
||||
return False
|
||||
|
||||
# Create user fields object with file and folder IDs
|
||||
user_fields = VespaDocumentUserFields(
|
||||
user_file_id=str(user_file_id) if user_file_id is not None else None,
|
||||
user_folder_id=(
|
||||
str(user_folder_id) if user_folder_id is not None else None
|
||||
),
|
||||
)
|
||||
|
||||
# Update Vespa. OK if doc doesn't exist. Raises exception otherwise.
|
||||
chunks_affected = retry_index.update_single(
|
||||
document_id,
|
||||
tenant_id=tenant_id,
|
||||
chunk_count=doc.chunk_count,
|
||||
fields=None, # We're only updating user fields
|
||||
user_fields=user_fields,
|
||||
)
|
||||
|
||||
elapsed = time.monotonic() - start
|
||||
task_logger.info(
|
||||
f"doc={document_id} "
|
||||
f"action=user_file_folder_sync "
|
||||
f"user_file_id={user_file_id} "
|
||||
f"user_folder_id={user_folder_id} "
|
||||
f"chunks={chunks_affected} "
|
||||
f"elapsed={elapsed:.2f}"
|
||||
)
|
||||
completion_status = OnyxCeleryTaskCompletionStatus.SUCCEEDED
|
||||
return True
|
||||
|
||||
except SoftTimeLimitExceeded:
|
||||
task_logger.info(f"SoftTimeLimitExceeded exception. doc={document_id}")
|
||||
completion_status = OnyxCeleryTaskCompletionStatus.SOFT_TIME_LIMIT
|
||||
except Exception as ex:
|
||||
e: Exception | None = None
|
||||
while True:
|
||||
if isinstance(ex, RetryError):
|
||||
task_logger.warning(
|
||||
f"Tenacity retry failed: num_attempts={ex.last_attempt.attempt_number}"
|
||||
)
|
||||
|
||||
# only set the inner exception if it is of type Exception
|
||||
e_temp = ex.last_attempt.exception()
|
||||
if isinstance(e_temp, Exception):
|
||||
e = e_temp
|
||||
else:
|
||||
e = ex
|
||||
|
||||
task_logger.exception(
|
||||
f"update_user_file_folder_metadata exceptioned: doc={document_id}"
|
||||
)
|
||||
|
||||
completion_status = OnyxCeleryTaskCompletionStatus.RETRYABLE_EXCEPTION
|
||||
if (
|
||||
self.max_retries is not None
|
||||
and self.request.retries >= self.max_retries
|
||||
):
|
||||
completion_status = (
|
||||
OnyxCeleryTaskCompletionStatus.NON_RETRYABLE_EXCEPTION
|
||||
)
|
||||
|
||||
# Exponential backoff from 2^4 to 2^6 ... i.e. 16, 32, 64
|
||||
countdown = 2 ** (self.request.retries + 4)
|
||||
self.retry(exc=e, countdown=countdown) # this will raise a celery exception
|
||||
break # we won't hit this, but it looks weird not to have it
|
||||
finally:
|
||||
task_logger.info(
|
||||
f"update_user_file_folder_metadata completed: status={completion_status.value} doc={document_id}"
|
||||
)
|
||||
|
||||
return False
|
||||
@@ -1,699 +0,0 @@
|
||||
import datetime
|
||||
import time
|
||||
from collections.abc import Sequence
|
||||
from typing import Any
|
||||
from uuid import UUID
|
||||
|
||||
import httpx
|
||||
import sqlalchemy as sa
|
||||
from celery import shared_task
|
||||
from celery import Task
|
||||
from redis.lock import Lock as RedisLock
|
||||
from sqlalchemy import select
|
||||
|
||||
from onyx.background.celery.apps.app_base import task_logger
|
||||
from onyx.background.celery.celery_utils import httpx_init_vespa_pool
|
||||
from onyx.background.celery.tasks.shared.RetryDocumentIndex import RetryDocumentIndex
|
||||
from onyx.background.celery.tasks.shared.tasks import LIGHT_SOFT_TIME_LIMIT
|
||||
from onyx.background.celery.tasks.shared.tasks import LIGHT_TIME_LIMIT
|
||||
from onyx.configs.app_configs import MANAGED_VESPA
|
||||
from onyx.configs.app_configs import VESPA_CLOUD_CERT_PATH
|
||||
from onyx.configs.app_configs import VESPA_CLOUD_KEY_PATH
|
||||
from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.configs.constants import FileOrigin
|
||||
from onyx.configs.constants import OnyxCeleryPriority
|
||||
from onyx.configs.constants import OnyxCeleryQueues
|
||||
from onyx.configs.constants import OnyxCeleryTask
|
||||
from onyx.configs.constants import OnyxRedisLocks
|
||||
from onyx.connectors.file.connector import LocalFileConnector
|
||||
from onyx.connectors.models import Document
|
||||
from onyx.db.engine.sql_engine import get_session_with_current_tenant
|
||||
from onyx.db.enums import UserFileStatus
|
||||
from onyx.db.models import FileRecord
|
||||
from onyx.db.models import SearchDoc
|
||||
from onyx.db.models import UserFile
|
||||
from onyx.db.search_settings import get_active_search_settings
|
||||
from onyx.db.search_settings import get_active_search_settings_list
|
||||
from onyx.document_index.factory import get_default_document_index
|
||||
from onyx.document_index.interfaces import VespaDocumentUserFields
|
||||
from onyx.document_index.vespa.shared_utils.utils import get_vespa_http_client
|
||||
from onyx.document_index.vespa.shared_utils.utils import (
|
||||
replace_invalid_doc_id_characters,
|
||||
)
|
||||
from onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT
|
||||
from onyx.document_index.vespa_constants import USER_PROJECT
|
||||
from onyx.file_store.file_store import get_default_file_store
|
||||
from onyx.file_store.file_store import S3BackedFileStore
|
||||
from onyx.httpx.httpx_pool import HttpxPool
|
||||
from onyx.indexing.adapters.user_file_indexing_adapter import UserFileIndexingAdapter
|
||||
from onyx.indexing.embedder import DefaultIndexingEmbedder
|
||||
from onyx.indexing.indexing_pipeline import run_indexing_pipeline
|
||||
from onyx.natural_language_processing.search_nlp_models import (
|
||||
InformationContentClassificationModel,
|
||||
)
|
||||
from onyx.redis.redis_pool import get_redis_client
|
||||
|
||||
|
||||
def _as_uuid(value: str | UUID) -> UUID:
|
||||
"""Return a UUID, accepting either a UUID or a string-like value."""
|
||||
return value if isinstance(value, UUID) else UUID(str(value))
|
||||
|
||||
|
||||
def _user_file_lock_key(user_file_id: str | UUID) -> str:
|
||||
return f"{OnyxRedisLocks.USER_FILE_PROCESSING_LOCK_PREFIX}:{user_file_id}"
|
||||
|
||||
|
||||
def _user_file_project_sync_lock_key(user_file_id: str | UUID) -> str:
|
||||
return f"{OnyxRedisLocks.USER_FILE_PROJECT_SYNC_LOCK_PREFIX}:{user_file_id}"
|
||||
|
||||
|
||||
@shared_task(
|
||||
name=OnyxCeleryTask.CHECK_FOR_USER_FILE_PROCESSING,
|
||||
soft_time_limit=300,
|
||||
bind=True,
|
||||
ignore_result=True,
|
||||
)
|
||||
def check_user_file_processing(self: Task, *, tenant_id: str) -> None:
|
||||
"""Scan for user files with PROCESSING status and enqueue per-file tasks.
|
||||
|
||||
Uses direct Redis locks to avoid overlapping runs.
|
||||
"""
|
||||
task_logger.info("check_user_file_processing - Starting")
|
||||
|
||||
redis_client = get_redis_client(tenant_id=tenant_id)
|
||||
lock: RedisLock = redis_client.lock(
|
||||
OnyxRedisLocks.USER_FILE_PROCESSING_BEAT_LOCK,
|
||||
timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,
|
||||
)
|
||||
|
||||
# Do not overlap generator runs
|
||||
if not lock.acquire(blocking=False):
|
||||
return None
|
||||
|
||||
enqueued = 0
|
||||
try:
|
||||
with get_session_with_current_tenant() as db_session:
|
||||
user_file_ids = (
|
||||
db_session.execute(
|
||||
select(UserFile.id).where(
|
||||
UserFile.status == UserFileStatus.PROCESSING
|
||||
)
|
||||
)
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
|
||||
for user_file_id in user_file_ids:
|
||||
self.app.send_task(
|
||||
OnyxCeleryTask.PROCESS_SINGLE_USER_FILE,
|
||||
kwargs={"user_file_id": str(user_file_id), "tenant_id": tenant_id},
|
||||
queue=OnyxCeleryQueues.USER_FILE_PROCESSING,
|
||||
priority=OnyxCeleryPriority.HIGH,
|
||||
)
|
||||
enqueued += 1
|
||||
|
||||
finally:
|
||||
if lock.owned():
|
||||
lock.release()
|
||||
|
||||
task_logger.info(
|
||||
f"check_user_file_processing - Enqueued {enqueued} tasks for tenant={tenant_id}"
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
@shared_task(
|
||||
name=OnyxCeleryTask.PROCESS_SINGLE_USER_FILE,
|
||||
bind=True,
|
||||
ignore_result=True,
|
||||
)
|
||||
def process_single_user_file(self: Task, *, user_file_id: str, tenant_id: str) -> None:
|
||||
task_logger.info(f"process_single_user_file - Starting id={user_file_id}")
|
||||
start = time.monotonic()
|
||||
|
||||
redis_client = get_redis_client(tenant_id=tenant_id)
|
||||
file_lock: RedisLock = redis_client.lock(
|
||||
_user_file_lock_key(user_file_id), timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT
|
||||
)
|
||||
|
||||
if not file_lock.acquire(blocking=False):
|
||||
task_logger.info(
|
||||
f"process_single_user_file - Lock held, skipping user_file_id={user_file_id}"
|
||||
)
|
||||
return None
|
||||
|
||||
documents: list[Document] = []
|
||||
try:
|
||||
with get_session_with_current_tenant() as db_session:
|
||||
uf = db_session.get(UserFile, _as_uuid(user_file_id))
|
||||
if not uf:
|
||||
task_logger.warning(
|
||||
f"process_single_user_file - UserFile not found id={user_file_id}"
|
||||
)
|
||||
return None
|
||||
|
||||
if uf.status != UserFileStatus.PROCESSING:
|
||||
task_logger.info(
|
||||
f"process_single_user_file - Skipping id={user_file_id} status={uf.status}"
|
||||
)
|
||||
return None
|
||||
|
||||
connector = LocalFileConnector(
|
||||
file_locations=[uf.file_id],
|
||||
file_names=[uf.name] if uf.name else None,
|
||||
zip_metadata={},
|
||||
)
|
||||
connector.load_credentials({})
|
||||
|
||||
# 20 is the documented default for httpx max_keepalive_connections
|
||||
if MANAGED_VESPA:
|
||||
httpx_init_vespa_pool(
|
||||
20, ssl_cert=VESPA_CLOUD_CERT_PATH, ssl_key=VESPA_CLOUD_KEY_PATH
|
||||
)
|
||||
else:
|
||||
httpx_init_vespa_pool(20)
|
||||
|
||||
search_settings_list = get_active_search_settings_list(db_session)
|
||||
|
||||
current_search_settings = next(
|
||||
(
|
||||
search_settings_instance
|
||||
for search_settings_instance in search_settings_list
|
||||
if search_settings_instance.status.is_current()
|
||||
),
|
||||
None,
|
||||
)
|
||||
|
||||
if current_search_settings is None:
|
||||
raise RuntimeError(
|
||||
f"process_single_user_file - No current search settings found for tenant={tenant_id}"
|
||||
)
|
||||
|
||||
try:
|
||||
for batch in connector.load_from_state():
|
||||
documents.extend(batch)
|
||||
|
||||
adapter = UserFileIndexingAdapter(
|
||||
tenant_id=tenant_id,
|
||||
db_session=db_session,
|
||||
)
|
||||
|
||||
# Set up indexing pipeline components
|
||||
embedding_model = DefaultIndexingEmbedder.from_db_search_settings(
|
||||
search_settings=current_search_settings,
|
||||
)
|
||||
|
||||
information_content_classification_model = (
|
||||
InformationContentClassificationModel()
|
||||
)
|
||||
|
||||
document_index = get_default_document_index(
|
||||
current_search_settings,
|
||||
None,
|
||||
httpx_client=HttpxPool.get("vespa"),
|
||||
)
|
||||
|
||||
# update the document id to the user file id in the documents
|
||||
for document in documents:
|
||||
document.id = str(user_file_id)
|
||||
document.source = DocumentSource.USER_FILE
|
||||
|
||||
# real work happens here!
|
||||
index_pipeline_result = run_indexing_pipeline(
|
||||
embedder=embedding_model,
|
||||
information_content_classification_model=information_content_classification_model,
|
||||
document_index=document_index,
|
||||
ignore_time_skip=True,
|
||||
db_session=db_session,
|
||||
tenant_id=tenant_id,
|
||||
document_batch=documents,
|
||||
request_id=None,
|
||||
adapter=adapter,
|
||||
)
|
||||
|
||||
task_logger.info(
|
||||
f"process_single_user_file - Indexing pipeline completed ={index_pipeline_result}"
|
||||
)
|
||||
|
||||
if (
|
||||
index_pipeline_result.failures
|
||||
or index_pipeline_result.total_docs != len(documents)
|
||||
or index_pipeline_result.total_chunks == 0
|
||||
):
|
||||
task_logger.error(
|
||||
f"process_single_user_file - Indexing pipeline failed id={user_file_id}"
|
||||
)
|
||||
uf.status = UserFileStatus.FAILED
|
||||
db_session.add(uf)
|
||||
db_session.commit()
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
task_logger.exception(
|
||||
f"process_single_user_file - Error processing file id={user_file_id} - {e.__class__.__name__}"
|
||||
)
|
||||
uf.status = UserFileStatus.FAILED
|
||||
db_session.add(uf)
|
||||
db_session.commit()
|
||||
return None
|
||||
|
||||
elapsed = time.monotonic() - start
|
||||
task_logger.info(
|
||||
f"process_single_user_file - Finished id={user_file_id} docs={len(documents)} elapsed={elapsed:.2f}s"
|
||||
)
|
||||
return None
|
||||
except Exception as e:
|
||||
# Attempt to mark the file as failed
|
||||
with get_session_with_current_tenant() as db_session:
|
||||
uf = db_session.get(UserFile, _as_uuid(user_file_id))
|
||||
if uf:
|
||||
uf.status = UserFileStatus.FAILED
|
||||
db_session.add(uf)
|
||||
db_session.commit()
|
||||
|
||||
task_logger.exception(
|
||||
f"process_single_user_file - Error processing file id={user_file_id} - {e.__class__.__name__}"
|
||||
)
|
||||
return None
|
||||
finally:
|
||||
if file_lock.owned():
|
||||
file_lock.release()
|
||||
|
||||
|
||||
@shared_task(
|
||||
name=OnyxCeleryTask.CHECK_FOR_USER_FILE_PROJECT_SYNC,
|
||||
soft_time_limit=300,
|
||||
bind=True,
|
||||
ignore_result=True,
|
||||
)
|
||||
def check_for_user_file_project_sync(self: Task, *, tenant_id: str) -> None:
|
||||
"""Scan for user files with PROJECT_SYNC status and enqueue per-file tasks."""
|
||||
task_logger.info("check_for_user_file_project_sync - Starting")
|
||||
|
||||
redis_client = get_redis_client(tenant_id=tenant_id)
|
||||
lock: RedisLock = redis_client.lock(
|
||||
OnyxRedisLocks.USER_FILE_PROJECT_SYNC_BEAT_LOCK,
|
||||
timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,
|
||||
)
|
||||
|
||||
if not lock.acquire(blocking=False):
|
||||
return None
|
||||
|
||||
enqueued = 0
|
||||
try:
|
||||
with get_session_with_current_tenant() as db_session:
|
||||
user_file_ids = (
|
||||
db_session.execute(
|
||||
select(UserFile.id).where(
    # both filters must apply; pass them as separate where() arguments
    # (implicit AND) rather than combining with Python `and`
    UserFile.needs_project_sync.is_(True),
    UserFile.status == UserFileStatus.COMPLETED,
)
|
||||
)
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
|
||||
for user_file_id in user_file_ids:
|
||||
self.app.send_task(
|
||||
OnyxCeleryTask.PROCESS_SINGLE_USER_FILE_PROJECT_SYNC,
|
||||
kwargs={"user_file_id": str(user_file_id), "tenant_id": tenant_id},
|
||||
queue=OnyxCeleryQueues.USER_FILE_PROJECT_SYNC,
|
||||
priority=OnyxCeleryPriority.HIGH,
|
||||
)
|
||||
enqueued += 1
|
||||
finally:
|
||||
if lock.owned():
|
||||
lock.release()
|
||||
|
||||
task_logger.info(
|
||||
f"check_for_user_file_project_sync - Enqueued {enqueued} tasks for tenant={tenant_id}"
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
@shared_task(
|
||||
name=OnyxCeleryTask.PROCESS_SINGLE_USER_FILE_PROJECT_SYNC,
|
||||
bind=True,
|
||||
ignore_result=True,
|
||||
)
|
||||
def process_single_user_file_project_sync(
|
||||
self: Task, *, user_file_id: str, tenant_id: str
|
||||
) -> None:
|
||||
"""Process a single user file project sync."""
|
||||
task_logger.info(
|
||||
f"process_single_user_file_project_sync - Starting id={user_file_id}"
|
||||
)
|
||||
|
||||
redis_client = get_redis_client(tenant_id=tenant_id)
|
||||
file_lock: RedisLock = redis_client.lock(
|
||||
_user_file_project_sync_lock_key(user_file_id),
|
||||
timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,
|
||||
)
|
||||
|
||||
if not file_lock.acquire(blocking=False):
|
||||
task_logger.info(
|
||||
f"process_single_user_file_project_sync - Lock held, skipping user_file_id={user_file_id}"
|
||||
)
|
||||
return None
|
||||
|
||||
try:
|
||||
with get_session_with_current_tenant() as db_session:
|
||||
active_search_settings = get_active_search_settings(db_session)
|
||||
doc_index = get_default_document_index(
|
||||
search_settings=active_search_settings.primary,
|
||||
secondary_search_settings=active_search_settings.secondary,
|
||||
httpx_client=HttpxPool.get("vespa"),
|
||||
)
|
||||
retry_index = RetryDocumentIndex(doc_index)
|
||||
|
||||
user_file = db_session.get(UserFile, _as_uuid(user_file_id))
|
||||
if not user_file:
|
||||
task_logger.info(
|
||||
f"process_single_user_file_project_sync - User file not found id={user_file_id}"
|
||||
)
|
||||
return None
|
||||
|
||||
project_ids = [project.id for project in user_file.projects]
|
||||
chunks_affected = retry_index.update_single(
|
||||
doc_id=str(user_file.id),
|
||||
tenant_id=tenant_id,
|
||||
chunk_count=user_file.chunk_count,
|
||||
fields=None,
|
||||
user_fields=VespaDocumentUserFields(user_projects=project_ids),
|
||||
)
|
||||
|
||||
task_logger.info(
|
||||
f"process_single_user_file_project_sync - Chunks affected id={user_file_id} chunks={chunks_affected}"
|
||||
)
|
||||
|
||||
user_file.needs_project_sync = False
|
||||
user_file.last_project_sync_at = datetime.datetime.now(
|
||||
datetime.timezone.utc
|
||||
)
|
||||
db_session.add(user_file)
|
||||
db_session.commit()
|
||||
|
||||
except Exception as e:
|
||||
task_logger.exception(
|
||||
f"process_single_user_file_project_sync - Error syncing project for file id={user_file_id} - {e.__class__.__name__}"
|
||||
)
|
||||
return None
|
||||
finally:
|
||||
if file_lock.owned():
|
||||
file_lock.release()
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _normalize_legacy_user_file_doc_id(old_id: str) -> str:
    # Convert USER_FILE_CONNECTOR__<uuid> -> FILE_CONNECTOR__<uuid> for legacy values
    user_prefix = "USER_FILE_CONNECTOR__"
    file_prefix = "FILE_CONNECTOR__"
    if old_id.startswith(user_prefix):
        remainder = old_id[len(user_prefix) :]
        return file_prefix + remainder
    return old_id


def _visit_chunks(
    *,
    http_client: httpx.Client,
    index_name: str,
    selection: str,
    continuation: str | None = None,
) -> tuple[list[dict[str, Any]], str | None]:
    base_url = DOCUMENT_ID_ENDPOINT.format(index_name=index_name)
    params: dict[str, str] = {
        "selection": selection,
        "wantedDocumentCount": "1000",
    }
    if continuation:
        params["continuation"] = continuation
    resp = http_client.get(base_url, params=params, timeout=None)
    resp.raise_for_status()
    payload = resp.json()
    return payload.get("documents", []), payload.get("continuation")
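The (documents, continuation) pair returned above is meant to be consumed in a visit loop that keeps feeding the continuation token back to Vespa until none is returned, as _update_document_id_in_vespa does below. A minimal generator wrapper over the same helper, sketched here for illustration (the wrapper itself is not part of this module):

def _iter_all_chunks(http_client: httpx.Client, index_name: str, selection: str):
    """Yield every Vespa document matching `selection`, following continuation tokens."""
    continuation: str | None = None
    while True:
        docs, continuation = _visit_chunks(
            http_client=http_client,
            index_name=index_name,
            selection=selection,
            continuation=continuation,
        )
        yield from docs
        if not continuation:
            break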
|
||||
|
||||
|
||||
def _update_document_id_in_vespa(
|
||||
*,
|
||||
index_name: str,
|
||||
old_doc_id: str,
|
||||
new_doc_id: str,
|
||||
user_project_ids: list[int] | None = None,
|
||||
) -> None:
|
||||
clean_new_doc_id = replace_invalid_doc_id_characters(new_doc_id)
|
||||
normalized_old = _normalize_legacy_user_file_doc_id(old_doc_id)
|
||||
clean_old_doc_id = replace_invalid_doc_id_characters(normalized_old)
|
||||
|
||||
selection = f"{index_name}.document_id=='{clean_old_doc_id}'"
|
||||
task_logger.debug(f"Vespa selection: {selection}")
|
||||
|
||||
with get_vespa_http_client() as http_client:
|
||||
continuation: str | None = None
|
||||
while True:
|
||||
docs, continuation = _visit_chunks(
|
||||
http_client=http_client,
|
||||
index_name=index_name,
|
||||
selection=selection,
|
||||
continuation=continuation,
|
||||
)
|
||||
if not docs:
|
||||
break
|
||||
for doc in docs:
|
||||
vespa_full_id = doc.get("id")
|
||||
if not vespa_full_id:
|
||||
continue
|
||||
vespa_doc_uuid = vespa_full_id.split("::")[-1]
|
||||
vespa_url = f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{vespa_doc_uuid}"
|
||||
update_request: dict[str, Any] = {
|
||||
"fields": {"document_id": {"assign": clean_new_doc_id}}
|
||||
}
|
||||
if user_project_ids is not None:
|
||||
update_request["fields"][USER_PROJECT] = {
|
||||
"assign": user_project_ids
|
||||
}
|
||||
r = http_client.put(vespa_url, json=update_request)
|
||||
r.raise_for_status()
|
||||
if not continuation:
|
||||
break
|
||||
|
||||
|
||||
@shared_task(
|
||||
name=OnyxCeleryTask.USER_FILE_DOCID_MIGRATION,
|
||||
ignore_result=True,
|
||||
soft_time_limit=LIGHT_SOFT_TIME_LIMIT,
|
||||
time_limit=LIGHT_TIME_LIMIT,
|
||||
bind=True,
|
||||
)
|
||||
def user_file_docid_migration_task(self: Task, *, tenant_id: str) -> bool:
|
||||
"""Per-tenant job to update Vespa and search_doc document_id values for user files.
|
||||
|
||||
- For each user_file with a legacy document_id, set Vespa `document_id` to the UUID `user_file.id`.
|
||||
- Update `search_doc.document_id` to the same UUID string.
|
||||
"""
|
||||
|
||||
try:
|
||||
with get_session_with_current_tenant() as db_session:
|
||||
active_settings = get_active_search_settings(db_session)
|
||||
document_index = get_default_document_index(
|
||||
active_settings.primary,
|
||||
active_settings.secondary,
|
||||
)
|
||||
if hasattr(document_index, "index_name"):
|
||||
index_name = document_index.index_name
|
||||
else:
|
||||
index_name = "danswer_index"
|
||||
|
||||
# Fetch mappings of legacy -> new ids
|
||||
rows = db_session.execute(
|
||||
sa.select(
|
||||
UserFile.document_id.label("document_id"),
|
||||
UserFile.id.label("id"),
|
||||
).where(
|
||||
UserFile.document_id.is_not(None),
|
||||
UserFile.document_id_migrated.is_(False),
|
||||
)
|
||||
).all()
|
||||
|
||||
# dedupe by old document_id
|
||||
seen: set[str] = set()
|
||||
for row in rows:
|
||||
old_doc_id = str(row.document_id)
|
||||
new_uuid = str(row.id)
|
||||
if not old_doc_id or not new_uuid or old_doc_id in seen:
|
||||
continue
|
||||
seen.add(old_doc_id)
|
||||
# collect user project ids for a combined Vespa update
|
||||
user_project_ids: list[int] | None = None
|
||||
try:
|
||||
uf = db_session.get(UserFile, UUID(new_uuid))
|
||||
if uf is not None:
|
||||
user_project_ids = [project.id for project in uf.projects]
|
||||
except Exception as e:
|
||||
task_logger.warning(
|
||||
f"Tenant={tenant_id} failed fetching projects for doc_id={new_uuid} - {e.__class__.__name__}"
|
||||
)
|
||||
try:
|
||||
_update_document_id_in_vespa(
|
||||
index_name=index_name,
|
||||
old_doc_id=old_doc_id,
|
||||
new_doc_id=new_uuid,
|
||||
user_project_ids=user_project_ids,
|
||||
)
|
||||
except Exception as e:
|
||||
task_logger.warning(
|
||||
f"Tenant={tenant_id} failed Vespa update for doc_id={new_uuid} - {e.__class__.__name__}"
|
||||
)
|
||||
# Update search_doc records to refer to the UUID string
|
||||
# we are not using document_id_migrated = false because if the migration already completed,
|
||||
# it will not run again and we will not update the search_doc records because of the issue currently fixed
|
||||
user_files = (
|
||||
db_session.execute(
|
||||
sa.select(UserFile).where(UserFile.document_id.is_not(None))
|
||||
)
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
|
||||
# Query all SearchDocs that need updating
|
||||
search_docs = (
|
||||
db_session.execute(
|
||||
sa.select(SearchDoc).where(
|
||||
SearchDoc.document_id.like("%FILE_CONNECTOR__%")
|
||||
)
|
||||
)
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
|
||||
task_logger.info(f"Found {len(user_files)} user files to update")
|
||||
task_logger.info(f"Found {len(search_docs)} search docs to update")
|
||||
|
||||
# Build a map of normalized doc IDs to SearchDocs
|
||||
search_doc_map: dict[str, list[SearchDoc]] = {}
|
||||
for sd in search_docs:
|
||||
doc_id = sd.document_id
|
||||
if search_doc_map.get(doc_id) is None:
|
||||
search_doc_map[doc_id] = []
|
||||
search_doc_map[doc_id].append(sd)
|
||||
|
||||
task_logger.debug(
|
||||
f"Built search doc map with {len(search_doc_map)} entries"
|
||||
)
|
||||
ids_preview = list(search_doc_map.keys())[:5]
|
||||
task_logger.debug(
|
||||
f"First few search_doc_map ids: {ids_preview if ids_preview else 'No ids found'}"
|
||||
)
|
||||
task_logger.debug(
|
||||
f"search_doc_map total items: {sum(len(docs) for docs in search_doc_map.values())}"
|
||||
)
|
||||
# Process each UserFile and update matching SearchDocs
|
||||
updated_count = 0
|
||||
for uf in user_files:
|
||||
doc_id = uf.document_id
|
||||
if doc_id.startswith("USER_FILE_CONNECTOR__"):
|
||||
doc_id = "FILE_CONNECTOR__" + doc_id[len("USER_FILE_CONNECTOR__") :]
|
||||
|
||||
task_logger.debug(f"Processing user file {uf.id} with doc_id {doc_id}")
|
||||
task_logger.debug(
|
||||
f"doc_id in search_doc_map: {doc_id in search_doc_map}"
|
||||
)
|
||||
|
||||
if doc_id in search_doc_map:
|
||||
search_docs = search_doc_map[doc_id]
|
||||
task_logger.debug(
|
||||
f"Found {len(search_docs)} search docs to update for user file {uf.id}"
|
||||
)
|
||||
# Update the SearchDoc to use the UserFile's UUID
|
||||
for search_doc in search_docs:
|
||||
search_doc.document_id = str(uf.id)
|
||||
db_session.add(search_doc)
|
||||
|
||||
# Mark UserFile as migrated
|
||||
uf.document_id_migrated = True
|
||||
db_session.add(uf)
|
||||
updated_count += 1
|
||||
|
||||
task_logger.info(
|
||||
f"Updated {updated_count} SearchDoc records with new UUIDs"
|
||||
)
|
||||
db_session.commit()
|
||||
|
||||
# Normalize plaintext FileRecord blobs: ensure S3 object key aligns with current file_id
|
||||
try:
|
||||
store = get_default_file_store()
|
||||
# Only supported for S3-backed stores where we can manipulate object keys
|
||||
if isinstance(store, S3BackedFileStore):
|
||||
s3_client = store._get_s3_client()
|
||||
bucket_name = store._get_bucket_name()
|
||||
|
||||
plaintext_records: Sequence[FileRecord] = (
|
||||
db_session.execute(
|
||||
sa.select(FileRecord).where(
|
||||
FileRecord.file_origin == FileOrigin.PLAINTEXT_CACHE,
|
||||
FileRecord.file_id.like("plaintext_%"),
|
||||
)
|
||||
)
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
|
||||
normalized = 0
|
||||
for fr in plaintext_records:
|
||||
try:
|
||||
expected_key = store._get_s3_key(fr.file_id)
|
||||
if fr.object_key == expected_key:
|
||||
continue
|
||||
|
||||
# Copy old object to new key
|
||||
copy_source = f"{fr.bucket_name}/{fr.object_key}"
|
||||
s3_client.copy_object(
|
||||
CopySource=copy_source,
|
||||
Bucket=bucket_name,
|
||||
Key=expected_key,
|
||||
MetadataDirective="COPY",
|
||||
)
|
||||
|
||||
# Delete old object (best-effort)
|
||||
try:
|
||||
s3_client.delete_object(
|
||||
Bucket=fr.bucket_name, Key=fr.object_key
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Update DB record with new key
|
||||
fr.object_key = expected_key
|
||||
db_session.add(fr)
|
||||
normalized += 1
|
||||
except Exception as e:
|
||||
task_logger.warning(
|
||||
f"Tenant={tenant_id} failed plaintext object normalize for "
|
||||
f"id={fr.file_id} - {e.__class__.__name__}"
|
||||
)
|
||||
|
||||
if normalized:
|
||||
db_session.commit()
|
||||
task_logger.info(
|
||||
f"user_file_docid_migration_task normalized {normalized} plaintext objects for tenant={tenant_id}"
|
||||
)
|
||||
else:
|
||||
task_logger.info(
|
||||
"user_file_docid_migration_task skipping plaintext object normalization (non-S3 store)"
|
||||
)
|
||||
except Exception:
|
||||
task_logger.exception(
|
||||
f"user_file_docid_migration_task - Error during plaintext normalization for tenant={tenant_id}"
|
||||
)
|
||||
|
||||
task_logger.info(
|
||||
f"user_file_docid_migration_task completed for tenant={tenant_id} (rows={len(rows)})"
|
||||
)
|
||||
return True
|
||||
except Exception:
|
||||
task_logger.exception(
|
||||
f"user_file_docid_migration_task - Error during execution for tenant={tenant_id}"
|
||||
)
|
||||
return False
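The plaintext-cache normalization in the task above leans on the fact that S3 has no rename: each object is copied to the key derived from the current file_id, then the old key is deleted best-effort. The same idiom in isolation, as a sketch (helper name and single-bucket wiring are illustrative only, not part of the task):

def rename_s3_object(s3_client, bucket: str, old_key: str, new_key: str) -> None:
    """Copy an object to a new key, then delete the original (S3 offers no rename)."""
    if old_key == new_key:
        return
    s3_client.copy_object(
        CopySource={"Bucket": bucket, "Key": old_key},
        Bucket=bucket,
        Key=new_key,
        MetadataDirective="COPY",
    )
    # best-effort delete; the copy above is already durable
    try:
        s3_client.delete_object(Bucket=bucket, Key=old_key)
    except Exception:
        pass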
|
||||
@@ -1,16 +0,0 @@
|
||||
"""Factory stub for running the user file processing Celery worker."""
|
||||
|
||||
from celery import Celery
|
||||
|
||||
from onyx.utils.variable_functionality import set_is_ee_based_on_env_variable
|
||||
|
||||
set_is_ee_based_on_env_variable()
|
||||
|
||||
|
||||
def get_app() -> Celery:
|
||||
from onyx.background.celery.apps.user_file_processing import celery_app
|
||||
|
||||
return celery_app
|
||||
|
||||
|
||||
app = get_app()
|
||||
@@ -5,7 +5,6 @@ from sqlalchemy.orm import Session
|
||||
from onyx.configs.constants import NUM_DAYS_TO_KEEP_INDEX_ATTEMPTS
from onyx.db.engine.time_utils import get_db_current_time
from onyx.db.models import IndexAttempt
from onyx.db.models import IndexAttemptError


def get_old_index_attempts(
@@ -22,10 +21,6 @@ def get_old_index_attempts(

def cleanup_index_attempts(db_session: Session, index_attempt_ids: list[int]) -> None:
    """Clean up multiple index attempts"""
    db_session.query(IndexAttemptError).filter(
        IndexAttemptError.index_attempt_id.in_(index_attempt_ids)
    ).delete(synchronize_session=False)

    db_session.query(IndexAttempt).filter(
        IndexAttempt.id.in_(index_attempt_ids)
    ).delete(synchronize_session=False)
|
||||
|
||||
@@ -64,11 +64,9 @@ from onyx.document_index.factory import get_default_document_index
|
||||
from onyx.file_store.document_batch_storage import DocumentBatchStorage
|
||||
from onyx.file_store.document_batch_storage import get_document_batch_storage
|
||||
from onyx.httpx.httpx_pool import HttpxPool
|
||||
from onyx.indexing.adapters.document_indexing_adapter import (
|
||||
DocumentIndexingBatchAdapter,
|
||||
)
|
||||
from onyx.indexing.embedder import DefaultIndexingEmbedder
|
||||
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
|
||||
from onyx.indexing.indexing_pipeline import run_indexing_pipeline
|
||||
from onyx.natural_language_processing.search_nlp_models import (
|
||||
InformationContentClassificationModel,
|
||||
)
|
||||
@@ -102,7 +100,6 @@ def _get_connector_runner(
|
||||
are the complete list of existing documents of the connector. If the task
|
||||
of type LOAD_STATE, the list will be considered complete and otherwise incomplete.
|
||||
"""
|
||||
|
||||
task = attempt.connector_credential_pair.connector.input_type
|
||||
|
||||
try:
|
||||
@@ -286,8 +283,6 @@ def _run_indexing(
|
||||
2. Embed and index these documents into the chosen datastore (vespa)
|
||||
3. Updates Postgres to record the indexed documents + the outcome of this run
|
||||
"""
|
||||
from onyx.indexing.indexing_pipeline import run_indexing_pipeline
|
||||
|
||||
start_time = time.monotonic()  # just used for logging
|
||||
|
||||
with get_session_with_current_tenant() as db_session_temp:
|
||||
@@ -572,13 +567,6 @@ def _run_indexing(
|
||||
index_attempt_md.batch_num = batch_num + 1 # use 1-index for this
|
||||
|
||||
# real work happens here!
|
||||
adapter = DocumentIndexingBatchAdapter(
|
||||
db_session=db_session,
|
||||
connector_id=ctx.connector_id,
|
||||
credential_id=ctx.credential_id,
|
||||
tenant_id=tenant_id,
|
||||
index_attempt_metadata=index_attempt_md,
|
||||
)
|
||||
index_pipeline_result = run_indexing_pipeline(
|
||||
embedder=embedding_model,
|
||||
information_content_classification_model=information_content_classification_model,
|
||||
@@ -590,8 +578,7 @@ def _run_indexing(
|
||||
db_session=db_session,
|
||||
tenant_id=tenant_id,
|
||||
document_batch=doc_batch_cleaned,
|
||||
request_id=index_attempt_md.request_id,
|
||||
adapter=adapter,
|
||||
index_attempt_metadata=index_attempt_md,
|
||||
)
|
||||
|
||||
batch_num += 1
|
||||
|
||||
@@ -62,7 +62,6 @@ class Answer:
|
||||
use_agentic_search: bool = False,
|
||||
research_type: ResearchType | None = None,
|
||||
research_plan: dict[str, Any] | None = None,
|
||||
project_instructions: str | None = None,
|
||||
) -> None:
|
||||
self.is_connected: Callable[[], bool] | None = is_connected
|
||||
self._processed_stream: list[AnswerStreamPart] | None = None
|
||||
@@ -98,7 +97,6 @@ class Answer:
|
||||
prompt_builder=prompt_builder,
|
||||
files=latest_query_files,
|
||||
structured_response_format=answer_style_config.structured_response_format,
|
||||
project_instructions=project_instructions,
|
||||
)
|
||||
self.graph_tooling = GraphTooling(
|
||||
primary_llm=llm,
|
||||
|
||||
@@ -32,7 +32,6 @@ from onyx.db.llm import fetch_existing_doc_sets
|
||||
from onyx.db.llm import fetch_existing_tools
|
||||
from onyx.db.models import ChatMessage
|
||||
from onyx.db.models import Persona
|
||||
from onyx.db.models import SearchDoc as DbSearchDoc
|
||||
from onyx.db.models import Tool
|
||||
from onyx.db.models import User
|
||||
from onyx.db.search_settings import get_current_search_settings
|
||||
@@ -43,7 +42,6 @@ from onyx.kg.setup.kg_default_entity_definitions import (
|
||||
from onyx.llm.models import PreviousMessage
|
||||
from onyx.llm.override_models import LLMOverride
|
||||
from onyx.natural_language_processing.utils import BaseTokenizer
|
||||
from onyx.onyxbot.slack.models import SlackContext
|
||||
from onyx.server.query_and_chat.models import CreateChatMessageRequest
|
||||
from onyx.server.query_and_chat.streaming_models import CitationInfo
|
||||
from onyx.tools.tool_implementations.custom.custom_tool import (
|
||||
@@ -68,7 +66,6 @@ def prepare_chat_message_request(
|
||||
skip_gen_ai_answer_generation: bool = False,
|
||||
llm_override: LLMOverride | None = None,
|
||||
allowed_tool_ids: list[int] | None = None,
|
||||
slack_context: SlackContext | None = None,
|
||||
) -> CreateChatMessageRequest:
|
||||
# Typically used for one shot flows like SlackBot or non-chat API endpoint use cases
|
||||
new_chat_session = create_chat_session(
|
||||
@@ -96,7 +93,6 @@ def prepare_chat_message_request(
|
||||
skip_gen_ai_answer_generation=skip_gen_ai_answer_generation,
|
||||
llm_override=llm_override,
|
||||
allowed_tool_ids=allowed_tool_ids,
|
||||
slack_context=slack_context, # Pass Slack context
|
||||
)
|
||||
|
||||
|
||||
@@ -344,45 +340,6 @@ def reorganize_citations(
|
||||
return new_answer, list(new_citation_info.values())
|
||||
|
||||
|
||||
def build_citation_map_from_infos(
    citations_list: list[CitationInfo], db_docs: list[DbSearchDoc]
) -> dict[int, int]:
    """Translate a list of streaming CitationInfo objects into a mapping of
    citation number -> saved search doc DB id.

    Always cites the first instance of a document_id and assumes db_docs are
    ordered as shown to the user (display order).
    """
    doc_id_to_saved_doc_id_map: dict[str, int] = {}
    for db_doc in db_docs:
        if db_doc.document_id not in doc_id_to_saved_doc_id_map:
            doc_id_to_saved_doc_id_map[db_doc.document_id] = db_doc.id

    citation_to_saved_doc_id_map: dict[int, int] = {}
    for citation in citations_list:
        if citation.citation_num not in citation_to_saved_doc_id_map:
            saved_id = doc_id_to_saved_doc_id_map.get(citation.document_id)
            if saved_id is not None:
                citation_to_saved_doc_id_map[citation.citation_num] = saved_id

    return citation_to_saved_doc_id_map


def build_citation_map_from_numbers(
    cited_numbers: list[int] | set[int], db_docs: list[DbSearchDoc]
) -> dict[int, int]:
    """Translate parsed citation numbers (e.g., from [[n]]) into a mapping of
    citation number -> saved search doc DB id by positional index.
    """
    citation_to_saved_doc_id_map: dict[int, int] = {}
    for num in sorted(set(cited_numbers)):
        idx = num - 1
        if 0 <= idx < len(db_docs):
            citation_to_saved_doc_id_map[num] = db_docs[idx].id

    return citation_to_saved_doc_id_map
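The first helper resolves citations through document_id while the second is purely positional. A tiny illustration of the positional variant, using a stand-in object in place of DbSearchDoc (the ids below are made up for the example):

from dataclasses import dataclass


@dataclass
class _FakeSearchDoc:  # stand-in for DbSearchDoc, illustration only
    id: int
    document_id: str


_docs = [
    _FakeSearchDoc(101, "doc-a"),
    _FakeSearchDoc(102, "doc-b"),
    _FakeSearchDoc(103, "doc-c"),
]
# "[[2]]" and "[[3]]" in the answer refer to the second and third docs in display order
assert build_citation_map_from_numbers({2, 3}, _docs) == {2: 102, 3: 103}
# out-of-range numbers are dropped silently
assert build_citation_map_from_numbers([5], _docs) == {}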
|
||||
|
||||
|
||||
def extract_headers(
|
||||
headers: dict[str, str] | Headers, pass_through_headers: list[str] | None
|
||||
) -> dict[str, str]:
|
||||
|
||||
@@ -5,7 +5,6 @@ from collections.abc import Callable
|
||||
from collections.abc import Iterator
|
||||
from typing import cast
|
||||
from typing import Protocol
|
||||
from uuid import UUID
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
@@ -19,7 +18,6 @@ from onyx.chat.models import AnswerStyleConfig
|
||||
from onyx.chat.models import ChatBasicResponse
|
||||
from onyx.chat.models import CitationConfig
|
||||
from onyx.chat.models import DocumentPruningConfig
|
||||
from onyx.chat.models import LlmDoc
|
||||
from onyx.chat.models import MessageResponseIDInfo
|
||||
from onyx.chat.models import MessageSpecificCitations
|
||||
from onyx.chat.models import PromptConfig
|
||||
@@ -37,7 +35,6 @@ from onyx.configs.chat_configs import CHAT_TARGET_CHUNK_PERCENTAGE
|
||||
from onyx.configs.chat_configs import DISABLE_LLM_CHOOSE_SEARCH
|
||||
from onyx.configs.chat_configs import MAX_CHUNKS_FED_TO_CHAT
|
||||
from onyx.configs.chat_configs import SELECTED_SECTIONS_MAX_WINDOW_PERCENTAGE
|
||||
from onyx.configs.constants import DocumentSource
|
||||
from onyx.configs.constants import MessageType
|
||||
from onyx.configs.constants import MilestoneRecordType
|
||||
from onyx.configs.constants import NO_AUTH_USER_ID
|
||||
@@ -66,13 +63,9 @@ from onyx.db.models import SearchDoc as DbSearchDoc
|
||||
from onyx.db.models import ToolCall
|
||||
from onyx.db.models import User
|
||||
from onyx.db.persona import get_persona_by_id
|
||||
from onyx.db.projects import get_project_instructions
|
||||
from onyx.db.projects import get_user_files_from_project
|
||||
from onyx.db.search_settings import get_current_search_settings
|
||||
from onyx.document_index.factory import get_default_document_index
|
||||
from onyx.file_store.models import FileDescriptor
|
||||
from onyx.file_store.models import InMemoryChatFile
|
||||
from onyx.file_store.utils import build_frontend_file_url
|
||||
from onyx.file_store.utils import load_all_chat_files
|
||||
from onyx.kg.models import KGException
|
||||
from onyx.llm.exceptions import GenAIDisabledException
|
||||
@@ -108,7 +101,6 @@ from onyx.utils.timing import log_function_time
|
||||
from onyx.utils.timing import log_generator_function_time
|
||||
from shared_configs.contextvars import get_current_tenant_id
|
||||
|
||||
|
||||
logger = setup_logger()
|
||||
ERROR_TYPE_CANCELLED = "cancelled"
|
||||
|
||||
@@ -127,66 +119,6 @@ class PartialResponse(Protocol):
|
||||
) -> ChatMessage: ...
|
||||
|
||||
|
||||
def _build_project_llm_docs(
|
||||
project_file_ids: list[str] | None,
|
||||
in_memory_user_files: list[InMemoryChatFile] | None,
|
||||
) -> list[LlmDoc]:
|
||||
"""Construct `LlmDoc` objects for project-scoped user files for citation flow."""
|
||||
project_llm_docs: list[LlmDoc] = []
|
||||
if not project_file_ids or not in_memory_user_files:
|
||||
return project_llm_docs
|
||||
|
||||
project_file_id_set = set(project_file_ids)
|
||||
for f in in_memory_user_files:
|
||||
if project_file_id_set and (f.file_id in project_file_id_set):
|
||||
|
||||
def _strip_nuls(s: str) -> str:
|
||||
return s.replace("\x00", "") if s else s
|
||||
|
||||
cleaned_filename = _strip_nuls(f.filename or str(f.file_id))
|
||||
|
||||
if f.file_type.is_text_file():
|
||||
try:
|
||||
text_content = f.content.decode("utf-8", errors="ignore")
|
||||
text_content = _strip_nuls(text_content)
|
||||
except Exception:
|
||||
text_content = ""
|
||||
|
||||
# Build a short blurb from the file content for better UI display
|
||||
blurb = (
|
||||
(text_content[:200] + "...")
|
||||
if len(text_content) > 200
|
||||
else text_content
|
||||
)
|
||||
else:
|
||||
# Non-text (e.g., images): do not decode bytes; keep empty content but allow citation
|
||||
text_content = ""
|
||||
blurb = f"[{f.file_type.value}] {cleaned_filename}"
|
||||
|
||||
# Provide basic metadata to improve SavedSearchDoc display
|
||||
file_metadata: dict[str, str | list[str]] = {
|
||||
"filename": cleaned_filename,
|
||||
"file_type": f.file_type.value,
|
||||
}
|
||||
|
||||
project_llm_docs.append(
|
||||
LlmDoc(
|
||||
document_id=str(f.file_id),
|
||||
content=text_content,
|
||||
blurb=blurb,
|
||||
semantic_identifier=cleaned_filename,
|
||||
source_type=DocumentSource.USER_FILE,
|
||||
metadata=file_metadata,
|
||||
updated_at=None,
|
||||
link=build_frontend_file_url(str(f.file_id)),
|
||||
source_links=None,
|
||||
match_highlights=None,
|
||||
)
|
||||
)
|
||||
|
||||
return project_llm_docs
|
||||
|
||||
|
||||
def _translate_citations(
|
||||
citations_list: list[CitationInfo], db_docs: list[DbSearchDoc]
|
||||
) -> MessageSpecificCitations:
|
||||
@@ -504,29 +436,26 @@ def stream_chat_message_objects(
|
||||
files = load_all_chat_files(history_msgs, new_msg_req.file_descriptors)
|
||||
req_file_ids = [f["id"] for f in new_msg_req.file_descriptors]
|
||||
latest_query_files = [file for file in files if file.file_id in req_file_ids]
|
||||
user_file_ids: list[UUID] = []
|
||||
user_file_ids = new_msg_req.user_file_ids or []
|
||||
user_folder_ids = new_msg_req.user_folder_ids or []
|
||||
|
||||
if persona.user_files:
|
||||
for uf in persona.user_files:
|
||||
user_file_ids.append(uf.id)
|
||||
|
||||
if new_msg_req.current_message_files:
|
||||
for fd in new_msg_req.current_message_files:
|
||||
uid = fd.get("user_file_id")
|
||||
if uid is not None:
|
||||
user_file_id = UUID(uid)
|
||||
user_file_ids.append(user_file_id)
|
||||
for file in persona.user_files:
|
||||
user_file_ids.append(file.id)
|
||||
if persona.user_folders:
|
||||
for folder in persona.user_folders:
|
||||
user_folder_ids.append(folder.id)
|
||||
|
||||
# Load in user files into memory and create search tool override kwargs if needed
|
||||
# if we have enough tokens, we don't need to use search
|
||||
# if we have enough tokens and no folders, we don't need to use search
|
||||
# we can just pass them into the prompt directly
|
||||
(
|
||||
in_memory_user_files,
|
||||
user_file_models,
|
||||
search_tool_override_kwargs_for_user_files,
|
||||
) = parse_user_files(
|
||||
user_file_ids=user_file_ids or [],
|
||||
project_id=chat_session.project_id,
|
||||
user_file_ids=user_file_ids,
|
||||
user_folder_ids=user_folder_ids,
|
||||
db_session=db_session,
|
||||
persona=persona,
|
||||
actual_user_input=message_text,
|
||||
@@ -535,37 +464,16 @@ def stream_chat_message_objects(
|
||||
if not search_tool_override_kwargs_for_user_files:
|
||||
latest_query_files.extend(in_memory_user_files)
|
||||
|
||||
project_file_ids = []
|
||||
if chat_session.project_id:
|
||||
project_file_ids.extend(
|
||||
[
|
||||
file.file_id
|
||||
for file in get_user_files_from_project(
|
||||
chat_session.project_id, user_id, db_session
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
# we don't want to attach project files to the user message
|
||||
if user_message:
|
||||
attach_files_to_chat_message(
|
||||
chat_message=user_message,
|
||||
files=[
|
||||
new_file.to_file_descriptor()
|
||||
for new_file in latest_query_files
|
||||
if project_file_ids is not None
|
||||
and (new_file.file_id not in project_file_ids)
|
||||
new_file.to_file_descriptor() for new_file in latest_query_files
|
||||
],
|
||||
db_session=db_session,
|
||||
commit=False,
|
||||
)
|
||||
|
||||
# Build project context docs for citation flow if project files are present
|
||||
project_llm_docs: list[LlmDoc] = _build_project_llm_docs(
|
||||
project_file_ids=project_file_ids,
|
||||
in_memory_user_files=in_memory_user_files,
|
||||
)
|
||||
|
||||
selected_db_search_docs = None
|
||||
selected_sections: list[InferenceSection] | None = None
|
||||
if reference_doc_ids:
|
||||
@@ -651,22 +559,12 @@ def stream_chat_message_objects(
|
||||
else:
|
||||
prompt_config = PromptConfig.from_model(persona)
|
||||
|
||||
# Retrieve project-specific instructions if this chat session is associated with a project.
|
||||
project_instructions: str | None = (
|
||||
get_project_instructions(
|
||||
db_session=db_session, project_id=chat_session.project_id
|
||||
)
|
||||
if persona.is_default_persona
|
||||
else None
|
||||
) # if the persona is not default, we don't want to use the project instructions
|
||||
|
||||
answer_style_config = AnswerStyleConfig(
|
||||
citation_config=CitationConfig(
|
||||
all_docs_useful=selected_db_search_docs is not None
|
||||
),
|
||||
structured_response_format=new_msg_req.structured_response_format,
|
||||
)
|
||||
has_project_files = project_file_ids is not None and len(project_file_ids) > 0
|
||||
|
||||
tool_dict = construct_tools(
|
||||
persona=persona,
|
||||
@@ -676,17 +574,9 @@ def stream_chat_message_objects(
|
||||
llm=llm,
|
||||
fast_llm=fast_llm,
|
||||
run_search_setting=(
|
||||
OptionalSearchSetting.NEVER
|
||||
if (
|
||||
chat_session.project_id
|
||||
and not has_project_files
|
||||
and persona.is_default_persona
|
||||
)
|
||||
else (
|
||||
retrieval_options.run_search
|
||||
if retrieval_options
|
||||
else OptionalSearchSetting.AUTO
|
||||
)
|
||||
retrieval_options.run_search
|
||||
if retrieval_options
|
||||
else OptionalSearchSetting.AUTO
|
||||
),
|
||||
search_tool_config=SearchToolConfig(
|
||||
answer_style_config=answer_style_config,
|
||||
@@ -713,7 +603,6 @@ def stream_chat_message_objects(
|
||||
additional_headers=custom_tool_additional_headers,
|
||||
),
|
||||
allowed_tool_ids=new_msg_req.allowed_tool_ids,
|
||||
slack_context=new_msg_req.slack_context, # Pass Slack context from request
|
||||
)
|
||||
|
||||
tools: list[Tool] = []
|
||||
@@ -728,7 +617,6 @@ def stream_chat_message_objects(
|
||||
message_history = [
|
||||
PreviousMessage.from_chat_message(msg, files) for msg in history_msgs
|
||||
]
|
||||
|
||||
if not search_tool_override_kwargs_for_user_files and in_memory_user_files:
|
||||
yield UserKnowledgeFilePacket(
|
||||
user_files=[
|
||||
@@ -736,8 +624,6 @@ def stream_chat_message_objects(
|
||||
id=str(file.file_id), type=file.file_type, name=file.filename
|
||||
)
|
||||
for file in in_memory_user_files
|
||||
if project_file_ids is not None
|
||||
and (file.file_id not in project_file_ids)
|
||||
]
|
||||
)
|
||||
|
||||
@@ -756,10 +642,6 @@ def stream_chat_message_objects(
|
||||
single_message_history=single_message_history,
|
||||
)
|
||||
|
||||
if project_llm_docs and not search_tool_override_kwargs_for_user_files:
|
||||
# Store for downstream streaming to wire citations and final_documents
|
||||
prompt_builder.context_llm_docs = project_llm_docs
|
||||
|
||||
# LLM prompt building, response capturing, etc.
|
||||
answer = Answer(
|
||||
prompt_builder=prompt_builder,
|
||||
@@ -788,7 +670,6 @@ def stream_chat_message_objects(
|
||||
db_session=db_session,
|
||||
use_agentic_search=new_msg_req.use_agentic_search,
|
||||
skip_gen_ai_answer_generation=new_msg_req.skip_gen_ai_answer_generation,
|
||||
project_instructions=project_instructions,
|
||||
)
|
||||
|
||||
# Process streamed packets using the new packet processing module
|
||||
|
||||
@@ -4,9 +4,9 @@ from typing import cast
|
||||
from langchain_core.messages import BaseMessage
|
||||
from langchain_core.messages import HumanMessage
|
||||
from langchain_core.messages import SystemMessage
|
||||
from pydantic import BaseModel
|
||||
from pydantic.v1 import BaseModel as BaseModel__v1
|
||||
|
||||
from onyx.chat.models import LlmDoc
|
||||
from onyx.chat.models import PromptConfig
|
||||
from onyx.chat.prompt_builder.citations_prompt import compute_max_llm_input_tokens
|
||||
from onyx.chat.prompt_builder.utils import translate_history_to_basemessages
|
||||
@@ -76,7 +76,6 @@ def default_build_user_message(
|
||||
if prompt_config.task_prompt
|
||||
else user_query
|
||||
)
|
||||
|
||||
user_prompt = user_prompt.strip()
|
||||
tag_handled_prompt = handle_onyx_date_awareness(user_prompt, prompt_config)
|
||||
user_msg = HumanMessage(
|
||||
@@ -133,10 +132,6 @@ class AnswerPromptBuilder:
|
||||
self.raw_user_uploaded_files = raw_user_uploaded_files
|
||||
self.single_message_history = single_message_history
|
||||
|
||||
# Optional: if the prompt includes explicit context documents (e.g., project files),
|
||||
# store them here so downstream streaming can reference them for citation mapping.
|
||||
self.context_llm_docs: list[LlmDoc] | None = None
|
||||
|
||||
def update_system_prompt(self, system_message: SystemMessage | None) -> None:
|
||||
if not system_message:
|
||||
self.system_message_and_token_cnt = None
|
||||
@@ -201,6 +196,10 @@ class AnswerPromptBuilder:
|
||||
|
||||
|
||||
# Stores some parts of a prompt builder as needed for tool calls
|
||||
class PromptSnapshot(BaseModel):
|
||||
raw_message_history: list[PreviousMessage]
|
||||
raw_user_query: str
|
||||
built_prompt: list[BaseMessage]
|
||||
|
||||
|
||||
# TODO: rename this? AnswerConfig maybe?
|
||||
|
||||
@@ -1,10 +0,0 @@
|
||||
from langchain_core.messages import BaseMessage
|
||||
from pydantic import BaseModel
|
||||
|
||||
from onyx.llm.models import PreviousMessage
|
||||
|
||||
|
||||
class PromptSnapshot(BaseModel):
|
||||
raw_message_history: list[PreviousMessage]
|
||||
raw_user_query: str
|
||||
built_prompt: list[BaseMessage]
|
||||
@@ -12,35 +12,6 @@ from onyx.utils.logger import setup_logger
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
def normalize_square_bracket_citations_to_double_with_links(text: str) -> str:
    """
    Normalize citation markers in the text:
    - Convert bare double-bracket citations without links `[[n]]` to `[[n]]()`
    - Convert single-bracket citations `[n]` to `[[n]]()`
    Leaves existing linked citations like `[[n]](http...)` unchanged.
    """
    if not text:
        return ""

    # Add empty parens to bare double-bracket citations without a link: [[n]] -> [[n]]()
    pattern_double_no_link = re.compile(r"\[\[(\d+)\]\](?!\()")

    def _repl_double(match: re.Match[str]) -> str:
        num = match.group(1)
        return f"[[{num}]]()"

    text = pattern_double_no_link.sub(_repl_double, text)

    # Convert single [n] not already [[n]] to [[n]]()
    pattern_single = re.compile(r"(?<!\[)\[(\d+)\](?!\])")

    def _repl_single(match: re.Match[str]) -> str:
        num = match.group(1)
        return f"[[{num}]]()"

    return pattern_single.sub(_repl_single, text)
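A quick before/after illustration of the normalization (input string invented for the example):

_raw = "See [1] and [[2]] but keep [[3]](https://example.com) as-is."
assert (
    normalize_square_bracket_citations_to_double_with_links(_raw)
    == "See [[1]]() and [[2]]() but keep [[3]](https://example.com) as-is."
)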
|
||||
|
||||
|
||||
def in_code_block(llm_text: str) -> bool:
|
||||
count = llm_text.count(TRIPLE_BACKTICK)
|
||||
return count % 2 != 0
|
||||
|
||||
@@ -7,7 +7,7 @@ from langchain_core.messages import ToolCall
|
||||
from onyx.chat.models import ResponsePart
|
||||
from onyx.chat.prompt_builder.answer_prompt_builder import AnswerPromptBuilder
|
||||
from onyx.chat.prompt_builder.answer_prompt_builder import LLMCall
|
||||
from onyx.chat.prompt_builder.schemas import PromptSnapshot
|
||||
from onyx.chat.prompt_builder.answer_prompt_builder import PromptSnapshot
|
||||
from onyx.llm.interfaces import LLM
|
||||
from onyx.tools.force import ForceUseTool
|
||||
from onyx.tools.message import build_tool_message
|
||||
|
||||
@@ -4,8 +4,6 @@ from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.db.models import Persona
|
||||
from onyx.db.models import UserFile
|
||||
from onyx.db.projects import get_user_files_from_project
|
||||
from onyx.db.user_file import update_last_accessed_at_for_user_files
|
||||
from onyx.file_store.models import InMemoryChatFile
|
||||
from onyx.file_store.utils import get_user_files_as_user
|
||||
from onyx.file_store.utils import load_in_memory_chat_files
|
||||
@@ -17,24 +15,24 @@ logger = setup_logger()
|
||||
|
||||
|
||||
def parse_user_files(
|
||||
user_file_ids: list[UUID],
|
||||
user_file_ids: list[int],
|
||||
user_folder_ids: list[int],
|
||||
db_session: Session,
|
||||
persona: Persona,
|
||||
actual_user_input: str,
|
||||
project_id: int | None,
|
||||
# should only be None if auth is disabled
|
||||
user_id: UUID | None,
|
||||
) -> tuple[list[InMemoryChatFile], list[UserFile], SearchToolOverrideKwargs | None]:
|
||||
"""
|
||||
Parse user files and project into in-memory chat files and create search tool override kwargs.
|
||||
Only creates SearchToolOverrideKwargs if token overflow occurs.
|
||||
Parse user files and folders into in-memory chat files and create search tool override kwargs.
|
||||
Only creates SearchToolOverrideKwargs if token overflow occurs or folders are present.
|
||||
|
||||
Args:
|
||||
user_file_ids: List of user file IDs to load
|
||||
user_folder_ids: List of user folder IDs to load
|
||||
db_session: Database session
|
||||
persona: Persona to calculate available tokens
|
||||
actual_user_input: User's input message for token calculation
|
||||
project_id: Project ID to validate file ownership
|
||||
user_id: User ID to validate file ownership
|
||||
|
||||
Returns:
|
||||
@@ -42,56 +40,37 @@ def parse_user_files(
|
||||
loaded user files,
|
||||
user file models,
|
||||
search tool override kwargs if token
|
||||
overflow
|
||||
overflow or folders present
|
||||
)
|
||||
"""
|
||||
# Return empty results if no files or project specified
|
||||
if not user_file_ids and not project_id:
|
||||
# Return empty results if no files or folders specified
|
||||
if not user_file_ids and not user_folder_ids:
|
||||
return [], [], None
|
||||
|
||||
project_user_file_ids = []
|
||||
|
||||
if project_id:
|
||||
project_user_file_ids.extend(
|
||||
[
|
||||
file.id
|
||||
for file in get_user_files_from_project(project_id, user_id, db_session)
|
||||
]
|
||||
)
|
||||
|
||||
# Combine user-provided and project-derived user file IDs
|
||||
combined_user_file_ids = user_file_ids + project_user_file_ids or []
|
||||
|
||||
# Load user files from the database into memory
|
||||
user_files = load_in_memory_chat_files(
|
||||
combined_user_file_ids,
|
||||
user_file_ids or [],
|
||||
user_folder_ids or [],
|
||||
db_session,
|
||||
)
|
||||
|
||||
user_file_models = get_user_files_as_user(
|
||||
combined_user_file_ids,
|
||||
user_file_ids or [],
|
||||
user_folder_ids or [],
|
||||
user_id,
|
||||
db_session,
|
||||
)
|
||||
|
||||
# Update last accessed at for the user files which are used in the chat
|
||||
if user_file_ids or project_user_file_ids:
|
||||
# update_last_accessed_at_for_user_files expects list[UUID]
|
||||
update_last_accessed_at_for_user_files(
|
||||
combined_user_file_ids,
|
||||
db_session,
|
||||
)
|
||||
|
||||
# Calculate token count for the files, need to import here to avoid circular import
|
||||
# TODO: fix this
|
||||
from onyx.db.user_file import calculate_user_files_token_count
|
||||
from onyx.db.user_documents import calculate_user_files_token_count
|
||||
from onyx.chat.prompt_builder.citations_prompt import (
|
||||
compute_max_document_tokens_for_persona,
|
||||
)
|
||||
|
||||
# calculate_user_files_token_count now expects list[UUID]
|
||||
total_tokens = calculate_user_files_token_count(
|
||||
combined_user_file_ids,
|
||||
user_file_ids or [],
|
||||
user_folder_ids or [],
|
||||
db_session,
|
||||
)
|
||||
|
||||
@@ -100,31 +79,27 @@ def parse_user_files(
|
||||
persona=persona,
|
||||
actual_user_input=actual_user_input,
|
||||
)
|
||||
uploaded_context_cap = int(available_tokens * 0.5)
|
||||
|
||||
logger.debug(
|
||||
f"Total file tokens: {total_tokens}, Available tokens: {available_tokens},"
|
||||
f"Allowed uploaded context tokens: {uploaded_context_cap}"
|
||||
f"Total file tokens: {total_tokens}, Available tokens: {available_tokens}"
|
||||
)
|
||||
|
||||
have_enough_tokens = total_tokens <= uploaded_context_cap
|
||||
have_enough_tokens = total_tokens <= available_tokens
|
||||
|
||||
# If we have enough tokens, we don't need search
|
||||
# If we have enough tokens and no folders, we don't need search
|
||||
# we can just pass them into the prompt directly
|
||||
if have_enough_tokens:
|
||||
if have_enough_tokens and not user_folder_ids:
|
||||
# No search tool override needed - files can be passed directly
|
||||
return user_files, user_file_models, None
|
||||
|
||||
# Token overflow - need to use search tool
|
||||
# Token overflow or folders present - need to use search tool
|
||||
override_kwargs = SearchToolOverrideKwargs(
|
||||
force_no_rerank=have_enough_tokens,
|
||||
alternate_db_session=None,
|
||||
retrieved_sections_callback=None,
|
||||
skip_query_analysis=have_enough_tokens,
|
||||
user_file_ids=user_file_ids or [],
|
||||
project_id=(
|
||||
project_id if persona.is_default_persona else None
|
||||
), # if the persona is not default, we don't want to use the project files
|
||||
user_file_ids=user_file_ids,
|
||||
user_folder_ids=user_folder_ids,
|
||||
)
|
||||
|
||||
return user_files, user_file_models, override_kwargs
|
||||
|
||||
@@ -65,19 +65,19 @@ WEB_DOMAIN = os.environ.get("WEB_DOMAIN") or "http://localhost:3000"
|
||||
AUTH_TYPE = AuthType((os.environ.get("AUTH_TYPE") or AuthType.DISABLED.value).lower())
|
||||
DISABLE_AUTH = AUTH_TYPE == AuthType.DISABLED
|
||||
|
||||
PASSWORD_MIN_LENGTH = int(os.getenv("PASSWORD_MIN_LENGTH", 8))
|
||||
PASSWORD_MIN_LENGTH = int(os.getenv("PASSWORD_MIN_LENGTH", 12))
|
||||
PASSWORD_MAX_LENGTH = int(os.getenv("PASSWORD_MAX_LENGTH", 64))
|
||||
PASSWORD_REQUIRE_UPPERCASE = (
|
||||
os.environ.get("PASSWORD_REQUIRE_UPPERCASE", "false").lower() == "true"
|
||||
os.environ.get("PASSWORD_REQUIRE_UPPERCASE", "true").lower() == "true"
|
||||
)
|
||||
PASSWORD_REQUIRE_LOWERCASE = (
|
||||
os.environ.get("PASSWORD_REQUIRE_LOWERCASE", "false").lower() == "true"
|
||||
os.environ.get("PASSWORD_REQUIRE_LOWERCASE", "true").lower() == "true"
|
||||
)
|
||||
PASSWORD_REQUIRE_DIGIT = (
|
||||
os.environ.get("PASSWORD_REQUIRE_DIGIT", "false").lower() == "true"
|
||||
os.environ.get("PASSWORD_REQUIRE_DIGIT", "true").lower() == "true"
|
||||
)
|
||||
PASSWORD_REQUIRE_SPECIAL_CHAR = (
|
||||
os.environ.get("PASSWORD_REQUIRE_SPECIAL_CHAR", "false").lower() == "true"
|
||||
os.environ.get("PASSWORD_REQUIRE_SPECIAL_CHAR", "true").lower() == "true"
|
||||
)
|
||||
|
||||
# Encryption key secret is used to encrypt connector credentials, api keys, and other sensitive
|
||||
@@ -362,18 +362,6 @@ CELERY_WORKER_PRIMARY_CONCURRENCY = int(
|
||||
CELERY_WORKER_PRIMARY_POOL_OVERFLOW = int(
|
||||
os.environ.get("CELERY_WORKER_PRIMARY_POOL_OVERFLOW") or 4
|
||||
)
|
||||
CELERY_WORKER_USER_FILE_PROCESSING_CONCURRENCY_DEFAULT = 4
|
||||
try:
|
||||
CELERY_WORKER_USER_FILE_PROCESSING_CONCURRENCY = int(
|
||||
os.environ.get(
|
||||
"CELERY_WORKER_USER_FILE_PROCESSING_CONCURRENCY",
|
||||
CELERY_WORKER_USER_FILE_PROCESSING_CONCURRENCY_DEFAULT,
|
||||
)
|
||||
)
|
||||
except ValueError:
|
||||
CELERY_WORKER_USER_FILE_PROCESSING_CONCURRENCY = (
|
||||
CELERY_WORKER_USER_FILE_PROCESSING_CONCURRENCY_DEFAULT
|
||||
)
|
||||
|
||||
# The maximum number of tasks that can be queued up to sync to Vespa in a single pass
|
||||
VESPA_SYNC_MAX_TASKS = 8192
|
||||
@@ -678,8 +666,8 @@ LOG_ALL_MODEL_INTERACTIONS = (
|
||||
os.environ.get("LOG_ALL_MODEL_INTERACTIONS", "").lower() == "true"
|
||||
)
|
||||
# Logs Onyx only model interactions like prompts, responses, messages etc.
|
||||
LOG_ONYX_MODEL_INTERACTIONS = (
|
||||
os.environ.get("LOG_ONYX_MODEL_INTERACTIONS", "").lower() == "true"
|
||||
LOG_DANSWER_MODEL_INTERACTIONS = (
|
||||
os.environ.get("LOG_DANSWER_MODEL_INTERACTIONS", "").lower() == "true"
|
||||
)
|
||||
LOG_INDIVIDUAL_MODEL_TOKENS = (
|
||||
os.environ.get("LOG_INDIVIDUAL_MODEL_TOKENS", "").lower() == "true"
|
||||
@@ -770,6 +758,24 @@ AZURE_DALLE_DEPLOYMENT_NAME = os.environ.get("AZURE_DALLE_DEPLOYMENT_NAME")
|
||||
# configurable image model
|
||||
IMAGE_MODEL_NAME = os.environ.get("IMAGE_MODEL_NAME", "gpt-image-1")
|
||||
|
||||
CODE_INTERPRETER_BASE_URL = os.environ.get("CODE_INTERPRETER_BASE_URL")
_CODE_INTERPRETER_DEFAULT_TIMEOUT_MS_RAW = os.environ.get(
    "CODE_INTERPRETER_DEFAULT_TIMEOUT_MS"
)
CODE_INTERPRETER_DEFAULT_TIMEOUT_MS = (
    int(_CODE_INTERPRETER_DEFAULT_TIMEOUT_MS_RAW)
    if _CODE_INTERPRETER_DEFAULT_TIMEOUT_MS_RAW
    else 30_000
)
_CODE_INTERPRETER_REQUEST_TIMEOUT_SECONDS_RAW = os.environ.get(
    "CODE_INTERPRETER_REQUEST_TIMEOUT_SECONDS"
)
CODE_INTERPRETER_REQUEST_TIMEOUT_SECONDS = (
    int(_CODE_INTERPRETER_REQUEST_TIMEOUT_SECONDS_RAW)
    if _CODE_INTERPRETER_REQUEST_TIMEOUT_SECONDS_RAW
    else 30
)
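Both timeouts repeat the same optional-integer parsing idiom; a small helper along these lines could replace it (a sketch, not something the config module currently defines):

def _optional_int_env(name: str, default: int) -> int:
    """Read an integer env var, falling back to `default` when unset or empty."""
    raw = os.environ.get(name)
    return int(raw) if raw else default


# e.g. CODE_INTERPRETER_DEFAULT_TIMEOUT_MS = _optional_int_env(
#     "CODE_INTERPRETER_DEFAULT_TIMEOUT_MS", 30_000
# )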
|
||||
|
# Use managed Vespa (Vespa Cloud). If set, must also set VESPA_CLOUD_URL, VESPA_CLOUD_CERT_PATH and VESPA_CLOUD_KEY_PATH
MANAGED_VESPA = os.environ.get("MANAGED_VESPA", "").lower() == "true"


@@ -3,6 +3,7 @@ import os
INPUT_PROMPT_YAML = "./onyx/seeding/input_prompts.yaml"
PROMPTS_YAML = "./onyx/seeding/prompts.yaml"
PERSONAS_YAML = "./onyx/seeding/personas.yaml"
USER_FOLDERS_YAML = "./onyx/seeding/user_folders.yaml"
NUM_RETURNED_HITS = 50
# Used for LLM filtering and reranking
# We want this to be approximately the number of results we want to show on the first page
@@ -90,7 +91,6 @@ HARD_DELETE_CHATS = os.environ.get("HARD_DELETE_CHATS", "").lower() == "true"

# Internet Search
EXA_API_KEY = os.environ.get("EXA_API_KEY") or None
SERPER_API_KEY = os.environ.get("SERPER_API_KEY") or None

NUM_INTERNET_SEARCH_RESULTS = int(os.environ.get("NUM_INTERNET_SEARCH_RESULTS") or 10)
NUM_INTERNET_SEARCH_CHUNKS = int(os.environ.get("NUM_INTERNET_SEARCH_CHUNKS") or 50)

@@ -7,8 +7,6 @@ from enum import Enum

ONYX_DEFAULT_APPLICATION_NAME = "Onyx"
ONYX_SLACK_URL = "https://join.slack.com/t/onyx-dot-app/shared_invite/zt-2twesxdr6-5iQitKZQpgq~hYIZ~dv3KA"
SLACK_USER_TOKEN_PREFIX = "xoxp-"
SLACK_BOT_TOKEN_PREFIX = "xoxb-"
ONYX_EMAILABLE_LOGO_MAX_DIM = 512

SOURCE_TYPE = "source_type"
@@ -78,9 +76,6 @@ POSTGRES_CELERY_WORKER_DOCFETCHING_APP_NAME = "celery_worker_docfetching"
POSTGRES_CELERY_WORKER_MONITORING_APP_NAME = "celery_worker_monitoring"
POSTGRES_CELERY_WORKER_INDEXING_CHILD_APP_NAME = "celery_worker_indexing_child"
POSTGRES_CELERY_WORKER_KG_PROCESSING_APP_NAME = "celery_worker_kg_processing"
POSTGRES_CELERY_WORKER_USER_FILE_PROCESSING_APP_NAME = (
    "celery_worker_user_file_processing"
)
POSTGRES_PERMISSIONS_APP_NAME = "permissions"
POSTGRES_UNKNOWN_APP_NAME = "unknown"

@@ -117,6 +112,7 @@ CELERY_GENERIC_BEAT_LOCK_TIMEOUT = 120

CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT = 120

CELERY_USER_FILE_FOLDER_SYNC_BEAT_LOCK_TIMEOUT = 120

CELERY_PRIMARY_WORKER_LOCK_TIMEOUT = 120

@@ -207,8 +203,6 @@ class DocumentSource(str, Enum):

    # Special case just for integration tests
    MOCK_CONNECTOR = "mock_connector"
    # Special case for user files
    USER_FILE = "user_file"


class FederatedConnectorSource(str, Enum):
@@ -304,7 +298,6 @@ class FileOrigin(str, Enum):
    PLAINTEXT_CACHE = "plaintext_cache"
    OTHER = "other"
    QUERY_HISTORY_CSV = "query_history_csv"
    USER_FILE = "user_file"


class FileType(str, Enum):
@@ -350,9 +343,6 @@ class OnyxCeleryQueues:
    # Indexing queue
    USER_FILES_INDEXING = "user_files_indexing"

    # User file processing queue
    USER_FILE_PROCESSING = "user_file_processing"
    USER_FILE_PROJECT_SYNC = "user_file_project_sync"
    # Document processing pipeline queue
    DOCPROCESSING = "docprocessing"
    CONNECTOR_DOC_FETCHING = "connector_doc_fetching"
@@ -378,7 +368,7 @@ class OnyxRedisLocks:
    CHECK_CONNECTOR_EXTERNAL_GROUP_SYNC_BEAT_LOCK = (
        "da_lock:check_connector_external_group_sync_beat"
    )

    CHECK_USER_FILE_FOLDER_SYNC_BEAT_LOCK = "da_lock:check_user_file_folder_sync_beat"
    MONITOR_BACKGROUND_PROCESSES_LOCK = "da_lock:monitor_background_processes"
    CHECK_AVAILABLE_TENANTS_LOCK = "da_lock:check_available_tenants"
    CLOUD_PRE_PROVISION_TENANT_LOCK = "da_lock:pre_provision_tenant"
@@ -400,12 +390,6 @@ class OnyxRedisLocks:
    # KG processing
    KG_PROCESSING_LOCK = "da_lock:kg_processing"

    # User file processing
    USER_FILE_PROCESSING_BEAT_LOCK = "da_lock:check_user_file_processing_beat"
    USER_FILE_PROCESSING_LOCK_PREFIX = "da_lock:user_file_processing"
    USER_FILE_PROJECT_SYNC_BEAT_LOCK = "da_lock:check_user_file_project_sync_beat"
    USER_FILE_PROJECT_SYNC_LOCK_PREFIX = "da_lock:user_file_project_sync"


class OnyxRedisSignals:
    BLOCK_VALIDATE_INDEXING_FENCES = "signal:block_validate_indexing_fences"
@@ -464,6 +448,8 @@ class OnyxCeleryTask:
        f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_monitor_celery_pidbox"
    )

    UPDATE_USER_FILE_FOLDER_METADATA = "update_user_file_folder_metadata"

    CHECK_FOR_CONNECTOR_DELETION = "check_for_connector_deletion_task"
    CHECK_FOR_VESPA_SYNC_TASK = "check_for_vespa_sync_task"
    CHECK_FOR_INDEXING = "check_for_indexing"
@@ -471,12 +457,7 @@ class OnyxCeleryTask:
    CHECK_FOR_DOC_PERMISSIONS_SYNC = "check_for_doc_permissions_sync"
    CHECK_FOR_EXTERNAL_GROUP_SYNC = "check_for_external_group_sync"
    CHECK_FOR_LLM_MODEL_UPDATE = "check_for_llm_model_update"

    # User file processing
    CHECK_FOR_USER_FILE_PROCESSING = "check_for_user_file_processing"
    PROCESS_SINGLE_USER_FILE = "process_single_user_file"
    CHECK_FOR_USER_FILE_PROJECT_SYNC = "check_for_user_file_project_sync"
    PROCESS_SINGLE_USER_FILE_PROJECT_SYNC = "process_single_user_file_project_sync"
    CHECK_FOR_USER_FILE_FOLDER_SYNC = "check_for_user_file_folder_sync"

    # Connector checkpoint cleanup
    CHECK_FOR_CHECKPOINT_CLEANUP = "check_for_checkpoint_cleanup"
@@ -509,7 +490,6 @@ class OnyxCeleryTask:
    CONNECTOR_PRUNING_GENERATOR_TASK = "connector_pruning_generator_task"
    DOCUMENT_BY_CC_PAIR_CLEANUP_TASK = "document_by_cc_pair_cleanup_task"
    VESPA_METADATA_SYNC_TASK = "vespa_metadata_sync_task"
    USER_FILE_DOCID_MIGRATION = "user_file_docid_migration"

    # chat retention
    CHECK_TTL_MANAGEMENT_TASK = "check_ttl_management_task"

@@ -3,26 +3,28 @@ import os
#####
# Onyx Slack Bot Configs
#####
ONYX_BOT_NUM_RETRIES = int(os.environ.get("ONYX_BOT_NUM_RETRIES", "5"))
DANSWER_BOT_NUM_RETRIES = int(os.environ.get("DANSWER_BOT_NUM_RETRIES", "5"))
# How much of the available input context can be used for thread context
MAX_THREAD_CONTEXT_PERCENTAGE = 512 * 2 / 3072
# Number of docs to display in "Reference Documents"
ONYX_BOT_NUM_DOCS_TO_DISPLAY = int(os.environ.get("ONYX_BOT_NUM_DOCS_TO_DISPLAY", "5"))
DANSWER_BOT_NUM_DOCS_TO_DISPLAY = int(
    os.environ.get("DANSWER_BOT_NUM_DOCS_TO_DISPLAY", "5")
)
# If the LLM fails to answer, Onyx can still show the "Reference Documents"
ONYX_BOT_DISABLE_DOCS_ONLY_ANSWER = os.environ.get(
    "ONYX_BOT_DISABLE_DOCS_ONLY_ANSWER", ""
DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER = os.environ.get(
    "DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER", ""
).lower() not in ["false", ""]
# When Onyx is considering a message, what emoji does it react with
ONYX_BOT_REACT_EMOJI = os.environ.get("ONYX_BOT_REACT_EMOJI") or "eyes"
DANSWER_REACT_EMOJI = os.environ.get("DANSWER_REACT_EMOJI") or "eyes"
# When User needs more help, what should the emoji be
ONYX_BOT_FOLLOWUP_EMOJI = os.environ.get("ONYX_BOT_FOLLOWUP_EMOJI") or "sos"
DANSWER_FOLLOWUP_EMOJI = os.environ.get("DANSWER_FOLLOWUP_EMOJI") or "sos"
# What kind of message should be shown when someone gives an AI answer feedback to OnyxBot
# Defaults to Private if not provided or invalid
# Private: Only visible to user clicking the feedback
# Anonymous: Public but anonymous
# Public: Visible with the user name who submitted the feedback
ONYX_BOT_FEEDBACK_VISIBILITY = (
    os.environ.get("ONYX_BOT_FEEDBACK_VISIBILITY") or "private"
DANSWER_BOT_FEEDBACK_VISIBILITY = (
    os.environ.get("DANSWER_BOT_FEEDBACK_VISIBILITY") or "private"
)
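The comment block above enumerates the three accepted visibility values and says that missing or invalid values fall back to private; a small sketch of that normalization (the enum and helper names are hypothetical, not taken from this diff):

from enum import Enum

class FeedbackVisibility(str, Enum):
    PRIVATE = "private"
    ANONYMOUS = "anonymous"
    PUBLIC = "public"

def get_feedback_visibility(raw: str | None) -> FeedbackVisibility:
    # Defaults to PRIVATE when the value is unset or not one of the three accepted options.
    try:
        return FeedbackVisibility((raw or "private").lower())
    except ValueError:
        return FeedbackVisibility.PRIVATE
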
# Should OnyxBot send an apology message if it's not able to find an answer
# That way the user isn't confused as to why OnyxBot reacted but then said nothing
@@ -32,38 +34,40 @@ NOTIFY_SLACKBOT_NO_ANSWER = (
)
# Mostly for debugging purposes but it's for explaining what went wrong
# if OnyxBot couldn't find an answer
ONYX_BOT_DISPLAY_ERROR_MSGS = os.environ.get(
    "ONYX_BOT_DISPLAY_ERROR_MSGS", ""
DANSWER_BOT_DISPLAY_ERROR_MSGS = os.environ.get(
    "DANSWER_BOT_DISPLAY_ERROR_MSGS", ""
).lower() not in [
    "false",
    "",
]
# Default is only respond in channels that are included by a slack config set in the UI
ONYX_BOT_RESPOND_EVERY_CHANNEL = (
    os.environ.get("ONYX_BOT_RESPOND_EVERY_CHANNEL", "").lower() == "true"
DANSWER_BOT_RESPOND_EVERY_CHANNEL = (
    os.environ.get("DANSWER_BOT_RESPOND_EVERY_CHANNEL", "").lower() == "true"
)

# Maximum Questions Per Minute, Default Uncapped
ONYX_BOT_MAX_QPM = int(os.environ.get("ONYX_BOT_MAX_QPM") or 0) or None
DANSWER_BOT_MAX_QPM = int(os.environ.get("DANSWER_BOT_MAX_QPM") or 0) or None
# Maximum time to wait when a question is queued
ONYX_BOT_MAX_WAIT_TIME = int(os.environ.get("ONYX_BOT_MAX_WAIT_TIME") or 180)
DANSWER_BOT_MAX_WAIT_TIME = int(os.environ.get("DANSWER_BOT_MAX_WAIT_TIME") or 180)

# Time (in minutes) after which a Slack message is sent to the user to remind him to give feedback.
# Set to 0 to disable it (default)
ONYX_BOT_FEEDBACK_REMINDER = int(os.environ.get("ONYX_BOT_FEEDBACK_REMINDER") or 0)
DANSWER_BOT_FEEDBACK_REMINDER = int(
    os.environ.get("DANSWER_BOT_FEEDBACK_REMINDER") or 0
)
# Set to True to rephrase the Slack users messages
ONYX_BOT_REPHRASE_MESSAGE = (
    os.environ.get("ONYX_BOT_REPHRASE_MESSAGE", "").lower() == "true"
DANSWER_BOT_REPHRASE_MESSAGE = (
    os.environ.get("DANSWER_BOT_REPHRASE_MESSAGE", "").lower() == "true"
)

# ONYX_BOT_RESPONSE_LIMIT_PER_TIME_PERIOD is the number of
# DANSWER_BOT_RESPONSE_LIMIT_PER_TIME_PERIOD is the number of
# responses OnyxBot can send in a given time period.
# Set to 0 to disable the limit.
ONYX_BOT_RESPONSE_LIMIT_PER_TIME_PERIOD = int(
    os.environ.get("ONYX_BOT_RESPONSE_LIMIT_PER_TIME_PERIOD", "5000")
DANSWER_BOT_RESPONSE_LIMIT_PER_TIME_PERIOD = int(
    os.environ.get("DANSWER_BOT_RESPONSE_LIMIT_PER_TIME_PERIOD", "5000")
)
# ONYX_BOT_RESPONSE_LIMIT_TIME_PERIOD_SECONDS is the number
# DANSWER_BOT_RESPONSE_LIMIT_TIME_PERIOD_SECONDS is the number
# of seconds until the response limit is reset.
ONYX_BOT_RESPONSE_LIMIT_TIME_PERIOD_SECONDS = int(
    os.environ.get("ONYX_BOT_RESPONSE_LIMIT_TIME_PERIOD_SECONDS", "86400")
DANSWER_BOT_RESPONSE_LIMIT_TIME_PERIOD_SECONDS = int(
    os.environ.get("DANSWER_BOT_RESPONSE_LIMIT_TIME_PERIOD_SECONDS", "86400")
)

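The last two settings describe a fixed-window cap on bot responses (a count per time period, with 0 meaning no limit). A minimal in-process sketch of how the two values relate, with hypothetical names and no persistence, purely for illustration:

import time

_window_start = time.monotonic()
_responses_in_window = 0

def allow_bot_response() -> bool:
    # Fixed-window limiter over ONYX_BOT_RESPONSE_LIMIT_TIME_PERIOD_SECONDS.
    global _window_start, _responses_in_window
    if ONYX_BOT_RESPONSE_LIMIT_PER_TIME_PERIOD == 0:
        return True  # limit disabled
    now = time.monotonic()
    if now - _window_start >= ONYX_BOT_RESPONSE_LIMIT_TIME_PERIOD_SECONDS:
        _window_start = now
        _responses_in_window = 0
    if _responses_in_window >= ONYX_BOT_RESPONSE_LIMIT_PER_TIME_PERIOD:
        return False
    _responses_in_window += 1
    return True
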
@@ -41,7 +41,7 @@ All new connectors should have tests added to the `backend/tests/daily/connector

#### Implementing the new Connector

The connector must subclass one or more of LoadConnector, PollConnector, CheckpointedConnector, or CheckpointedConnectorWithPermSync
The connector must subclass one or more of LoadConnector, PollConnector, SlimConnector, or EventConnector.

The `__init__` should take arguments for configuring what documents the connector will and where it finds those
documents. For example, if you have a wiki site, it may include the configuration for the team, topic, folder, etc. of

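To make the contract above concrete, here is a minimal sketch of what a new connector might look like, assuming the LoadConnector interface from onyx.connectors.interfaces (load_credentials plus load_from_state yielding batches of Document objects). The wiki-specific names, the credential key, and the fetching logic are invented for illustration and should be checked against the real interfaces:

from typing import Any

from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.models import Document


class ExampleWikiConnector(LoadConnector):
    def __init__(self, team: str, topic: str, batch_size: int = 100) -> None:
        # Configuration only: which documents to pull and where to find them.
        self.team = team
        self.topic = topic
        self.batch_size = batch_size
        self.api_token: str | None = None

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        self.api_token = credentials["wiki_api_token"]  # hypothetical credential key
        return None

    def load_from_state(self) -> GenerateDocumentsOutput:
        # A real connector would page through the wiki API here and yield
        # batches of Document objects built from each page.
        batch: list[Document] = []
        yield batch
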
@@ -25,7 +25,7 @@ from onyx.connectors.exceptions import UnexpectedValidationError
from onyx.connectors.interfaces import CheckpointedConnector
from onyx.connectors.interfaces import CheckpointOutput
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.models import ConnectorCheckpoint
from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import ConnectorMissingCredentialError
@@ -56,7 +56,7 @@ class BitbucketConnectorCheckpoint(ConnectorCheckpoint):

class BitbucketConnector(
    CheckpointedConnector[BitbucketConnectorCheckpoint],
    SlimConnectorWithPermSync,
    SlimConnector,
):
    """Connector for indexing Bitbucket Cloud pull requests.

@@ -266,7 +266,7 @@ class BitbucketConnector(
        """Validate and deserialize a checkpoint instance from JSON."""
        return BitbucketConnectorCheckpoint.model_validate_json(checkpoint_json)

    def retrieve_all_slim_docs_perm_sync(
    def retrieve_all_slim_documents(
        self,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,

@@ -5,7 +5,6 @@ from datetime import timezone
from typing import Any
from urllib.parse import quote

from atlassian.errors import ApiError # type: ignore
from requests.exceptions import HTTPError
from typing_extensions import override

@@ -42,7 +41,6 @@ from onyx.connectors.interfaces import CredentialsProviderInterface
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.interfaces import SlimConnectorWithPermSync
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import ConnectorMissingCredentialError
from onyx.connectors.models import Document
@@ -93,7 +91,6 @@ class ConfluenceCheckpoint(ConnectorCheckpoint):
class ConfluenceConnector(
    CheckpointedConnector[ConfluenceCheckpoint],
    SlimConnector,
    SlimConnectorWithPermSync,
    CredentialsConnector,
):
    def __init__(
@@ -111,7 +108,6 @@ class ConfluenceConnector(
        # pages.
        labels_to_skip: list[str] = CONFLUENCE_CONNECTOR_LABELS_TO_SKIP,
        timezone_offset: float = CONFLUENCE_TIMEZONE_OFFSET,
        scoped_token: bool = False,
    ) -> None:
        self.wiki_base = wiki_base
        self.is_cloud = is_cloud
@@ -122,7 +118,6 @@ class ConfluenceConnector(
        self.batch_size = batch_size
        self.labels_to_skip = labels_to_skip
        self.timezone_offset = timezone_offset
        self.scoped_token = scoped_token
        self._confluence_client: OnyxConfluence | None = None
        self._low_timeout_confluence_client: OnyxConfluence | None = None
        self._fetched_titles: set[str] = set()
@@ -200,7 +195,6 @@ class ConfluenceConnector(
            is_cloud=self.is_cloud,
            url=self.wiki_base,
            credentials_provider=credentials_provider,
            scoped_token=self.scoped_token,
        )
        confluence_client._probe_connection(**self.probe_kwargs)
        confluence_client._initialize_connection(**self.final_kwargs)
@@ -213,7 +207,6 @@ class ConfluenceConnector(
            url=self.wiki_base,
            credentials_provider=credentials_provider,
            timeout=3,
            scoped_token=self.scoped_token,
        )
        low_timeout_confluence_client._probe_connection(**self.probe_kwargs)
        low_timeout_confluence_client._initialize_connection(**self.final_kwargs)
@@ -565,21 +558,7 @@ class ConfluenceConnector(
    def validate_checkpoint_json(self, checkpoint_json: str) -> ConfluenceCheckpoint:
        return ConfluenceCheckpoint.model_validate_json(checkpoint_json)

    @override
    def retrieve_all_slim_docs(
        self,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,
        callback: IndexingHeartbeatInterface | None = None,
    ) -> GenerateSlimDocumentOutput:
        return self._retrieve_all_slim_docs(
            start=start,
            end=end,
            callback=callback,
            include_permissions=False,
        )

    def retrieve_all_slim_docs_perm_sync(
    def retrieve_all_slim_documents(
        self,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,
@@ -589,28 +568,12 @@ class ConfluenceConnector(
        Return 'slim' docs (IDs + minimal permission data).
        Does not fetch actual text. Used primarily for incremental permission sync.
        """
        return self._retrieve_all_slim_docs(
            start=start,
            end=end,
            callback=callback,
            include_permissions=True,
        )

    def _retrieve_all_slim_docs(
        self,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,
        callback: IndexingHeartbeatInterface | None = None,
        include_permissions: bool = True,
    ) -> GenerateSlimDocumentOutput:
        doc_metadata_list: list[SlimDocument] = []
        restrictions_expand = ",".join(_RESTRICTIONS_EXPANSION_FIELDS)

        space_level_access_info: dict[str, ExternalAccess] = {}
        if include_permissions:
            space_level_access_info = get_all_space_permissions(
                self.confluence_client, self.is_cloud
            )
        space_level_access_info = get_all_space_permissions(
            self.confluence_client, self.is_cloud
        )

        def get_external_access(
            doc_id: str, restrictions: dict[str, Any], ancestors: list[dict[str, Any]]
@@ -637,10 +600,8 @@ class ConfluenceConnector(
            doc_metadata_list.append(
                SlimDocument(
                    id=page_id,
                    external_access=(
                        get_external_access(page_id, page_restrictions, page_ancestors)
                        if include_permissions
                        else None
                    external_access=get_external_access(
                        page_id, page_restrictions, page_ancestors
                    ),
                )
            )
@@ -675,12 +636,8 @@ class ConfluenceConnector(
            doc_metadata_list.append(
                SlimDocument(
                    id=attachment_id,
                    external_access=(
                        get_external_access(
                            attachment_id, attachment_restrictions, []
                        )
                        if include_permissions
                        else None
                    external_access=get_external_access(
                        attachment_id, attachment_restrictions, []
                    ),
                )
            )
@@ -691,10 +648,10 @@ class ConfluenceConnector(

            if callback and callback.should_stop():
                raise RuntimeError(
                    "retrieve_all_slim_docs_perm_sync: Stop signal detected"
                    "retrieve_all_slim_documents: Stop signal detected"
                )
            if callback:
                callback.progress("retrieve_all_slim_docs_perm_sync", 1)
                callback.progress("retrieve_all_slim_documents", 1)

        yield doc_metadata_list

@@ -719,14 +676,6 @@ class ConfluenceConnector(
                f"Unexpected error while validating Confluence settings: {e}"
            )

        if self.space:
            try:
                self.low_timeout_confluence_client.get_space(self.space)
            except ApiError as e:
                raise ConnectorValidationError(
                    "Invalid Confluence space key provided"
                ) from e

        if not spaces or not spaces.get("results"):
            raise ConnectorValidationError(
                "No Confluence spaces found. Either your credentials lack permissions, or "
@@ -775,7 +724,7 @@ if __name__ == "__main__":
    end = datetime.now().timestamp()

    # Fetch all `SlimDocuments`.
    for slim_doc in confluence_connector.retrieve_all_slim_docs_perm_sync():
    for slim_doc in confluence_connector.retrieve_all_slim_documents():
        print(slim_doc)

    # Fetch all `Documents`.

@@ -41,7 +41,6 @@ from onyx.connectors.confluence.utils import _handle_http_error
from onyx.connectors.confluence.utils import confluence_refresh_tokens
from onyx.connectors.confluence.utils import get_start_param_from_url
from onyx.connectors.confluence.utils import update_param_in_path
from onyx.connectors.cross_connector_utils.miscellaneous_utils import scoped_url
from onyx.connectors.interfaces import CredentialsProviderInterface
from onyx.file_processing.html_utils import format_document_soup
from onyx.redis.redis_pool import get_redis_client
@@ -88,20 +87,16 @@ class OnyxConfluence:
        url: str,
        credentials_provider: CredentialsProviderInterface,
        timeout: int | None = None,
        scoped_token: bool = False,
        # should generally not be passed in, but making it overridable for
        # easier testing
        confluence_user_profiles_override: list[dict[str, str]] | None = (
            CONFLUENCE_CONNECTOR_USER_PROFILES_OVERRIDE
        ),
    ) -> None:
        self.base_url = url #'/'.join(url.rstrip("/").split("/")[:-1])
        url = scoped_url(url, "confluence") if scoped_token else url

        self._is_cloud = is_cloud
        self._url = url.rstrip("/")
        self._credentials_provider = credentials_provider
        self.scoped_token = scoped_token

        self.redis_client: Redis | None = None
        self.static_credentials: dict[str, Any] | None = None
        if self._credentials_provider.is_dynamic():
@@ -223,34 +218,6 @@ class OnyxConfluence:

        with self._credentials_provider:
            credentials, _ = self._renew_credentials()
            if self.scoped_token:
                # v2 endpoint doesn't always work with scoped tokens, use v1
                token = credentials["confluence_access_token"]
                probe_url = f"{self.base_url}/rest/api/space?limit=1"
                import requests

                logger.info(f"First and Last 5 of token: {token[:5]}...{token[-5:]}")

                try:
                    r = requests.get(
                        probe_url,
                        headers={"Authorization": f"Bearer {token}"},
                        timeout=10,
                    )
                    r.raise_for_status()
                except HTTPError as e:
                    if e.response.status_code == 403:
                        logger.warning(
                            "scoped token authenticated but not valid for probe endpoint (spaces)"
                        )
                    else:
                        if "WWW-Authenticate" in e.response.headers:
                            logger.warning(
                                f"WWW-Authenticate: {e.response.headers['WWW-Authenticate']}"
                            )
                        logger.warning(f"Full error: {e.response.text}")
                        raise e
                return

            # probe connection with direct client, no retries
            if "confluence_refresh_token" in credentials:
@@ -269,7 +236,6 @@ class OnyxConfluence:
                logger.info("Probing Confluence with Personal Access Token.")
                url = self._url
                if self._is_cloud:
                    logger.info("running with cloud client")
                    confluence_client_with_minimal_retries = Confluence(
                        url=url,
                        username=credentials["confluence_username"],
@@ -338,9 +304,7 @@ class OnyxConfluence:
            url = f"https://api.atlassian.com/ex/confluence/{credentials['cloud_id']}"
            confluence = Confluence(url=url, oauth2=oauth2_dict, **kwargs)
        else:
            logger.info(
                f"Connecting to Confluence with Personal Access Token as user: {credentials['confluence_username']}"
            )
            logger.info("Connecting to Confluence with Personal Access Token.")
            if self._is_cloud:
                confluence = Confluence(
                    url=self._url,

@@ -5,10 +5,7 @@ from datetime import datetime
from datetime import timezone
from typing import Any
from typing import TypeVar
from urllib.parse import urljoin
from urllib.parse import urlparse

import requests
from dateutil.parser import parse

from onyx.configs.app_configs import CONNECTOR_LOCALHOST_OVERRIDE
@@ -151,17 +148,3 @@ def get_oauth_callback_uri(base_domain: str, connector_id: str) -> str:

def is_atlassian_date_error(e: Exception) -> bool:
    return "field 'updated' is invalid" in str(e)


def get_cloudId(base_url: str) -> str:
    tenant_info_url = urljoin(base_url, "/_edge/tenant_info")
    response = requests.get(tenant_info_url, timeout=10)
    response.raise_for_status()
    return response.json()["cloudId"]


def scoped_url(url: str, product: str) -> str:
    parsed = urlparse(url)
    base_url = parsed.scheme + "://" + parsed.netloc
    cloud_id = get_cloudId(base_url)
    return f"https://api.atlassian.com/ex/{product}/{cloud_id}{parsed.path}"

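For context on the two helpers shown above: get_cloudId resolves a site's cloud id from Atlassian's /_edge/tenant_info endpoint, and scoped_url rewrites a product URL into the api.atlassian.com gateway form that scoped tokens are expected to call. A short usage sketch with a made-up hostname:

# A base URL such as https://example.atlassian.net/wiki would first have its cloud id
# looked up at https://example.atlassian.net/_edge/tenant_info, then be rewritten to
# roughly https://api.atlassian.com/ex/confluence/<cloud-id>/wiki.
api_base = scoped_url("https://example.atlassian.net/wiki", "confluence")
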
Some files were not shown because too many files have changed in this diff.