Compare commits

..

11 Commits

Author        SHA1         Message                                        Date
pablodanswer  92ddd00731   k                                              2025-02-13 14:55:24 -08:00
pablonyx      010e42d886   Pause cloud tasks for gated tenants (#3990)    2025-02-13 14:43:13 -08:00
pablodanswer  75ea486912   k                                              2025-02-13 12:53:58 -08:00
pablodanswer  ac252f7ba7   k                                              2025-02-13 12:53:44 -08:00
pablodanswer  eaf33d152b   k                                              2025-02-13 12:52:16 -08:00
pablodanswer  f77481d7ba   update                                         2025-02-13 12:52:16 -08:00
pablodanswer  53d81bd027   k                                              2025-02-13 12:52:16 -08:00
pablodanswer  0cf7c74ec5   quick nit                                      2025-02-13 12:52:16 -08:00
pablodanswer  e673eacbb6   improvements                                   2025-02-13 12:52:16 -08:00
pablodanswer  1f1b4af48b   various billing updates                        2025-02-13 12:52:16 -08:00
pablodanswer  42318710c6   misct billing_fixes                            2025-02-13 12:52:16 -08:00
432 changed files with 5703 additions and 17366 deletions

View File

@@ -65,7 +65,6 @@ jobs:
NEXT_PUBLIC_POSTHOG_KEY=${{ secrets.POSTHOG_KEY }}
NEXT_PUBLIC_POSTHOG_HOST=${{ secrets.POSTHOG_HOST }}
NEXT_PUBLIC_SENTRY_DSN=${{ secrets.SENTRY_DSN }}
NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY=${{ secrets.STRIPE_PUBLISHABLE_KEY }}
NEXT_PUBLIC_GTM_ENABLED=true
NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=true
NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=true

View File

@@ -4,6 +4,9 @@ on:
push:
tags:
- "*"
paths:
- 'backend/model_server/**'
- 'backend/Dockerfile.model_server'
env:
REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-model-server-cloud' || 'onyxdotapp/onyx-model-server' }}
@@ -12,32 +15,7 @@ env:
BUILDKIT_PROGRESS: plain
jobs:
# 1) Preliminary job to check if the changed files are relevant
check_model_server_changes:
runs-on: ubuntu-latest
outputs:
changed: ${{ steps.check.outputs.changed }}
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Check if relevant files changed
id: check
run: |
# Default to "false"
echo "changed=false" >> $GITHUB_OUTPUT
# Compare the previous commit (github.event.before) to the current one (github.sha)
# If any file in backend/model_server/** or backend/Dockerfile.model_server is changed,
# set changed=true
if git diff --name-only ${{ github.event.before }} ${{ github.sha }} \
| grep -E '^backend/model_server/|^backend/Dockerfile.model_server'; then
echo "changed=true" >> $GITHUB_OUTPUT
fi
build-amd64:
needs: [check_model_server_changes]
if: needs.check_model_server_changes.outputs.changed == 'true'
runs-on:
[runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}-amd64"]
steps:
@@ -77,8 +55,6 @@ jobs:
provenance: false
build-arm64:
needs: [check_model_server_changes]
if: needs.check_model_server_changes.outputs.changed == 'true'
runs-on:
[runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}-arm64"]
steps:
@@ -118,8 +94,7 @@ jobs:
provenance: false
merge-and-scan:
needs: [build-amd64, build-arm64, check_model_server_changes]
if: needs.check_model_server_changes.outputs.changed == 'true'
needs: [build-amd64, build-arm64]
runs-on: ubuntu-latest
steps:
- name: Login to Docker Hub

View File

@@ -53,90 +53,24 @@ jobs:
exclude: '(?i)^(pylint|aio[-_]*).*'
- name: Print report
if: always()
if: ${{ always() }}
run: echo "${{ steps.license_check_report.outputs.report }}"
- name: Install npm dependencies
working-directory: ./web
run: npm ci
- name: Run Trivy vulnerability scanner in repo mode
uses: aquasecurity/trivy-action@0.28.0
with:
scan-type: fs
scanners: license
format: table
# format: sarif
# output: trivy-results.sarif
severity: HIGH,CRITICAL
# be careful enabling the sarif and upload as it may spam the security tab
# with a huge amount of items. Work out the issues before enabling upload.
# - name: Run Trivy vulnerability scanner in repo mode
# if: always()
# uses: aquasecurity/trivy-action@0.29.0
# - name: Upload Trivy scan results to GitHub Security tab
# uses: github/codeql-action/upload-sarif@v3
# with:
# scan-type: fs
# scan-ref: .
# scanners: license
# format: table
# severity: HIGH,CRITICAL
# # format: sarif
# # output: trivy-results.sarif
#
# # - name: Upload Trivy scan results to GitHub Security tab
# # uses: github/codeql-action/upload-sarif@v3
# # with:
# # sarif_file: trivy-results.sarif
scan-trivy:
# See https://runs-on.com/runners/linux/
runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}"]
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
# Backend
- name: Pull backend docker image
run: docker pull onyxdotapp/onyx-backend:latest
- name: Run Trivy vulnerability scanner on backend
uses: aquasecurity/trivy-action@0.29.0
env:
TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
with:
image-ref: onyxdotapp/onyx-backend:latest
scanners: license
severity: HIGH,CRITICAL
vuln-type: library
exit-code: 0 # Set to 1 if we want a failed scan to fail the workflow
# Web server
- name: Pull web server docker image
run: docker pull onyxdotapp/onyx-web-server:latest
- name: Run Trivy vulnerability scanner on web server
uses: aquasecurity/trivy-action@0.29.0
env:
TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
with:
image-ref: onyxdotapp/onyx-web-server:latest
scanners: license
severity: HIGH,CRITICAL
vuln-type: library
exit-code: 0
# Model server
- name: Pull model server docker image
run: docker pull onyxdotapp/onyx-model-server:latest
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@0.29.0
env:
TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
with:
image-ref: onyxdotapp/onyx-model-server:latest
scanners: license
severity: HIGH,CRITICAL
vuln-type: library
exit-code: 0
# sarif_file: trivy-results.sarif

View File

@@ -1,6 +1,6 @@
name: Run Playwright Tests
name: Run Chromatic Tests
concurrency:
group: Run-Playwright-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
group: Run-Chromatic-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on: push
@@ -198,47 +198,43 @@ jobs:
cd deployment/docker_compose
docker compose -f docker-compose.dev.yml -p danswer-stack down -v
# NOTE: Chromatic UI diff testing is currently disabled.
# We are using Playwright for local and CI testing without visual regression checks.
# Chromatic may be reintroduced in the future for UI diff testing if needed.
chromatic-tests:
name: Chromatic Tests
# chromatic-tests:
# name: Chromatic Tests
needs: playwright-tests
runs-on:
[
runs-on,
runner=32cpu-linux-x64,
disk=large,
"run-id=${{ github.run_id }}",
]
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0
# needs: playwright-tests
# runs-on:
# [
# runs-on,
# runner=32cpu-linux-x64,
# disk=large,
# "run-id=${{ github.run_id }}",
# ]
# steps:
# - name: Checkout code
# uses: actions/checkout@v4
# with:
# fetch-depth: 0
- name: Setup node
uses: actions/setup-node@v4
with:
node-version: 22
# - name: Setup node
# uses: actions/setup-node@v4
# with:
# node-version: 22
- name: Install node dependencies
working-directory: ./web
run: npm ci
# - name: Install node dependencies
# working-directory: ./web
# run: npm ci
- name: Download Playwright test results
uses: actions/download-artifact@v4
with:
name: test-results
path: ./web/test-results
# - name: Download Playwright test results
# uses: actions/download-artifact@v4
# with:
# name: test-results
# path: ./web/test-results
# - name: Run Chromatic
# uses: chromaui/action@latest
# with:
# playwright: true
# projectToken: ${{ secrets.CHROMATIC_PROJECT_TOKEN }}
# workingDir: ./web
# env:
# CHROMATIC_ARCHIVE_LOCATION: ./test-results
- name: Run Chromatic
uses: chromaui/action@latest
with:
playwright: true
projectToken: ${{ secrets.CHROMATIC_PROJECT_TOKEN }}
workingDir: ./web
env:
CHROMATIC_ARCHIVE_LOCATION: ./test-results

View File

@@ -99,7 +99,7 @@ jobs:
DISABLE_TELEMETRY=true \
IMAGE_TAG=test \
DEV_MODE=true \
docker compose -f docker-compose.multitenant-dev.yml -p onyx-stack up -d
docker compose -f docker-compose.multitenant-dev.yml -p danswer-stack up -d
id: start_docker_multi_tenant
# In practice, `cloud` Auth type would require OAUTH credentials to be set.
@@ -108,13 +108,12 @@ jobs:
echo "Waiting for 3 minutes to ensure API server is ready..."
sleep 180
echo "Running integration tests..."
docker run --rm --network onyx-stack_default \
docker run --rm --network danswer-stack_default \
--name test-runner \
-e POSTGRES_HOST=relational_db \
-e POSTGRES_USER=postgres \
-e POSTGRES_PASSWORD=password \
-e POSTGRES_DB=postgres \
-e POSTGRES_USE_NULL_POOL=true \
-e VESPA_HOST=index \
-e REDIS_HOST=cache \
-e API_SERVER_HOST=api_server \
@@ -144,28 +143,24 @@ jobs:
- name: Stop multi-tenant Docker containers
run: |
cd deployment/docker_compose
docker compose -f docker-compose.multitenant-dev.yml -p onyx-stack down -v
docker compose -f docker-compose.multitenant-dev.yml -p danswer-stack down -v
# NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections
- name: Start Docker containers
run: |
cd deployment/docker_compose
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
AUTH_TYPE=basic \
POSTGRES_POOL_PRE_PING=true \
POSTGRES_USE_NULL_POOL=true \
REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
IMAGE_TAG=test \
INTEGRATION_TESTS_MODE=true \
docker compose -f docker-compose.dev.yml -p onyx-stack up -d
docker compose -f docker-compose.dev.yml -p danswer-stack up -d
id: start_docker
- name: Wait for service to be ready
run: |
echo "Starting wait-for-service script..."
docker logs -f onyx-stack-api_server-1 &
docker logs -f danswer-stack-api_server-1 &
start_time=$(date +%s)
timeout=300 # 5 minutes in seconds
@@ -195,24 +190,15 @@ jobs:
done
echo "Finished waiting for service."
- name: Start Mock Services
run: |
cd backend/tests/integration/mock_services
docker compose -f docker-compose.mock-it-services.yml \
-p mock-it-services-stack up -d
# NOTE: Use pre-ping/null to reduce flakiness due to dropped connections
- name: Run Standard Integration Tests
run: |
echo "Running integration tests..."
docker run --rm --network onyx-stack_default \
docker run --rm --network danswer-stack_default \
--name test-runner \
-e POSTGRES_HOST=relational_db \
-e POSTGRES_USER=postgres \
-e POSTGRES_PASSWORD=password \
-e POSTGRES_DB=postgres \
-e POSTGRES_POOL_PRE_PING=true \
-e POSTGRES_USE_NULL_POOL=true \
-e VESPA_HOST=index \
-e REDIS_HOST=cache \
-e API_SERVER_HOST=api_server \
@@ -222,8 +208,6 @@ jobs:
-e CONFLUENCE_USER_NAME=${CONFLUENCE_USER_NAME} \
-e CONFLUENCE_ACCESS_TOKEN=${CONFLUENCE_ACCESS_TOKEN} \
-e TEST_WEB_HOSTNAME=test-runner \
-e MOCK_CONNECTOR_SERVER_HOST=mock_connector_server \
-e MOCK_CONNECTOR_SERVER_PORT=8001 \
onyxdotapp/onyx-integration:test \
/app/tests/integration/tests \
/app/tests/integration/connector_job_tests
@@ -245,13 +229,13 @@ jobs:
if: always()
run: |
cd deployment/docker_compose
docker compose -f docker-compose.dev.yml -p onyx-stack logs --no-color api_server > $GITHUB_WORKSPACE/api_server.log || true
docker compose -f docker-compose.dev.yml -p danswer-stack logs --no-color api_server > $GITHUB_WORKSPACE/api_server.log || true
- name: Dump all-container logs (optional)
if: always()
run: |
cd deployment/docker_compose
docker compose -f docker-compose.dev.yml -p onyx-stack logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
docker compose -f docker-compose.dev.yml -p danswer-stack logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
- name: Upload logs
if: always()
@@ -265,4 +249,4 @@ jobs:
if: always()
run: |
cd deployment/docker_compose
docker compose -f docker-compose.dev.yml -p onyx-stack down -v
docker compose -f docker-compose.dev.yml -p danswer-stack down -v
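
The POSTGRES_POOL_PRE_PING and POSTGRES_USE_NULL_POOL flags above (see the "Use pre-ping/null pool to reduce flakiness due to dropped connections" notes) are environment variables consumed by the backend's SQLAlchemy engine setup. A minimal sketch of what honoring those flags could look like; the variable names and connection values come from the workflow, while the helper name and URL construction are illustrative, not Onyx's actual engine code:

```python
import os

from sqlalchemy import create_engine
from sqlalchemy.pool import NullPool


def build_engine(url: str):
    """Illustrative only: map the workflow's env flags onto SQLAlchemy pooling options."""
    # pool_pre_ping issues a lightweight ping before reusing a pooled connection,
    # so stale/dropped connections are replaced instead of raising mid-test.
    pre_ping = os.environ.get("POSTGRES_POOL_PRE_PING", "false").lower() == "true"

    # NullPool opens a fresh connection per checkout and closes it on release,
    # avoiding reuse of connections that may have been dropped between test steps.
    use_null_pool = os.environ.get("POSTGRES_USE_NULL_POOL", "false").lower() == "true"

    kwargs: dict = {"pool_pre_ping": pre_ping}
    if use_null_pool:
        kwargs["poolclass"] = NullPool
    return create_engine(url, **kwargs)


# Values mirror the test-runner env in the workflow above (POSTGRES_HOST=relational_db, etc.).
engine = build_engine("postgresql+psycopg2://postgres:password@relational_db:5432/postgres")
```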

View File

@@ -44,9 +44,6 @@ env:
SHAREPOINT_CLIENT_SECRET: ${{ secrets.SHAREPOINT_CLIENT_SECRET }}
SHAREPOINT_CLIENT_DIRECTORY_ID: ${{ secrets.SHAREPOINT_CLIENT_DIRECTORY_ID }}
SHAREPOINT_SITE: ${{ secrets.SHAREPOINT_SITE }}
# Gitbook
GITBOOK_SPACE_ID: ${{ secrets.GITBOOK_SPACE_ID }}
GITBOOK_API_KEY: ${{ secrets.GITBOOK_API_KEY }}
jobs:
connectors-check:
@@ -74,9 +71,7 @@ jobs:
python -m pip install --upgrade pip
pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
playwright install chromium
playwright install-deps chromium
- name: Run Tests
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
run: py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/connectors

View File

@@ -1,29 +1,18 @@
name: Model Server Tests
name: Connector Tests
on:
schedule:
# This cron expression runs the job daily at 16:00 UTC (9am PT)
- cron: "0 16 * * *"
workflow_dispatch:
inputs:
branch:
description: 'Branch to run the workflow on'
required: false
default: 'main'
env:
# Bedrock
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
AWS_REGION_NAME: ${{ secrets.AWS_REGION_NAME }}
# API keys for testing
COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
LITELLM_API_KEY: ${{ secrets.LITELLM_API_KEY }}
LITELLM_API_URL: ${{ secrets.LITELLM_API_URL }}
# OpenAI
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
AZURE_API_URL: ${{ secrets.AZURE_API_URL }}
jobs:
model-check:
@@ -37,23 +26,6 @@ jobs:
- name: Checkout code
uses: actions/checkout@v4
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_TOKEN }}
# tag every docker image with "test" so that we can spin up the correct set
# of images during testing
# We don't need to build the Web Docker image since it's not yet used
# in the integration tests. We have a separate action to verify that it builds
# successfully.
- name: Pull Model Server Docker image
run: |
docker pull onyxdotapp/onyx-model-server:latest
docker tag onyxdotapp/onyx-model-server:latest onyxdotapp/onyx-model-server:test
- name: Set up Python
uses: actions/setup-python@v5
with:
@@ -69,49 +41,6 @@ jobs:
pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
- name: Start Docker containers
run: |
cd deployment/docker_compose
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
AUTH_TYPE=basic \
REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
IMAGE_TAG=test \
docker compose -f docker-compose.model-server-test.yml -p onyx-stack up -d indexing_model_server
id: start_docker
- name: Wait for service to be ready
run: |
echo "Starting wait-for-service script..."
start_time=$(date +%s)
timeout=300 # 5 minutes in seconds
while true; do
current_time=$(date +%s)
elapsed_time=$((current_time - start_time))
if [ $elapsed_time -ge $timeout ]; then
echo "Timeout reached. Service did not become ready in 5 minutes."
exit 1
fi
# Use curl with error handling to ignore specific exit code 56
response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:9000/api/health || echo "curl_error")
if [ "$response" = "200" ]; then
echo "Service is ready!"
break
elif [ "$response" = "curl_error" ]; then
echo "Curl encountered an error, possibly exit code 56. Continuing to retry..."
else
echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
fi
sleep 5
done
echo "Finished waiting for service."
- name: Run Tests
shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
run: |
@@ -127,23 +56,3 @@ jobs:
-H 'Content-type: application/json' \
--data '{"text":"Scheduled Model Tests failed! Check the run at: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' \
$SLACK_WEBHOOK
- name: Dump all-container logs (optional)
if: always()
run: |
cd deployment/docker_compose
docker compose -f docker-compose.model-server-test.yml -p onyx-stack logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
- name: Upload logs
if: always()
uses: actions/upload-artifact@v4
with:
name: docker-all-logs
path: ${{ github.workspace }}/docker-compose.log
- name: Stop Docker containers
if: always()
run: |
cd deployment/docker_compose
docker compose -f docker-compose.model-server-test.yml -p onyx-stack down -v

View File

@@ -205,7 +205,7 @@
"--loglevel=INFO",
"--hostname=light@%n",
"-Q",
"vespa_metadata_sync,connector_deletion,doc_permissions_upsert,checkpoint_cleanup",
"vespa_metadata_sync,connector_deletion,doc_permissions_upsert",
],
"presentation": {
"group": "2",

README.md (124 lines changed)
View File

@@ -24,93 +24,113 @@
</a>
</p>
<strong>[Onyx](https://www.onyx.app/)</strong> (formerly Danswer) is the AI platform connected to your company's docs, apps, and people.
Onyx provides a feature rich Chat interface and plugs into any LLM of your choice.
Keep knowledge and access controls sync-ed across over 40 connectors like Google Drive, Slack, Confluence, Salesforce, etc.
Create custom AI agents with unique prompts, knowledge, and actions that the agents can take.
Onyx can be deployed securely anywhere and for any scale - on a laptop, on-premise, or to cloud.
<strong>[Onyx](https://www.onyx.app/)</strong> (formerly Danswer) is the AI Assistant connected to your company's docs, apps, and people.
Onyx provides a Chat interface and plugs into any LLM of your choice. Onyx can be deployed anywhere and for any
scale - on a laptop, on-premise, or to cloud. Since you own the deployment, your user data and chats are fully in your
own control. Onyx is dual Licensed with most of it under MIT license and designed to be modular and easily extensible. The system also comes fully ready
for production usage with user authentication, role management (admin/basic users), chat persistence, and a UI for
configuring AI Assistants.
Onyx also serves as an Enterprise Search across all common workplace tools such as Slack, Google Drive, Confluence, etc.
By combining LLMs and team specific knowledge, Onyx becomes a subject matter expert for the team. Imagine ChatGPT if
it had access to your team's unique knowledge! It enables questions such as "A customer wants feature X, is this already
supported?" or "Where's the pull request for feature Y?"
<h3>Feature Highlights</h3>
<h3>Usage</h3>
**Deep research over your team's knowledge:**
Onyx Web App:
https://private-user-images.githubusercontent.com/32520769/414509312-48392e83-95d0-4fb5-8650-a396e05e0a32.mp4?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3Mzk5Mjg2MzYsIm5iZiI6MTczOTkyODMzNiwicGF0aCI6Ii8zMjUyMDc2OS80MTQ1MDkzMTItNDgzOTJlODMtOTVkMC00ZmI1LTg2NTAtYTM5NmUwNWUwYTMyLm1wND9YLUFtei1BbGdvcml0aG09QVdTNC1ITUFDLVNIQTI1NiZYLUFtei1DcmVkZW50aWFsPUFLSUFWQ09EWUxTQTUzUFFLNFpBJTJGMjAyNTAyMTklMkZ1cy1lYXN0LTElMkZzMyUyRmF3czRfcmVxdWVzdCZYLUFtei1EYXRlPTIwMjUwMjE5VDAxMjUzNlomWC1BbXotRXhwaXJlcz0zMDAmWC1BbXotU2lnbmF0dXJlPWFhMzk5Njg2Y2Y5YjFmNDNiYTQ2YzM5ZTg5YWJiYTU2NWMyY2YwNmUyODE2NWUxMDRiMWQxZWJmODI4YTA0MTUmWC1BbXotU2lnbmVkSGVhZGVycz1ob3N0In0.a9D8A0sgKE9AoaoE-mfFbJ6_OKYeqaf7TZ4Han2JfW8
https://github.com/onyx-dot-app/onyx/assets/32520769/563be14c-9304-47b5-bf0a-9049c2b6f410
Or, plug Onyx into your existing Slack workflows (more integrations to come 😁):
**Use Onyx as a secure AI Chat with any LLM:**
![Onyx Chat Silent Demo](https://github.com/onyx-dot-app/onyx/releases/download/v0.21.1/OnyxChatSilentDemo.gif)
**Easily set up connectors to your apps:**
![Onyx Connector Silent Demo](https://github.com/onyx-dot-app/onyx/releases/download/v0.21.1/OnyxConnectorSilentDemo.gif)
**Access Onyx where your team already works:**
![Onyx Bot Demo](https://github.com/onyx-dot-app/onyx/releases/download/v0.21.1/OnyxBot.png)
https://github.com/onyx-dot-app/onyx/assets/25087905/3e19739b-d178-4371-9a38-011430bdec1b
For more details on the Admin UI to manage connectors and users, check out our
<strong><a href="https://www.youtube.com/watch?v=geNzY1nbCnU">Full Video Demo</a></strong>!
## Deployment
**To try it out for free and get started in seconds, check out [Onyx Cloud](https://cloud.onyx.app/signup)**.
Onyx can also be run locally (even on a laptop) or deployed on a virtual machine with a single
Onyx can easily be run locally (even on a laptop) or deployed on a virtual machine with a single
`docker compose` command. Checkout our [docs](https://docs.onyx.app/quickstart) to learn more.
We also have built-in support for high-availability/scalable deployment on Kubernetes.
References [here](https://github.com/onyx-dot-app/onyx/tree/main/deployment).
We also have built-in support for deployment on Kubernetes. Files for that can be found [here](https://github.com/onyx-dot-app/onyx/tree/main/deployment/kubernetes).
## 💃 Main Features
## 🔍 Other Notable Benefits of Onyx
- Custom deep learning models for indexing and inference time, only through Onyx + learning from user feedback.
- Flexible security features like SSO (OIDC/SAML/OAuth2), RBAC, encryption of credentials, etc.
- Knowledge curation features like document-sets, query history, usage analytics, etc.
- Scalable deployment options tested up to many tens of thousands users and hundreds of millions of documents.
- Chat UI with the ability to select documents to chat with.
- Create custom AI Assistants with different prompts and backing knowledge sets.
- Connect Onyx with LLM of your choice (self-host for a fully airgapped solution).
- Document Search + AI Answers for natural language queries.
- Connectors to all common workplace tools like Google Drive, Confluence, Slack, etc.
- Slack integration to get answers and search results directly in Slack.
## 🚧 Roadmap
- New methods in information retrieval (StructRAG, LightGraphRAG, etc.)
- Personalized Search
- Organizational understanding and ability to locate and suggest experts from your team.
- Code Search
- SQL and Structured Query Language
- Chat/Prompt sharing with specific teammates and user groups.
- Multimodal model support, chat with images, video etc.
- Choosing between LLMs and parameters during chat session.
- Tool calling and agent configurations options.
- Organizational understanding and ability to locate and suggest experts from your team.
## Other Notable Benefits of Onyx
- User Authentication with document level access management.
- Best in class Hybrid Search across all sources (BM-25 + prefix aware embedding models).
- Admin Dashboard to configure connectors, document-sets, access, etc.
- Custom deep learning models + learn from user feedback.
- Easy deployment and ability to host Onyx anywhere of your choosing.
## 🔌 Connectors
Keep knowledge and access in sync across 40+ connectors:
Efficiently pulls the latest changes from:
- Slack
- GitHub
- Google Drive
- Confluence
- Slack
- Gmail
- Salesforce
- Microsoft Sharepoint
- Github
- Jira
- Zendesk
- Gmail
- Notion
- Gong
- Microsoft Teams
- Dropbox
- Slab
- Linear
- Productboard
- Guru
- Bookstack
- Document360
- Sharepoint
- Hubspot
- Local Files
- Websites
- And more ...
See the full list [here](https://docs.onyx.app/connectors).
## 📚 Editions
## 📚 Licensing
There are two editions of Onyx:
- Onyx Community Edition (CE) is available freely under the MIT Expat license. Simply follow the Deployment guide above.
- Onyx Enterprise Edition (EE) includes extra features that are primarily useful for larger organizations.
For feature details, check out [our website](https://www.onyx.app/pricing).
- Onyx Community Edition (CE) is available freely under the MIT Expat license. This version has ALL the core features discussed above. This is the version of Onyx you will get if you follow the Deployment guide above.
- Onyx Enterprise Edition (EE) includes extra features that are primarily useful for larger organizations. Specifically, this includes:
- Single Sign-On (SSO), with support for both SAML and OIDC
- Role-based access control
- Document permission inheritance from connected sources
- Usage analytics and query history accessible to admins
- Whitelabeling
- API key authentication
- Encryption of secrets
- And many more! Checkout [our website](https://www.onyx.app/) for the latest.
To try the Onyx Enterprise Edition:
1. Checkout [Onyx Cloud](https://cloud.onyx.app/signup).
2. For self-hosting the Enterprise Edition, contact us at [founders@onyx.app](mailto:founders@onyx.app) or book a call with us on our [Cal](https://cal.com/team/onyx/founders).
1. Checkout our [Cloud product](https://cloud.onyx.app/signup).
2. For self-hosting, contact us at [founders@onyx.app](mailto:founders@onyx.app) or book a call with us on our [Cal](https://cal.com/team/onyx/founders).
## 💡 Contributing
Looking to contribute? Please check out the [Contribution Guide](CONTRIBUTING.md) for more details.
## ⭐Star History
[![Star History Chart](https://api.star-history.com/svg?repos=onyx-dot-app/onyx&type=Date)](https://star-history.com/#onyx-dot-app/onyx&Date)

View File

@@ -28,16 +28,14 @@ RUN apt-get update && \
curl \
zip \
ca-certificates \
libgnutls30 \
libblkid1 \
libmount1 \
libsmartcols1 \
libuuid1 \
libgnutls30=3.7.9-2+deb12u3 \
libblkid1=2.38.1-5+deb12u1 \
libmount1=2.38.1-5+deb12u1 \
libsmartcols1=2.38.1-5+deb12u1 \
libuuid1=2.38.1-5+deb12u1 \
libxmlsec1-dev \
pkg-config \
gcc \
nano \
vim && \
gcc && \
rm -rf /var/lib/apt/lists/* && \
apt-get clean

View File

@@ -1,27 +0,0 @@
"""Add indexes to document__tag
Revision ID: 1a03d2c2856b
Revises: 9c00a2bccb83
Create Date: 2025-02-18 10:45:13.957807
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "1a03d2c2856b"
down_revision = "9c00a2bccb83"
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:
op.create_index(
op.f("ix_document__tag_tag_id"),
"document__tag",
["tag_id"],
unique=False,
)
def downgrade() -> None:
op.drop_index(op.f("ix_document__tag_tag_id"), table_name="document__tag")

View File

@@ -1,84 +0,0 @@
"""improved index
Revision ID: 3bd4c84fe72f
Revises: 8f43500ee275
Create Date: 2025-02-26 13:07:56.217791
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "3bd4c84fe72f"
down_revision = "8f43500ee275"
branch_labels = None
depends_on = None
# NOTE:
# This migration addresses issues with the previous migration (8f43500ee275) which caused
# an outage by creating an index without using CONCURRENTLY. This migration:
#
# 1. Creates more efficient full-text search capabilities using tsvector columns and GIN indexes
# 2. Uses CONCURRENTLY for all index creation to prevent table locking
# 3. Explicitly manages transactions with COMMIT statements to allow CONCURRENTLY to work
# (see: https://www.postgresql.org/docs/9.4/sql-createindex.html#SQL-CREATEINDEX-CONCURRENTLY)
# (see: https://github.com/sqlalchemy/alembic/issues/277)
# 4. Adds indexes to both chat_message and chat_session tables for comprehensive search
def upgrade() -> None:
# Create a GIN index for full-text search on chat_message.message
op.execute(
"""
ALTER TABLE chat_message
ADD COLUMN message_tsv tsvector
GENERATED ALWAYS AS (to_tsvector('english', message)) STORED;
"""
)
# Commit the current transaction before creating concurrent indexes
op.execute("COMMIT")
op.execute(
"""
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_chat_message_tsv
ON chat_message
USING GIN (message_tsv)
"""
)
# Also add a stored tsvector column for chat_session.description
op.execute(
"""
ALTER TABLE chat_session
ADD COLUMN description_tsv tsvector
GENERATED ALWAYS AS (to_tsvector('english', coalesce(description, ''))) STORED;
"""
)
# Commit again before creating the second concurrent index
op.execute("COMMIT")
op.execute(
"""
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_chat_session_desc_tsv
ON chat_session
USING GIN (description_tsv)
"""
)
def downgrade() -> None:
# Drop the indexes first (use CONCURRENTLY for dropping too)
op.execute("COMMIT")
op.execute("DROP INDEX CONCURRENTLY IF EXISTS idx_chat_message_tsv;")
op.execute("COMMIT")
op.execute("DROP INDEX CONCURRENTLY IF EXISTS idx_chat_session_desc_tsv;")
# Then drop the columns
op.execute("ALTER TABLE chat_message DROP COLUMN IF EXISTS message_tsv;")
op.execute("ALTER TABLE chat_session DROP COLUMN IF EXISTS description_tsv;")
op.execute("DROP INDEX IF EXISTS idx_chat_message_message_lower;")

View File

@@ -1,32 +0,0 @@
"""add index
Revision ID: 8f43500ee275
Revises: da42808081e3
Create Date: 2025-02-24 17:35:33.072714
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "8f43500ee275"
down_revision = "da42808081e3"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Create a basic index on the lowercase message column for direct text matching
# Limit to 1500 characters to stay well under the 2856 byte limit of btree version 4
# op.execute(
# """
# CREATE INDEX idx_chat_message_message_lower
# ON chat_message (LOWER(substring(message, 1, 1500)))
# """
# )
pass
def downgrade() -> None:
# Drop the index
op.execute("DROP INDEX IF EXISTS idx_chat_message_message_lower;")

View File

@@ -1,43 +0,0 @@
"""chat_message_agentic
Revision ID: 9c00a2bccb83
Revises: b7a7eee5aa15
Create Date: 2025-02-17 11:15:43.081150
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "9c00a2bccb83"
down_revision = "b7a7eee5aa15"
branch_labels = None
depends_on = None
def upgrade() -> None:
# First add the column as nullable
op.add_column("chat_message", sa.Column("is_agentic", sa.Boolean(), nullable=True))
# Update existing rows based on presence of SubQuestions
op.execute(
"""
UPDATE chat_message
SET is_agentic = EXISTS (
SELECT 1
FROM agent__sub_question
WHERE agent__sub_question.primary_question_id = chat_message.id
)
WHERE is_agentic IS NULL
"""
)
# Make the column non-nullable with a default value of False
op.alter_column(
"chat_message", "is_agentic", nullable=False, server_default=sa.text("false")
)
def downgrade() -> None:
op.drop_column("chat_message", "is_agentic")

View File

@@ -1,29 +0,0 @@
"""remove inactive ccpair status on downgrade
Revision ID: acaab4ef4507
Revises: b388730a2899
Create Date: 2025-02-16 18:21:41.330212
"""
from alembic import op
from onyx.db.models import ConnectorCredentialPair
from onyx.db.enums import ConnectorCredentialPairStatus
from sqlalchemy import update
# revision identifiers, used by Alembic.
revision = "acaab4ef4507"
down_revision = "b388730a2899"
branch_labels = None
depends_on = None
def upgrade() -> None:
pass
def downgrade() -> None:
op.execute(
update(ConnectorCredentialPair)
.where(ConnectorCredentialPair.status == ConnectorCredentialPairStatus.INVALID)
.values(status=ConnectorCredentialPairStatus.ACTIVE)
)

View File

@@ -1,31 +0,0 @@
"""nullable preferences
Revision ID: b388730a2899
Revises: 1a03d2c2856b
Create Date: 2025-02-17 18:49:22.643902
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "b388730a2899"
down_revision = "1a03d2c2856b"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.alter_column("user", "temperature_override_enabled", nullable=True)
op.alter_column("user", "auto_scroll", nullable=True)
def downgrade() -> None:
# Ensure no null values before making columns non-nullable
op.execute(
'UPDATE "user" SET temperature_override_enabled = false WHERE temperature_override_enabled IS NULL'
)
op.execute('UPDATE "user" SET auto_scroll = false WHERE auto_scroll IS NULL')
op.alter_column("user", "temperature_override_enabled", nullable=False)
op.alter_column("user", "auto_scroll", nullable=False)

View File

@@ -1,124 +0,0 @@
"""Add checkpointing/failure handling
Revision ID: b7a7eee5aa15
Revises: f39c5794c10a
Create Date: 2025-01-24 15:17:36.763172
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "b7a7eee5aa15"
down_revision = "f39c5794c10a"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"index_attempt",
sa.Column("checkpoint_pointer", sa.String(), nullable=True),
)
op.add_column(
"index_attempt",
sa.Column("poll_range_start", sa.DateTime(timezone=True), nullable=True),
)
op.add_column(
"index_attempt",
sa.Column("poll_range_end", sa.DateTime(timezone=True), nullable=True),
)
op.create_index(
"ix_index_attempt_cc_pair_settings_poll",
"index_attempt",
[
"connector_credential_pair_id",
"search_settings_id",
"status",
sa.text("time_updated DESC"),
],
)
# Drop the old IndexAttemptError table
op.drop_index("index_attempt_id", table_name="index_attempt_errors")
op.drop_table("index_attempt_errors")
# Create the new version of the table
op.create_table(
"index_attempt_errors",
sa.Column("id", sa.Integer(), primary_key=True),
sa.Column("index_attempt_id", sa.Integer(), nullable=False),
sa.Column("connector_credential_pair_id", sa.Integer(), nullable=False),
sa.Column("document_id", sa.String(), nullable=True),
sa.Column("document_link", sa.String(), nullable=True),
sa.Column("entity_id", sa.String(), nullable=True),
sa.Column("failed_time_range_start", sa.DateTime(timezone=True), nullable=True),
sa.Column("failed_time_range_end", sa.DateTime(timezone=True), nullable=True),
sa.Column("failure_message", sa.Text(), nullable=False),
sa.Column("is_resolved", sa.Boolean(), nullable=False, default=False),
sa.Column(
"time_created",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.ForeignKeyConstraint(
["index_attempt_id"],
["index_attempt.id"],
),
sa.ForeignKeyConstraint(
["connector_credential_pair_id"],
["connector_credential_pair.id"],
),
)
def downgrade() -> None:
op.execute("SET lock_timeout = '5s'")
# try a few times to drop the table, this has been observed to fail due to other locks
# blocking the drop
NUM_TRIES = 10
for i in range(NUM_TRIES):
try:
op.drop_table("index_attempt_errors")
break
except Exception as e:
if i == NUM_TRIES - 1:
raise e
print(f"Error dropping table: {e}. Retrying...")
op.execute("SET lock_timeout = DEFAULT")
# Recreate the old IndexAttemptError table
op.create_table(
"index_attempt_errors",
sa.Column("id", sa.Integer(), primary_key=True),
sa.Column("index_attempt_id", sa.Integer(), nullable=True),
sa.Column("batch", sa.Integer(), nullable=True),
sa.Column("doc_summaries", postgresql.JSONB(), nullable=False),
sa.Column("error_msg", sa.Text(), nullable=True),
sa.Column("traceback", sa.Text(), nullable=True),
sa.Column(
"time_created",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
),
sa.ForeignKeyConstraint(
["index_attempt_id"],
["index_attempt.id"],
),
)
op.create_index(
"index_attempt_id",
"index_attempt_errors",
["time_created"],
)
op.drop_index("ix_index_attempt_cc_pair_settings_poll")
op.drop_column("index_attempt", "checkpoint_pointer")
op.drop_column("index_attempt", "poll_range_start")
op.drop_column("index_attempt", "poll_range_end")

View File

@@ -1,120 +0,0 @@
"""migrate jira connectors to new format
Revision ID: da42808081e3
Revises: f13db29f3101
Create Date: 2025-02-24 11:24:54.396040
"""
from alembic import op
import sqlalchemy as sa
import json
from onyx.configs.constants import DocumentSource
from onyx.connectors.onyx_jira.utils import extract_jira_project
# revision identifiers, used by Alembic.
revision = "da42808081e3"
down_revision = "f13db29f3101"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Get all Jira connectors
conn = op.get_bind()
# First get all Jira connectors
jira_connectors = conn.execute(
sa.text(
"""
SELECT id, connector_specific_config
FROM connector
WHERE source = :source
"""
),
{"source": DocumentSource.JIRA.value.upper()},
).fetchall()
# Update each connector's config
for connector_id, old_config in jira_connectors:
if not old_config:
continue
# Extract project key from URL if it exists
new_config: dict[str, str | None] = {}
if project_url := old_config.get("jira_project_url"):
# Parse the URL to get base and project
try:
jira_base, project_key = extract_jira_project(project_url)
new_config = {"jira_base_url": jira_base, "project_key": project_key}
except ValueError:
# If URL parsing fails, just use the URL as the base
new_config = {
"jira_base_url": project_url.split("/projects/")[0],
"project_key": None,
}
else:
# For connectors without a project URL, we need admin intervention
# Mark these for review
print(
f"WARNING: Jira connector {connector_id} has no project URL configured"
)
continue
# Update the connector config
conn.execute(
sa.text(
"""
UPDATE connector
SET connector_specific_config = :new_config
WHERE id = :id
"""
),
{"id": connector_id, "new_config": json.dumps(new_config)},
)
def downgrade() -> None:
# Get all Jira connectors
conn = op.get_bind()
# First get all Jira connectors
jira_connectors = conn.execute(
sa.text(
"""
SELECT id, connector_specific_config
FROM connector
WHERE source = :source
"""
),
{"source": DocumentSource.JIRA.value.upper()},
).fetchall()
# Update each connector's config back to the old format
for connector_id, new_config in jira_connectors:
if not new_config:
continue
old_config = {}
base_url = new_config.get("jira_base_url")
project_key = new_config.get("project_key")
if base_url and project_key:
old_config = {"jira_project_url": f"{base_url}/projects/{project_key}"}
elif base_url:
old_config = {"jira_project_url": base_url}
else:
continue
# Update the connector config
conn.execute(
sa.text(
"""
UPDATE connector
SET connector_specific_config = :old_config
WHERE id = :id
"""
),
{"id": connector_id, "old_config": old_config},
)

View File

@@ -1,36 +0,0 @@
"""force lowercase all users
Revision ID: f11b408e39d3
Revises: 3bd4c84fe72f
Create Date: 2025-02-26 17:04:55.683500
"""
# revision identifiers, used by Alembic.
revision = "f11b408e39d3"
down_revision = "3bd4c84fe72f"
branch_labels = None
depends_on = None
def upgrade() -> None:
# 1) Convert all existing user emails to lowercase
from alembic import op
op.execute(
"""
UPDATE "user"
SET email = LOWER(email)
"""
)
# 2) Add a check constraint to ensure emails are always lowercase
op.create_check_constraint("ensure_lowercase_email", "user", "email = LOWER(email)")
def downgrade() -> None:
# Drop the check constraint
from alembic import op
op.drop_constraint("ensure_lowercase_email", "user", type_="check")

View File

@@ -1,27 +0,0 @@
"""Add composite index for last_modified and last_synced to document
Revision ID: f13db29f3101
Revises: b388730a2899
Create Date: 2025-02-18 22:48:11.511389
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "f13db29f3101"
down_revision = "acaab4ef4507"
branch_labels: str | None = None
depends_on: str | None = None
def upgrade() -> None:
op.create_index(
"ix_document_sync_status",
"document",
["last_modified", "last_synced"],
unique=False,
)
def downgrade() -> None:
op.drop_index("ix_document_sync_status", table_name="document")

View File

@@ -1,40 +0,0 @@
"""Add background errors table
Revision ID: f39c5794c10a
Revises: 2cdeff6d8c93
Create Date: 2025-02-12 17:11:14.527876
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "f39c5794c10a"
down_revision = "2cdeff6d8c93"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.create_table(
"background_error",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("message", sa.String(), nullable=False),
sa.Column(
"time_created",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.Column("cc_pair_id", sa.Integer(), nullable=True),
sa.PrimaryKeyConstraint("id"),
sa.ForeignKeyConstraint(
["cc_pair_id"],
["connector_credential_pair.id"],
ondelete="CASCADE",
),
)
def downgrade() -> None:
op.drop_table("background_error")

View File

@@ -1,42 +0,0 @@
"""lowercase multi-tenant user auth
Revision ID: 34e3630c7f32
Revises: a4f6ee863c47
Create Date: 2025-02-26 15:03:01.211894
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "34e3630c7f32"
down_revision = "a4f6ee863c47"
branch_labels = None
depends_on = None
def upgrade() -> None:
# 1) Convert all existing rows to lowercase
op.execute(
"""
UPDATE user_tenant_mapping
SET email = LOWER(email)
"""
)
# 2) Add a check constraint so that emails cannot be written in uppercase
op.create_check_constraint(
"ensure_lowercase_email",
"user_tenant_mapping",
"email = LOWER(email)",
schema="public",
)
def downgrade() -> None:
# Drop the check constraint
op.drop_constraint(
"ensure_lowercase_email",
"user_tenant_mapping",
schema="public",
type_="check",
)

View File

@@ -5,9 +5,11 @@ from onyx.background.celery.apps.primary import celery_app
from onyx.background.task_utils import build_celery_task_wrapper
from onyx.configs.app_configs import JOB_TIMEOUT
from onyx.db.chat import delete_chat_sessions_older_than
from onyx.db.engine import get_session_with_current_tenant
from onyx.db.engine import get_session_with_tenant
from onyx.server.settings.store import load_settings
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
logger = setup_logger()
@@ -16,8 +18,10 @@ logger = setup_logger()
@build_celery_task_wrapper(name_chat_ttl_task)
@celery_app.task(soft_time_limit=JOB_TIMEOUT)
def perform_ttl_management_task(retention_limit_days: int, *, tenant_id: str) -> None:
with get_session_with_current_tenant() as db_session:
def perform_ttl_management_task(
retention_limit_days: int, *, tenant_id: str | None
) -> None:
with get_session_with_tenant(tenant_id) as db_session:
delete_chat_sessions_older_than(retention_limit_days, db_session)
@@ -31,19 +35,24 @@ def perform_ttl_management_task(retention_limit_days: int, *, tenant_id: str) ->
ignore_result=True,
soft_time_limit=JOB_TIMEOUT,
)
def check_ttl_management_task(*, tenant_id: str) -> None:
def check_ttl_management_task(*, tenant_id: str | None) -> None:
"""Runs periodically to check if any ttl tasks should be run and adds them
to the queue"""
token = None
if MULTI_TENANT and tenant_id is not None:
token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
settings = load_settings()
retention_limit_days = settings.maximum_chat_retention_days
with get_session_with_current_tenant() as db_session:
with get_session_with_tenant(tenant_id) as db_session:
if should_perform_chat_ttl_check(retention_limit_days, db_session):
perform_ttl_management_task.apply_async(
kwargs=dict(
retention_limit_days=retention_limit_days, tenant_id=tenant_id
),
)
if token is not None:
CURRENT_TENANT_ID_CONTEXTVAR.reset(token)
@celery_app.task(
@@ -51,9 +60,9 @@ def check_ttl_management_task(*, tenant_id: str) -> None:
ignore_result=True,
soft_time_limit=JOB_TIMEOUT,
)
def autogenerate_usage_report_task(*, tenant_id: str) -> None:
def autogenerate_usage_report_task(*, tenant_id: str | None) -> None:
"""This generates usage report under the /admin/generate-usage/report endpoint"""
with get_session_with_current_tenant() as db_session:
with get_session_with_tenant(tenant_id) as db_session:
create_new_usage_report(
db_session=db_session,
user_id=None,
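
The task functions above bracket their work with CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id) and a later .reset(token). A minimal sketch of that pattern wrapped in a context manager so the reset always runs; the helper name is illustrative, and only the token/set/reset dance is taken from the diff:

```python
from contextlib import contextmanager
from contextvars import ContextVar

# Stand-in for shared_configs.contextvars.CURRENT_TENANT_ID_CONTEXTVAR;
# the real definition lives in the Onyx codebase.
CURRENT_TENANT_ID_CONTEXTVAR: ContextVar[str | None] = ContextVar(
    "current_tenant_id", default=None
)


@contextmanager
def tenant_context(tenant_id: str | None):
    """Illustrative: set the tenant contextvar for the duration of a task body."""
    token = None
    if tenant_id is not None:
        token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
    try:
        yield
    finally:
        # Always restore the previous value, even if the task raises.
        if token is not None:
            CURRENT_TENANT_ID_CONTEXTVAR.reset(token)
```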

View File

@@ -1,10 +1,10 @@
from datetime import timedelta
from typing import Any
from onyx.background.celery.tasks.beat_schedule import (
beat_cloud_tasks as base_beat_system_tasks,
)
from onyx.background.celery.tasks.beat_schedule import BEAT_EXPIRES_DEFAULT
from onyx.background.celery.tasks.beat_schedule import (
beat_system_tasks as base_beat_system_tasks,
)
from onyx.background.celery.tasks.beat_schedule import (
beat_task_templates as base_beat_task_templates,
)

View File

@@ -18,7 +18,7 @@ logger = setup_logger()
def monitor_usergroup_taskset(
tenant_id: str, key_bytes: bytes, r: Redis, db_session: Session
tenant_id: str | None, key_bytes: bytes, r: Redis, db_session: Session
) -> None:
"""This function is likely to move in the worker refactor happening next."""
fence_key = key_bytes.decode("utf-8")

View File

@@ -4,7 +4,6 @@ from sqlalchemy.orm import Session
from onyx.configs.constants import DocumentSource
from onyx.db.connector_credential_pair import get_connector_credential_pair
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.models import Connector
from onyx.db.models import ConnectorCredentialPair
from onyx.db.models import UserGroup__ConnectorCredentialPair
@@ -36,11 +35,10 @@ def _delete_connector_credential_pair_user_groups_relationship__no_commit(
def get_cc_pairs_by_source(
db_session: Session,
source_type: DocumentSource,
access_type: AccessType | None = None,
status: ConnectorCredentialPairStatus | None = None,
only_sync: bool,
) -> list[ConnectorCredentialPair]:
"""
Get all cc_pairs for a given source type with optional filtering by access_type and status
Get all cc_pairs for a given source type (and optionally only sync)
result is sorted by cc_pair id
"""
query = (
@@ -50,11 +48,8 @@ def get_cc_pairs_by_source(
.order_by(ConnectorCredentialPair.id)
)
if access_type is not None:
query = query.filter(ConnectorCredentialPair.access_type == access_type)
if status is not None:
query = query.filter(ConnectorCredentialPair.status == status)
if only_sync:
query = query.filter(ConnectorCredentialPair.access_type == AccessType.SYNC)
cc_pairs = query.all()
return cc_pairs

View File

@@ -1,6 +1,5 @@
from ee.onyx.db.external_perm import ExternalUserGroup
from ee.onyx.external_permissions.confluence.constants import ALL_CONF_EMAILS_GROUP_NAME
from onyx.background.error_logging import emit_background_error
from onyx.connectors.confluence.onyx_confluence import build_confluence_client
from onyx.connectors.confluence.onyx_confluence import OnyxConfluence
from onyx.connectors.confluence.utils import get_user_email_from_username__server
@@ -11,52 +10,43 @@ logger = setup_logger()
def _build_group_member_email_map(
confluence_client: OnyxConfluence, cc_pair_id: int
confluence_client: OnyxConfluence,
) -> dict[str, set[str]]:
group_member_emails: dict[str, set[str]] = {}
for user in confluence_client.paginated_cql_user_retrieval():
logger.debug(f"Processing groups for user: {user}")
for user_result in confluence_client.paginated_cql_user_retrieval():
logger.debug(f"Processing groups for user: {user_result}")
email = user.email
user = user_result.get("user", {})
if not user:
logger.warning(f"user result missing user field: {user_result}")
continue
email = user.get("email")
if not email:
# This field is only present in Confluence Server
user_name = user.username
user_name = user.get("username")
# If it is present, try to get the email using a Server-specific method
if user_name:
email = get_user_email_from_username__server(
confluence_client=confluence_client,
user_name=user_name,
)
if not email:
# If we still don't have an email, skip this user
msg = f"user result missing email field: {user}"
if user.type == "app":
logger.warning(msg)
else:
emit_background_error(msg, cc_pair_id=cc_pair_id)
logger.error(msg)
logger.warning(f"user result missing email field: {user_result}")
continue
all_users_groups: set[str] = set()
for group in confluence_client.paginated_groups_by_user_retrieval(user.user_id):
for group in confluence_client.paginated_groups_by_user_retrieval(user):
# group name uniqueness is enforced by Confluence, so we can use it as a group ID
group_id = group["name"]
group_member_emails.setdefault(group_id, set()).add(email)
all_users_groups.add(group_id)
if not all_users_groups:
msg = f"No groups found for user with email: {email}"
emit_background_error(msg, cc_pair_id=cc_pair_id)
logger.error(msg)
if not group_member_emails:
logger.warning(f"No groups found for user with email: {email}")
else:
logger.debug(f"Found groups {all_users_groups} for user with email {email}")
if not group_member_emails:
msg = "No groups found for any users."
emit_background_error(msg, cc_pair_id=cc_pair_id)
logger.error(msg)
return group_member_emails
@@ -71,7 +61,6 @@ def confluence_group_sync(
group_member_email_map = _build_group_member_email_map(
confluence_client=confluence_client,
cc_pair_id=cc_pair.id,
)
onyx_groups: list[ExternalUserGroup] = []
all_found_emails = set()

View File

@@ -62,14 +62,12 @@ def _fetch_permissions_for_permission_ids(
user_email=(owner_email or google_drive_connector.primary_admin_email),
)
# We continue on 404 or 403 because the document may not exist or the user may not have access to it
fetched_permissions = execute_paginated_retrieval(
retrieval_function=drive_service.permissions().list,
list_key="permissions",
fileId=doc_id,
fields="permissions(id, emailAddress, type, domain)",
supportsAllDrives=True,
continue_on_404_or_403=True,
)
permissions_for_doc_id = []
@@ -106,13 +104,7 @@ def _get_permissions_from_slim_doc(
user_emails: set[str] = set()
group_emails: set[str] = set()
public = False
skipped_permissions = 0
for permission in permissions_list:
if not permission:
skipped_permissions += 1
continue
permission_type = permission["type"]
if permission_type == "user":
user_emails.add(permission["emailAddress"])
@@ -129,11 +121,6 @@ def _get_permissions_from_slim_doc(
elif permission_type == "anyone":
public = True
if skipped_permissions > 0:
logger.warning(
f"Skipped {skipped_permissions} permissions of {len(permissions_list)} for document {slim_doc.id}"
)
drive_id = permission_info.get("drive_id")
group_ids = group_emails | ({drive_id} if drive_id is not None else set())

View File

@@ -5,7 +5,7 @@ from onyx.access.models import DocExternalAccess
from onyx.access.models import ExternalAccess
from onyx.connectors.slack.connector import get_channels
from onyx.connectors.slack.connector import make_paginated_slack_api_call_w_retries
from onyx.connectors.slack.connector import SlackConnector
from onyx.connectors.slack.connector import SlackPollConnector
from onyx.db.models import ConnectorCredentialPair
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.utils.logger import setup_logger
@@ -17,7 +17,7 @@ logger = setup_logger()
def _get_slack_document_ids_and_channels(
cc_pair: ConnectorCredentialPair, callback: IndexingHeartbeatInterface | None
) -> dict[str, list[str]]:
slack_connector = SlackConnector(**cc_pair.connector.connector_specific_config)
slack_connector = SlackPollConnector(**cc_pair.connector.connector_specific_config)
slack_connector.load_credentials(cc_pair.credential.credential_json)
slim_doc_generator = slack_connector.retrieve_all_slim_documents(callback=callback)

View File

@@ -33,7 +33,7 @@ def add_tenant_id_middleware(app: FastAPI, logger: logging.LoggerAdapter) -> Non
return await call_next(request)
except Exception as e:
logger.exception(f"Error in tenant ID middleware: {str(e)}")
logger.error(f"Error in tenant ID middleware: {str(e)}")
raise
@@ -49,7 +49,7 @@ async def _get_tenant_id_from_request(
"""
# Check for API key
tenant_id = extract_tenant_from_api_key_header(request)
if tenant_id is not None:
if tenant_id:
return tenant_id
# Check for anonymous user cookie

View File

@@ -36,12 +36,12 @@ from onyx.connectors.google_utils.shared_constants import (
GoogleOAuthAuthenticationMethod,
)
from onyx.db.credentials import create_credential
from onyx.db.engine import get_current_tenant_id
from onyx.db.engine import get_session
from onyx.db.models import User
from onyx.redis.redis_pool import get_redis_client
from onyx.server.documents.models import CredentialBase
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import get_current_tenant_id
logger = setup_logger()
@@ -271,12 +271,12 @@ def prepare_authorization_request(
connector: DocumentSource,
redirect_on_success: str | None,
user: User = Depends(current_user),
tenant_id: str | None = Depends(get_current_tenant_id),
) -> JSONResponse:
"""Used by the frontend to generate the url for the user's browser during auth request.
Example: https://www.oauth.com/oauth2-servers/authorization/the-authorization-request/
"""
tenant_id = get_current_tenant_id()
# create random oauth state param for security and to retrieve user data later
oauth_uuid = uuid.uuid4()
@@ -329,6 +329,7 @@ def handle_slack_oauth_callback(
state: str,
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
tenant_id: str | None = Depends(get_current_tenant_id),
) -> JSONResponse:
if not SlackOAuth.CLIENT_ID or not SlackOAuth.CLIENT_SECRET:
raise HTTPException(
@@ -336,7 +337,7 @@ def handle_slack_oauth_callback(
detail="Slack client ID or client secret is not configured.",
)
r = get_redis_client()
r = get_redis_client(tenant_id=tenant_id)
# recover the state
padded_state = state + "=" * (
@@ -522,6 +523,7 @@ def handle_google_drive_oauth_callback(
state: str,
user: User = Depends(current_user),
db_session: Session = Depends(get_session),
tenant_id: str | None = Depends(get_current_tenant_id),
) -> JSONResponse:
if not GoogleDriveOAuth.CLIENT_ID or not GoogleDriveOAuth.CLIENT_SECRET:
raise HTTPException(
@@ -529,7 +531,7 @@ def handle_google_drive_oauth_callback(
detail="Google Drive client ID or client secret is not configured.",
)
r = get_redis_client()
r = get_redis_client(tenant_id=tenant_id)
# recover the state
padded_state = state + "=" * (

View File

@@ -83,7 +83,6 @@ def handle_search_request(
user=user,
llm=llm,
fast_llm=fast_llm,
skip_query_analysis=False,
db_session=db_session,
bypass_acl=False,
)

View File

@@ -13,7 +13,7 @@ from sqlalchemy import select
from sqlalchemy.orm import Session
from onyx.db.api_key import is_api_key_email_address
from onyx.db.engine import get_session_with_current_tenant
from onyx.db.engine import get_session_with_tenant
from onyx.db.models import ChatMessage
from onyx.db.models import ChatSession
from onyx.db.models import TokenRateLimit
@@ -28,21 +28,21 @@ from onyx.server.query_and_chat.token_limit import _user_is_rate_limited_by_glob
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
def _check_token_rate_limits(user: User | None) -> None:
def _check_token_rate_limits(user: User | None, tenant_id: str | None) -> None:
if user is None:
# Unauthenticated users are only rate limited by global settings
_user_is_rate_limited_by_global()
_user_is_rate_limited_by_global(tenant_id)
elif is_api_key_email_address(user.email):
# API keys are only rate limited by global settings
_user_is_rate_limited_by_global()
_user_is_rate_limited_by_global(tenant_id)
else:
run_functions_tuples_in_parallel(
[
(_user_is_rate_limited, (user.id,)),
(_user_is_rate_limited_by_group, (user.id,)),
(_user_is_rate_limited_by_global, ()),
(_user_is_rate_limited, (user.id, tenant_id)),
(_user_is_rate_limited_by_group, (user.id, tenant_id)),
(_user_is_rate_limited_by_global, (tenant_id,)),
]
)
@@ -52,8 +52,8 @@ User rate limits
"""
def _user_is_rate_limited(user_id: UUID) -> None:
with get_session_with_current_tenant() as db_session:
def _user_is_rate_limited(user_id: UUID, tenant_id: str | None) -> None:
with get_session_with_tenant(tenant_id) as db_session:
user_rate_limits = fetch_all_user_token_rate_limits(
db_session=db_session, enabled_only=True, ordered=False
)
@@ -93,8 +93,8 @@ User Group rate limits
"""
def _user_is_rate_limited_by_group(user_id: UUID) -> None:
with get_session_with_current_tenant() as db_session:
def _user_is_rate_limited_by_group(user_id: UUID, tenant_id: str | None) -> None:
with get_session_with_tenant(tenant_id) as db_session:
group_rate_limits = _fetch_all_user_group_rate_limits(user_id, db_session)
if group_rate_limits:
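
run_functions_tuples_in_parallel is Onyx's own threadpool helper; the diff simply adds tenant_id to each (function, args) tuple it receives. As a rough illustration of what such a helper does, here is a generic stand-in built on concurrent.futures — a sketch under that assumption, not the real implementation:

```python
from concurrent.futures import ThreadPoolExecutor


def run_functions_tuples_in_parallel_sketch(functions_with_args):
    """Run (func, args) pairs concurrently and propagate the first exception.

    Illustrative only: a rate-limit check that raises inside its thread would
    surface here via future.result(), aborting the request just as a serial
    check would.
    """
    with ThreadPoolExecutor(max_workers=max(1, len(functions_with_args))) as executor:
        futures = [executor.submit(func, *args) for func, args in functions_with_args]
        return [future.result() for future in futures]
```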

View File

@@ -2,7 +2,6 @@ import csv
import io
from datetime import datetime
from datetime import timezone
from http import HTTPStatus
from uuid import UUID
from fastapi import APIRouter
@@ -22,10 +21,8 @@ from ee.onyx.server.query_history.models import QuestionAnswerPairSnapshot
from onyx.auth.users import current_admin_user
from onyx.auth.users import get_display_email
from onyx.chat.chat_utils import create_chat_chain
from onyx.configs.app_configs import ONYX_QUERY_HISTORY_TYPE
from onyx.configs.constants import MessageType
from onyx.configs.constants import QAFeedbackType
from onyx.configs.constants import QueryHistoryType
from onyx.configs.constants import SessionType
from onyx.db.chat import get_chat_session_by_id
from onyx.db.chat import get_chat_sessions_by_user
@@ -38,8 +35,6 @@ from onyx.server.query_and_chat.models import ChatSessionsResponse
router = APIRouter()
ONYX_ANONYMIZED_EMAIL = "anonymous@anonymous.invalid"
def fetch_and_process_chat_session_history(
db_session: Session,
@@ -112,17 +107,6 @@ def get_user_chat_sessions(
_: User | None = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> ChatSessionsResponse:
# we specifically don't allow this endpoint if "anonymized" since
# this is a direct query on the user id
if ONYX_QUERY_HISTORY_TYPE in [
QueryHistoryType.DISABLED,
QueryHistoryType.ANONYMIZED,
]:
raise HTTPException(
status_code=HTTPStatus.FORBIDDEN,
detail="Per user query history has been disabled by the administrator.",
)
try:
chat_sessions = get_chat_sessions_by_user(
user_id=user_id, deleted=False, db_session=db_session, limit=0
@@ -138,7 +122,6 @@ def get_user_chat_sessions(
name=chat.description,
persona_id=chat.persona_id,
time_created=chat.time_created.isoformat(),
time_updated=chat.time_updated.isoformat(),
shared_status=chat.shared_status,
folder_id=chat.folder_id,
current_alternate_model=chat.current_alternate_model,
@@ -158,12 +141,6 @@ def get_chat_session_history(
_: User | None = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> PaginatedReturn[ChatSessionMinimal]:
if ONYX_QUERY_HISTORY_TYPE == QueryHistoryType.DISABLED:
raise HTTPException(
status_code=HTTPStatus.FORBIDDEN,
detail="Query history has been disabled by the administrator.",
)
page_of_chat_sessions = get_page_of_chat_sessions(
page_num=page_num,
page_size=page_size,
@@ -180,16 +157,11 @@ def get_chat_session_history(
feedback_filter=feedback_type,
)
minimal_chat_sessions: list[ChatSessionMinimal] = []
for chat_session in page_of_chat_sessions:
minimal_chat_session = ChatSessionMinimal.from_chat_session(chat_session)
if ONYX_QUERY_HISTORY_TYPE == QueryHistoryType.ANONYMIZED:
minimal_chat_session.user_email = ONYX_ANONYMIZED_EMAIL
minimal_chat_sessions.append(minimal_chat_session)
return PaginatedReturn(
items=minimal_chat_sessions,
items=[
ChatSessionMinimal.from_chat_session(chat_session)
for chat_session in page_of_chat_sessions
],
total_items=total_filtered_chat_sessions_count,
)
@@ -200,12 +172,6 @@ def get_chat_session_admin(
_: User | None = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> ChatSessionSnapshot:
if ONYX_QUERY_HISTORY_TYPE == QueryHistoryType.DISABLED:
raise HTTPException(
status_code=HTTPStatus.FORBIDDEN,
detail="Query history has been disabled by the administrator.",
)
try:
chat_session = get_chat_session_by_id(
chat_session_id=chat_session_id,
@@ -227,9 +193,6 @@ def get_chat_session_admin(
f"Could not create snapshot for chat session with id '{chat_session_id}'",
)
if ONYX_QUERY_HISTORY_TYPE == QueryHistoryType.ANONYMIZED:
snapshot.user_email = ONYX_ANONYMIZED_EMAIL
return snapshot
@@ -240,12 +203,6 @@ def get_query_history_as_csv(
end: datetime | None = None,
db_session: Session = Depends(get_session),
) -> StreamingResponse:
if ONYX_QUERY_HISTORY_TYPE == QueryHistoryType.DISABLED:
raise HTTPException(
status_code=HTTPStatus.FORBIDDEN,
detail="Query history has been disabled by the administrator.",
)
complete_chat_session_history = fetch_and_process_chat_session_history(
db_session=db_session,
start=start or datetime.fromtimestamp(0, tz=timezone.utc),
@@ -256,9 +213,6 @@ def get_query_history_as_csv(
question_answer_pairs: list[QuestionAnswerPairSnapshot] = []
for chat_session_snapshot in complete_chat_session_history:
if ONYX_QUERY_HISTORY_TYPE == QueryHistoryType.ANONYMIZED:
chat_session_snapshot.user_email = ONYX_ANONYMIZED_EMAIL
question_answer_pairs.extend(
QuestionAnswerPairSnapshot.from_chat_session_snapshot(chat_session_snapshot)
)

View File
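The hunks above concern gating the admin query-history endpoints on ONYX_QUERY_HISTORY_TYPE (disabled entirely, or anonymized so that per-user lookups are blocked). A small sketch of that gating pattern follows; the enum values other than DISABLED/ANONYMIZED and the hard-coded config value are assumptions for illustration only.

```python
from enum import Enum
from http import HTTPStatus

from fastapi import HTTPException


class QueryHistoryType(str, Enum):
    NORMAL = "normal"        # assumed value, not shown in the diff
    ANONYMIZED = "anonymized"
    DISABLED = "disabled"


# In the real code this comes from onyx.configs.app_configs; fixed here for the sketch.
ONYX_QUERY_HISTORY_TYPE = QueryHistoryType.ANONYMIZED


def ensure_per_user_history_allowed() -> None:
    # Per-user lookups are blocked when history is disabled OR anonymized,
    # since they query directly by user id.
    if ONYX_QUERY_HISTORY_TYPE in (QueryHistoryType.DISABLED, QueryHistoryType.ANONYMIZED):
        raise HTTPException(
            status_code=HTTPStatus.FORBIDDEN,
            detail="Per user query history has been disabled by the administrator.",
        )


try:
    ensure_per_user_history_allowed()
except HTTPException as exc:
    print(exc.status_code, exc.detail)
```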

@@ -41,15 +41,14 @@ from onyx.auth.users import User
from onyx.configs.app_configs import WEB_DOMAIN
from onyx.configs.constants import FASTAPI_USERS_AUTH_COOKIE_NAME
from onyx.db.auth import get_user_count
from onyx.db.engine import get_current_tenant_id
from onyx.db.engine import get_session
from onyx.db.engine import get_session_with_shared_schema
from onyx.db.engine import get_session_with_tenant
from onyx.db.users import delete_user_from_db
from onyx.db.users import get_user_by_email
from onyx.server.manage.models import UserByEmail
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
from shared_configs.contextvars import get_current_tenant_id
stripe.api_key = STRIPE_SECRET_KEY
logger = setup_logger()
@@ -58,14 +57,13 @@ router = APIRouter(prefix="/tenants")
@router.get("/anonymous-user-path")
async def get_anonymous_user_path_api(
tenant_id: str | None = Depends(get_current_tenant_id),
_: User | None = Depends(current_admin_user),
) -> AnonymousUserPath:
tenant_id = get_current_tenant_id()
if tenant_id is None:
raise HTTPException(status_code=404, detail="Tenant not found")
with get_session_with_shared_schema() as db_session:
with get_session_with_tenant(tenant_id=None) as db_session:
current_path = get_anonymous_user_path(tenant_id, db_session)
return AnonymousUserPath(anonymous_user_path=current_path)
@@ -74,15 +72,15 @@ async def get_anonymous_user_path_api(
@router.post("/anonymous-user-path")
async def set_anonymous_user_path_api(
anonymous_user_path: str,
tenant_id: str = Depends(get_current_tenant_id),
_: User | None = Depends(current_admin_user),
) -> None:
tenant_id = get_current_tenant_id()
try:
validate_anonymous_user_path(anonymous_user_path)
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))
with get_session_with_shared_schema() as db_session:
with get_session_with_tenant(tenant_id=None) as db_session:
try:
modify_anonymous_user_path(tenant_id, anonymous_user_path, db_session)
except IntegrityError:
@@ -103,7 +101,7 @@ async def login_as_anonymous_user(
anonymous_user_path: str,
_: User | None = Depends(optional_user),
) -> Response:
with get_session_with_shared_schema() as db_session:
with get_session_with_tenant(tenant_id=None) as db_session:
tenant_id = get_tenant_id_for_anonymous_user_path(
anonymous_user_path, db_session
)
@@ -152,17 +150,14 @@ async def billing_information(
_: User = Depends(current_admin_user),
) -> BillingInformation | SubscriptionStatusResponse:
logger.info("Fetching billing information")
tenant_id = get_current_tenant_id()
return fetch_billing_information(tenant_id)
return fetch_billing_information(CURRENT_TENANT_ID_CONTEXTVAR.get())
@router.post("/create-customer-portal-session")
async def create_customer_portal_session(
_: User = Depends(current_admin_user),
) -> dict:
tenant_id = get_current_tenant_id()
async def create_customer_portal_session(_: User = Depends(current_admin_user)) -> dict:
try:
# Fetch tenant_id and current tenant's information
tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
stripe_info = fetch_tenant_stripe_information(tenant_id)
stripe_customer_id = stripe_info.get("stripe_customer_id")
if not stripe_customer_id:
@@ -186,8 +181,6 @@ async def create_subscription_session(
) -> SubscriptionSessionResponse:
try:
tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
if not tenant_id:
raise HTTPException(status_code=400, detail="Tenant ID not found")
session_id = fetch_stripe_checkout_session(tenant_id)
return SubscriptionSessionResponse(sessionId=session_id)
@@ -204,7 +197,7 @@ async def impersonate_user(
"""Allows a cloud superuser to impersonate another user by generating an impersonation JWT token"""
tenant_id = get_tenant_id_for_email(impersonate_request.email)
with get_session_with_tenant(tenant_id=tenant_id) as tenant_session:
with get_session_with_tenant(tenant_id) as tenant_session:
user_to_impersonate = get_user_by_email(
impersonate_request.email, tenant_session
)
@@ -228,9 +221,8 @@ async def leave_organization(
user_email: UserByEmail,
current_user: User | None = Depends(current_admin_user),
db_session: Session = Depends(get_session),
tenant_id: str = Depends(get_current_tenant_id),
) -> None:
tenant_id = get_current_tenant_id()
if current_user is None or current_user.email != user_email.user_email:
raise HTTPException(
status_code=403, detail="You can only leave the organization as yourself"

View File
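In the tenants router above, the two variants differ mainly in how the tenant id is resolved: via a `get_current_tenant_id` helper/dependency versus reading `CURRENT_TENANT_ID_CONTEXTVAR` directly. A minimal contextvar-backed sketch of that pattern is below, assuming request middleware (not shown) binds the tenant id per request; the real helpers live in shared_configs.contextvars and onyx.db.engine and may behave differently.

```python
from contextvars import ContextVar

# Hypothetical stand-in for shared_configs.contextvars.CURRENT_TENANT_ID_CONTEXTVAR.
CURRENT_TENANT_ID: ContextVar[str | None] = ContextVar("current_tenant_id", default=None)


def get_current_tenant_id() -> str:
    # Reads whatever tenant id upstream middleware is assumed to have set
    # for the duration of the current request.
    tenant_id = CURRENT_TENANT_ID.get()
    if tenant_id is None:
        raise RuntimeError("No tenant id bound to the current request context")
    return tenant_id


# Usage: a middleware would call .set(...) at request start and .reset(...) at the end.
token = CURRENT_TENANT_ID.set("tenant-a")
try:
    print(get_current_tenant_id())  # -> "tenant-a"
finally:
    CURRENT_TENANT_ID.reset(token)
```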

@@ -7,7 +7,6 @@ from ee.onyx.configs.app_configs import STRIPE_PRICE_ID
from ee.onyx.configs.app_configs import STRIPE_SECRET_KEY
from ee.onyx.server.tenants.access import generate_data_plane_token
from ee.onyx.server.tenants.models import BillingInformation
from ee.onyx.server.tenants.models import SubscriptionStatusResponse
from onyx.configs.app_configs import CONTROL_PLANE_API_BASE_URL
from onyx.utils.logger import setup_logger
@@ -42,9 +41,7 @@ def fetch_tenant_stripe_information(tenant_id: str) -> dict:
return response.json()
def fetch_billing_information(
tenant_id: str,
) -> BillingInformation | SubscriptionStatusResponse:
def fetch_billing_information(tenant_id: str) -> BillingInformation:
logger.info("Fetching billing information")
token = generate_data_plane_token()
headers = {
@@ -55,19 +52,8 @@ def fetch_billing_information(
params = {"tenant_id": tenant_id}
response = requests.get(url, headers=headers, params=params)
response.raise_for_status()
response_data = response.json()
# Check if the response indicates no subscription
if (
isinstance(response_data, dict)
and "subscribed" in response_data
and not response_data["subscribed"]
):
return SubscriptionStatusResponse(**response_data)
# Otherwise, parse as BillingInformation
return BillingInformation(**response_data)
billing_info = BillingInformation(**response.json())
return billing_info
def register_tenant_users(tenant_id: str, number_of_users: int) -> stripe.Subscription:

View File
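The billing hunk above switches between always parsing the control-plane payload as BillingInformation and branching on a "subscribed" flag to return a SubscriptionStatusResponse for unsubscribed tenants. A sketch of that branch, with deliberately simplified model fields (the real models live in ee.onyx.server.tenants.models and are richer):

```python
from pydantic import BaseModel


class SubscriptionStatusResponse(BaseModel):
    subscribed: bool


class BillingInformation(BaseModel):
    # Hypothetical fields for illustration only.
    stripe_customer_id: str
    seats: int


def parse_billing_payload(
    payload: dict,
) -> BillingInformation | SubscriptionStatusResponse:
    # Mirror the check in the diff: a payload that explicitly reports
    # subscribed == False is returned as a lightweight status object,
    # everything else is treated as full billing information.
    if "subscribed" in payload and not payload["subscribed"]:
        return SubscriptionStatusResponse(**payload)
    return BillingInformation(**payload)


print(parse_billing_payload({"subscribed": False}))
print(parse_billing_payload({"stripe_customer_id": "cus_123", "seats": 5}))
```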

@@ -2,7 +2,6 @@ from typing import cast
from ee.onyx.configs.app_configs import GATED_TENANTS_KEY
from onyx.configs.constants import ONYX_CLOUD_TENANT_ID
from onyx.redis.redis_pool import get_redis_client
from onyx.redis.redis_pool import get_redis_replica_client
from onyx.server.settings.models import ApplicationStatus
from onyx.server.settings.store import load_settings
@@ -14,7 +13,7 @@ logger = setup_logger()
def update_tenant_gating(tenant_id: str, status: ApplicationStatus) -> None:
redis_client = get_redis_client(tenant_id=ONYX_CLOUD_TENANT_ID)
redis_client = get_redis_replica_client(tenant_id=ONYX_CLOUD_TENANT_ID)
# Store the full status
status_key = f"tenant:{tenant_id}:status"

View File

@@ -118,7 +118,7 @@ async def provision_tenant(tenant_id: str, email: str) -> None:
# Await the Alembic migrations
await asyncio.to_thread(run_alembic_migrations, tenant_id)
with get_session_with_tenant(tenant_id=tenant_id) as db_session:
with get_session_with_tenant(tenant_id) as db_session:
configure_default_api_keys(db_session)
current_search_settings = (
@@ -134,7 +134,7 @@ async def provision_tenant(tenant_id: str, email: str) -> None:
add_users_to_tenant([email], tenant_id)
with get_session_with_tenant(tenant_id=tenant_id) as db_session:
with get_session_with_tenant(tenant_id) as db_session:
create_milestone_and_report(
user=None,
distinct_id=tenant_id,
@@ -200,35 +200,14 @@ async def rollback_tenant_provisioning(tenant_id: str) -> None:
def configure_default_api_keys(db_session: Session) -> None:
if ANTHROPIC_DEFAULT_API_KEY:
anthropic_provider = LLMProviderUpsertRequest(
name="Anthropic",
provider=ANTHROPIC_PROVIDER_NAME,
api_key=ANTHROPIC_DEFAULT_API_KEY,
default_model_name="claude-3-7-sonnet-20250219",
fast_default_model_name="claude-3-5-sonnet-20241022",
model_names=ANTHROPIC_MODEL_NAMES,
display_model_names=["claude-3-5-sonnet-20241022"],
)
try:
full_provider = upsert_llm_provider(anthropic_provider, db_session)
update_default_provider(full_provider.id, db_session)
except Exception as e:
logger.error(f"Failed to configure Anthropic provider: {e}")
else:
logger.error(
"ANTHROPIC_DEFAULT_API_KEY not set, skipping Anthropic provider configuration"
)
if OPENAI_DEFAULT_API_KEY:
open_provider = LLMProviderUpsertRequest(
name="OpenAI",
provider=OPENAI_PROVIDER_NAME,
api_key=OPENAI_DEFAULT_API_KEY,
default_model_name="gpt-4o",
default_model_name="gpt-4",
fast_default_model_name="gpt-4o-mini",
model_names=OPEN_AI_MODEL_NAMES,
display_model_names=["o1", "o3-mini", "gpt-4o", "gpt-4o-mini"],
)
try:
full_provider = upsert_llm_provider(open_provider, db_session)
@@ -240,6 +219,25 @@ def configure_default_api_keys(db_session: Session) -> None:
"OPENAI_DEFAULT_API_KEY not set, skipping OpenAI provider configuration"
)
if ANTHROPIC_DEFAULT_API_KEY:
anthropic_provider = LLMProviderUpsertRequest(
name="Anthropic",
provider=ANTHROPIC_PROVIDER_NAME,
api_key=ANTHROPIC_DEFAULT_API_KEY,
default_model_name="claude-3-5-sonnet-20241022",
fast_default_model_name="claude-3-5-sonnet-20241022",
model_names=ANTHROPIC_MODEL_NAMES,
)
try:
full_provider = upsert_llm_provider(anthropic_provider, db_session)
update_default_provider(full_provider.id, db_session)
except Exception as e:
logger.error(f"Failed to configure Anthropic provider: {e}")
else:
logger.error(
"ANTHROPIC_DEFAULT_API_KEY not set, skipping Anthropic provider configuration"
)
if COHERE_DEFAULT_API_KEY:
cloud_embedding_provider = CloudEmbeddingProviderCreationRequest(
provider_type=EmbeddingProvider.COHERE,

View File

@@ -28,7 +28,7 @@ def get_tenant_id_for_email(email: str) -> str:
def user_owns_a_tenant(email: str) -> bool:
with get_session_with_tenant(tenant_id=POSTGRES_DEFAULT_SCHEMA) as db_session:
with get_session_with_tenant(POSTGRES_DEFAULT_SCHEMA) as db_session:
result = (
db_session.query(UserTenantMapping)
.filter(UserTenantMapping.email == email)
@@ -38,7 +38,7 @@ def user_owns_a_tenant(email: str) -> bool:
def add_users_to_tenant(emails: list[str], tenant_id: str) -> None:
with get_session_with_tenant(tenant_id=POSTGRES_DEFAULT_SCHEMA) as db_session:
with get_session_with_tenant(POSTGRES_DEFAULT_SCHEMA) as db_session:
try:
for email in emails:
db_session.add(UserTenantMapping(email=email, tenant_id=tenant_id))
@@ -48,7 +48,7 @@ def add_users_to_tenant(emails: list[str], tenant_id: str) -> None:
def remove_users_from_tenant(emails: list[str], tenant_id: str) -> None:
with get_session_with_tenant(tenant_id=POSTGRES_DEFAULT_SCHEMA) as db_session:
with get_session_with_tenant(POSTGRES_DEFAULT_SCHEMA) as db_session:
try:
mappings_to_delete = (
db_session.query(UserTenantMapping)
@@ -71,7 +71,7 @@ def remove_users_from_tenant(emails: list[str], tenant_id: str) -> None:
def remove_all_users_from_tenant(tenant_id: str) -> None:
with get_session_with_tenant(tenant_id=POSTGRES_DEFAULT_SCHEMA) as db_session:
with get_session_with_tenant(POSTGRES_DEFAULT_SCHEMA) as db_session:
db_session.query(UserTenantMapping).filter(
UserTenantMapping.tenant_id == tenant_id
).delete()

View File

@@ -98,17 +98,12 @@ class CloudEmbedding:
return final_embeddings
except Exception as e:
error_string = (
f"Exception embedding text with OpenAI - {type(e)}: "
f"Model: {model} "
f"Provider: {self.provider} "
f"Exception: {e}"
f"Error embedding text with OpenAI: {str(e)} \n"
f"Model: {model} \n"
f"Provider: {self.provider} \n"
f"Texts: {texts}"
)
logger.error(error_string)
# only log text when it's not an authentication error.
if not isinstance(e, openai.AuthenticationError):
logger.debug(f"Exception texts: {texts}")
raise RuntimeError(error_string)
async def _embed_cohere(

View File

@@ -5,14 +5,14 @@ from langgraph.graph import StateGraph
from onyx.agents.agent_search.basic.states import BasicInput
from onyx.agents.agent_search.basic.states import BasicOutput
from onyx.agents.agent_search.basic.states import BasicState
from onyx.agents.agent_search.orchestration.nodes.call_tool import call_tool
from onyx.agents.agent_search.orchestration.nodes.choose_tool import choose_tool
from onyx.agents.agent_search.orchestration.nodes.basic_use_tool_response import (
basic_use_tool_response,
)
from onyx.agents.agent_search.orchestration.nodes.llm_tool_choice import llm_tool_choice
from onyx.agents.agent_search.orchestration.nodes.prepare_tool_input import (
prepare_tool_input,
)
from onyx.agents.agent_search.orchestration.nodes.use_tool_response import (
basic_use_tool_response,
)
from onyx.agents.agent_search.orchestration.nodes.tool_call import tool_call
from onyx.utils.logger import setup_logger
logger = setup_logger()
@@ -33,13 +33,13 @@ def basic_graph_builder() -> StateGraph:
)
graph.add_node(
node="choose_tool",
action=choose_tool,
node="llm_tool_choice",
action=llm_tool_choice,
)
graph.add_node(
node="call_tool",
action=call_tool,
node="tool_call",
action=tool_call,
)
graph.add_node(
@@ -51,12 +51,12 @@ def basic_graph_builder() -> StateGraph:
graph.add_edge(start_key=START, end_key="prepare_tool_input")
graph.add_edge(start_key="prepare_tool_input", end_key="choose_tool")
graph.add_edge(start_key="prepare_tool_input", end_key="llm_tool_choice")
graph.add_conditional_edges("choose_tool", should_continue, ["call_tool", END])
graph.add_conditional_edges("llm_tool_choice", should_continue, ["tool_call", END])
graph.add_edge(
start_key="call_tool",
start_key="tool_call",
end_key="basic_use_tool_response",
)
@@ -73,7 +73,7 @@ def should_continue(state: BasicState) -> str:
# If there are no tool calls, basic graph already streamed the answer
END
if state.tool_choice is None
else "call_tool"
else "tool_call"
)
@@ -85,7 +85,7 @@ if __name__ == "__main__":
graph = basic_graph_builder()
compiled_graph = graph.compile()
input = BasicInput(unused=True)
input = BasicInput(_unused=True)
primary_llm, fast_llm = get_default_llms()
with get_session_context_manager() as db_session:
config, _ = get_test_config(

View File
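The basic graph builder above is mostly a node/edge rename (choose_tool/call_tool vs. llm_tool_choice/tool_call), but the routing shape is easier to see in isolation. Below is a toy LangGraph sketch with the same structure: a decision node, a conditional edge to an optional tool node, and a terminal response node. The state, node bodies, and names here are invented; only the StateGraph/add_node/add_edge/add_conditional_edges calls mirror the diff.

```python
from langgraph.graph import END, START, StateGraph
from pydantic import BaseModel


class ToyState(BaseModel):
    question: str = ""
    needs_tool: bool = False
    answer: str = ""


def choose(state: ToyState) -> dict:
    # Decide whether a tool is needed; here, a trivial keyword check.
    return {"needs_tool": "search" in state.question.lower()}


def call_tool(state: ToyState) -> dict:
    return {"answer": f"tool result for: {state.question}"}


def respond(state: ToyState) -> dict:
    return {"answer": state.answer or f"direct answer to: {state.question}"}


def should_continue(state: ToyState) -> str:
    # Route to the tool node only when the decision node asked for it.
    return "call_tool" if state.needs_tool else "respond"


graph = StateGraph(ToyState)
graph.add_node(node="choose", action=choose)
graph.add_node(node="call_tool", action=call_tool)
graph.add_node(node="respond", action=respond)
graph.add_edge(start_key=START, end_key="choose")
graph.add_conditional_edges("choose", should_continue, ["call_tool", "respond"])
graph.add_edge(start_key="call_tool", end_key="respond")
graph.add_edge(start_key="respond", end_key=END)

compiled = graph.compile()
print(compiled.invoke({"question": "please search the docs"}))
```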

@@ -17,7 +17,7 @@ from onyx.agents.agent_search.orchestration.states import ToolChoiceUpdate
class BasicInput(BaseModel):
# Langgraph needs a nonempty input, but we pass in all static
# data through a RunnableConfig.
unused: bool = True
_unused: bool = True
## Graph Output State

View File

@@ -9,6 +9,7 @@ class CoreState(BaseModel):
This is the core state that is shared across all subgraphs.
"""
base_question: str = ""
log_messages: Annotated[list[str], add] = []
@@ -17,4 +18,4 @@ class SubgraphCoreState(BaseModel):
This is the core state that is shared across all subgraphs.
"""
log_messages: Annotated[list[str], add] = []
log_messages: Annotated[list[str], add]

View File

@@ -1,8 +1,8 @@
from datetime import datetime
from typing import cast
from langchain_core.messages import BaseMessage
from langchain_core.messages import HumanMessage
from langchain_core.messages import merge_message_runs
from langchain_core.runnables.config import RunnableConfig
from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer.states import (
@@ -12,45 +12,14 @@ from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer
SubQuestionAnswerCheckUpdate,
)
from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
binary_string_test,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_RATELIMIT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_TIMEOUT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_POSITIVE_VALUE_STR,
)
from onyx.agents.agent_search.shared_graph_utils.constants import AgentLLMErrorType
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id
from onyx.configs.agent_configs import AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_CHECK
from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_SUBANSWER_CHECK
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import SUB_ANSWER_CHECK_PROMPT
from onyx.prompts.agent_search import UNKNOWN_ANSWER
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_with_timeout
from onyx.utils.timing import log_function_time
logger = setup_logger()
_llm_node_error_strings = LLMNodeErrorStrings(
timeout="LLM Timeout Error. The sub-answer will be treated as 'relevant'",
rate_limit="LLM Rate Limit Error. The sub-answer will be treated as 'relevant'",
general_error="General LLM Error. The sub-answer will be treated as 'relevant'",
)
@log_function_time(print_only=True)
def check_sub_answer(
state: AnswerQuestionState, config: RunnableConfig
) -> SubQuestionAnswerCheckUpdate:
@@ -84,42 +53,14 @@ def check_sub_answer(
graph_config = cast(GraphConfig, config["metadata"]["config"])
fast_llm = graph_config.tooling.fast_llm
agent_error: AgentErrorLog | None = None
response: BaseMessage | None = None
try:
response = run_with_timeout(
AGENT_TIMEOUT_LLM_SUBANSWER_CHECK,
fast_llm.invoke,
response = list(
fast_llm.stream(
prompt=msg,
timeout_override=AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_CHECK,
)
)
quality_str: str = cast(str, response.content)
answer_quality = binary_string_test(
text=quality_str, positive_value=AGENT_POSITIVE_VALUE_STR
)
log_result = f"Answer quality: {quality_str}"
except (LLMTimeoutError, TimeoutError):
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,
error_result=_llm_node_error_strings.timeout,
)
answer_quality = True
log_result = agent_error.error_result
logger.error("LLM Timeout Error - check sub answer")
except LLMRateLimitError:
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.RATE_LIMIT,
error_message=AGENT_LLM_RATELIMIT_MESSAGE,
error_result=_llm_node_error_strings.rate_limit,
)
answer_quality = True
log_result = agent_error.error_result
logger.error("LLM Rate Limit Error - check sub answer")
quality_str: str = merge_message_runs(response, chunk_separator="")[0].content
answer_quality = "yes" in quality_str.lower()
return SubQuestionAnswerCheckUpdate(
answer_quality=answer_quality,
@@ -128,7 +69,7 @@ def check_sub_answer(
graph_component="initial - generate individual sub answer",
node_name="check sub answer",
node_start_time=node_start_time,
result=log_result,
result=f"Answer quality: {quality_str}",
)
],
)

View File
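The check_sub_answer hunk above toggles between streaming the verdict and wrapping a blocking `fast_llm.invoke` in `run_with_timeout(timeout, func, *args, **kwargs)`, catching LLMTimeoutError/TimeoutError and LLMRateLimitError and falling back to "relevant". A minimal sketch of the timeout wrapper's assumed contract is below; the real helper is onyx.utils.threadpool_concurrency.run_with_timeout and may differ (for example in how it cancels or propagates work).

```python
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import TimeoutError as FuturesTimeoutError
from typing import Any, Callable, TypeVar

R = TypeVar("R")


def run_with_timeout_sketch(
    timeout: float, func: Callable[..., R], *args: Any, **kwargs: Any
) -> R:
    # Run func in a worker thread and raise TimeoutError if it does not finish
    # within `timeout` seconds. The worker itself is not forcibly cancelled.
    executor = ThreadPoolExecutor(max_workers=1)
    future = executor.submit(func, *args, **kwargs)
    try:
        return future.result(timeout=timeout)
    except FuturesTimeoutError as e:
        raise TimeoutError(f"call did not finish within {timeout}s") from e
    finally:
        executor.shutdown(wait=False, cancel_futures=True)


def slow_check(prompt: str) -> str:
    return f"ok: {prompt}"


print(run_with_timeout_sketch(2.0, slow_check, prompt="is the sub-answer relevant?"))
```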

@@ -1,4 +1,5 @@
from datetime import datetime
from typing import Any
from typing import cast
from langchain_core.messages import merge_message_runs
@@ -15,23 +16,6 @@ from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
build_sub_question_answer_prompt,
)
from onyx.agents.agent_search.shared_graph_utils.calculations import (
dedup_sort_inference_section_list,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_RATELIMIT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_TIMEOUT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AgentLLMErrorType,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
LLM_ANSWER_ERROR_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.utils import get_answer_citation_ids
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
@@ -46,25 +30,12 @@ from onyx.chat.models import StreamStopInfo
from onyx.chat.models import StreamStopReason
from onyx.chat.models import StreamType
from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
from onyx.configs.agent_configs import AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION
from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_SUBANSWER_GENERATION
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import NO_RECOVERED_DOCS
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_with_timeout
from onyx.utils.timing import log_function_time
logger = setup_logger()
_llm_node_error_strings = LLMNodeErrorStrings(
timeout="LLM Timeout Error. A sub-answer could not be constructed and the sub-question will be ignored.",
rate_limit="LLM Rate Limit Error. A sub-answer could not be constructed and the sub-question will be ignored.",
general_error="General LLM Error. A sub-answer could not be constructed and the sub-question will be ignored.",
)
@log_function_time(print_only=True)
def generate_sub_answer(
state: AnswerQuestionState,
config: RunnableConfig,
@@ -80,17 +51,12 @@ def generate_sub_answer(
state.verified_reranked_documents
level, question_num = parse_question_id(state.question_id)
context_docs = state.context_documents[:AGENT_MAX_ANSWER_CONTEXT_DOCS]
context_docs = dedup_sort_inference_section_list(context_docs)
persona_contextualized_prompt = get_persona_agent_prompt_expressions(
graph_config.inputs.search_request.persona
).contextualized_prompt
if len(context_docs) == 0:
answer_str = NO_RECOVERED_DOCS
cited_documents: list = []
log_results = "No documents retrieved"
write_custom_event(
"sub_answers",
AgentAnswerPiece(
@@ -111,75 +77,43 @@ def generate_sub_answer(
config=fast_llm.config,
)
response: list[str | list[str | dict[str, Any]]] = []
dispatch_timings: list[float] = []
agent_error: AgentErrorLog | None = None
response: list[str] = []
def stream_sub_answer() -> list[str]:
for message in fast_llm.stream(
prompt=msg,
timeout_override=AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION,
):
# TODO: in principle, the answer here COULD contain images, but we don't support that yet
content = message.content
if not isinstance(content, str):
raise ValueError(
f"Expected content to be a string, but got {type(content)}"
)
start_stream_token = datetime.now()
write_custom_event(
"sub_answers",
AgentAnswerPiece(
answer_piece=content,
level=level,
level_question_num=question_num,
answer_type="agent_sub_answer",
),
writer,
for message in fast_llm.stream(
prompt=msg,
):
# TODO: in principle, the answer here COULD contain images, but we don't support that yet
content = message.content
if not isinstance(content, str):
raise ValueError(
f"Expected content to be a string, but got {type(content)}"
)
end_stream_token = datetime.now()
dispatch_timings.append(
(end_stream_token - start_stream_token).microseconds
)
response.append(content)
return response
try:
response = run_with_timeout(
AGENT_TIMEOUT_LLM_SUBANSWER_GENERATION,
stream_sub_answer,
start_stream_token = datetime.now()
write_custom_event(
"sub_answers",
AgentAnswerPiece(
answer_piece=content,
level=level,
level_question_num=question_num,
answer_type="agent_sub_answer",
),
writer,
)
except (LLMTimeoutError, TimeoutError):
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,
error_result=_llm_node_error_strings.timeout,
end_stream_token = datetime.now()
dispatch_timings.append(
(end_stream_token - start_stream_token).microseconds
)
logger.error("LLM Timeout Error - generate sub answer")
except LLMRateLimitError:
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.RATE_LIMIT,
error_message=AGENT_LLM_RATELIMIT_MESSAGE,
error_result=_llm_node_error_strings.rate_limit,
)
logger.error("LLM Rate Limit Error - generate sub answer")
response.append(content)
if agent_error:
answer_str = LLM_ANSWER_ERROR_MESSAGE
cited_documents = []
log_results = (
agent_error.error_result
or "Sub-answer generation failed due to LLM error"
)
answer_str = merge_message_runs(response, chunk_separator="")[0].content
logger.debug(
f"Average dispatch time: {sum(dispatch_timings) / len(dispatch_timings)}"
)
else:
answer_str = merge_message_runs(response, chunk_separator="")[0].content
answer_citation_ids = get_answer_citation_ids(answer_str)
cited_documents = [
context_docs[id] for id in answer_citation_ids if id < len(context_docs)
]
log_results = None
answer_citation_ids = get_answer_citation_ids(answer_str)
cited_documents = [
context_docs[id] for id in answer_citation_ids if id < len(context_docs)
]
stop_event = StreamStopInfo(
stop_reason=StreamStopReason.FINISHED,
@@ -197,7 +131,7 @@ def generate_sub_answer(
graph_component="initial - generate individual sub answer",
node_name="generate sub answer",
node_start_time=node_start_time,
result=log_results or "",
result="",
)
],
)

View File

@@ -42,8 +42,10 @@ class SubQuestionRetrievalIngestionUpdate(LoggerUpdate, BaseModel):
class SubQuestionAnsweringInput(SubgraphCoreState):
question: str
question_id: str
question: str = ""
question_id: str = (
"" # 0_0 is original question, everything else is <level>_<question_num>.
)
# level 0 is original question and first decomposition, level 1 is follow up, etc
# question_num is a unique number per original question per level.

View File
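The comment in the state hunk above documents the question-id scheme: "0_0" is the original question, everything else is "<level>_<question_num>". The helpers make_question_id and parse_question_id appear elsewhere in this compare; the bodies below are a guess at that naming scheme for illustration, not the real implementations.

```python
def make_question_id(level: int, question_num: int) -> str:
    # "0_0" is the original question; deeper levels are "<level>_<question_num>".
    return f"{level}_{question_num}"


def parse_question_id(question_id: str) -> tuple[int, int]:
    level_str, num_str = question_id.split("_")
    return int(level_str), int(num_str)


assert parse_question_id(make_question_id(1, 2)) == (1, 2)
print(parse_question_id("0_0"))  # (0, 0) -> the original question
```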

@@ -1,4 +1,5 @@
from datetime import datetime
from typing import Any
from typing import cast
from langchain_core.messages import HumanMessage
@@ -25,31 +26,14 @@ from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
trim_prompt_piece,
)
from onyx.agents.agent_search.shared_graph_utils.calculations import (
get_answer_generation_documents,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_RATELIMIT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_TIMEOUT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AgentLLMErrorType,
)
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
from onyx.agents.agent_search.shared_graph_utils.models import InitialAgentResultStats
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.operators import (
dedup_inference_section_list,
dedup_inference_sections,
)
from onyx.agents.agent_search.shared_graph_utils.utils import (
dispatch_main_answer_stop_info,
)
from onyx.agents.agent_search.shared_graph_utils.utils import format_docs
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_deduplicated_structured_subquestion_documents,
)
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
@@ -58,20 +42,12 @@ from onyx.agents.agent_search.shared_graph_utils.utils import remove_document_ci
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import AgentAnswerPiece
from onyx.chat.models import ExtendedToolResponse
from onyx.chat.models import StreamingError
from onyx.configs.agent_configs import AGENT_ANSWER_GENERATION_BY_FAST_LLM
from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
from onyx.configs.agent_configs import AGENT_MAX_STREAMED_DOCS_FOR_INITIAL_ANSWER
from onyx.configs.agent_configs import AGENT_MIN_ORIG_QUESTION_DOCS
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION,
from onyx.context.search.models import InferenceSection
from onyx.prompts.agent_search import (
INITIAL_ANSWER_PROMPT_W_SUB_QUESTIONS,
)
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION,
)
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import INITIAL_ANSWER_PROMPT_W_SUB_QUESTIONS
from onyx.prompts.agent_search import (
INITIAL_ANSWER_PROMPT_WO_SUB_QUESTIONS,
)
@@ -80,17 +56,8 @@ from onyx.prompts.agent_search import (
)
from onyx.prompts.agent_search import UNKNOWN_ANSWER
from onyx.tools.tool_implementations.search.search_tool import yield_search_responses
from onyx.utils.threadpool_concurrency import run_with_timeout
from onyx.utils.timing import log_function_time
_llm_node_error_strings = LLMNodeErrorStrings(
timeout="LLM Timeout Error. The initial answer could not be generated.",
rate_limit="LLM Rate Limit Error. The initial answer could not be generated.",
general_error="General LLM Error. The initial answer could not be generated.",
)
@log_function_time(print_only=True)
def generate_initial_answer(
state: SubQuestionRetrievalState,
config: RunnableConfig,
@@ -106,19 +73,15 @@ def generate_initial_answer(
question = graph_config.inputs.search_request.query
prompt_enrichment_components = get_prompt_enrichment_components(graph_config)
# get all documents cited in sub-questions
structured_subquestion_docs = get_deduplicated_structured_subquestion_documents(
state.sub_question_results
)
sub_questions_cited_documents = state.cited_documents
orig_question_retrieval_documents = state.orig_question_retrieved_documents
consolidated_context_docs = structured_subquestion_docs.cited_documents
consolidated_context_docs: list[InferenceSection] = sub_questions_cited_documents
counter = 0
for original_doc_number, original_doc in enumerate(
orig_question_retrieval_documents
):
if original_doc_number not in structured_subquestion_docs.cited_documents:
if original_doc_number not in sub_questions_cited_documents:
if (
counter <= AGENT_MIN_ORIG_QUESTION_DOCS
or len(consolidated_context_docs) < AGENT_MAX_ANSWER_CONTEXT_DOCS
@@ -127,18 +90,15 @@ def generate_initial_answer(
counter += 1
# sort docs by their scores - though the scores refer to different questions
relevant_docs = dedup_inference_section_list(consolidated_context_docs)
relevant_docs = dedup_inference_sections(
consolidated_context_docs, consolidated_context_docs
)
sub_questions: list[str] = []
# Create the list of documents to stream out. Start with the
# ones that will be in the context (or, if len == 0, use docs
# that were retrieved for the original question)
answer_generation_documents = get_answer_generation_documents(
relevant_docs=relevant_docs,
context_documents=structured_subquestion_docs.context_documents,
original_question_docs=orig_question_retrieval_documents,
max_docs=AGENT_MAX_STREAMED_DOCS_FOR_INITIAL_ANSWER,
streamed_documents = (
relevant_docs
if len(relevant_docs) > 0
else state.orig_question_retrieved_documents[:15]
)
# Use the query info from the base document retrieval
@@ -148,13 +108,11 @@ def generate_initial_answer(
graph_config.tooling.search_tool
), "search_tool must be provided for agentic search"
relevance_list = relevance_from_docs(
answer_generation_documents.streaming_documents
)
relevance_list = relevance_from_docs(relevant_docs)
for tool_response in yield_search_responses(
query=question,
reranked_sections=answer_generation_documents.streaming_documents,
final_context_sections=answer_generation_documents.context_documents,
reranked_sections=streamed_documents,
final_context_sections=streamed_documents,
search_query_info=query_info,
get_section_relevance=lambda: relevance_list,
search_tool=graph_config.tooling.search_tool,
@@ -170,7 +128,7 @@ def generate_initial_answer(
writer,
)
if len(answer_generation_documents.context_documents) == 0:
if len(relevant_docs) == 0:
write_custom_event(
"initial_agent_answer",
AgentAnswerPiece(
@@ -234,13 +192,9 @@ def generate_initial_answer(
sub_questions = all_sub_questions # Replace the original assignment
model = (
graph_config.tooling.fast_llm
if AGENT_ANSWER_GENERATION_BY_FAST_LLM
else graph_config.tooling.primary_llm
)
model = graph_config.tooling.fast_llm
doc_context = format_docs(answer_generation_documents.context_documents)
doc_context = format_docs(relevant_docs)
doc_context = trim_prompt_piece(
config=model.config,
prompt_piece=doc_context,
@@ -268,92 +222,32 @@ def generate_initial_answer(
)
]
streamed_tokens: list[str] = [""]
streamed_tokens: list[str | list[str | dict[str, Any]]] = [""]
dispatch_timings: list[float] = []
agent_error: AgentErrorLog | None = None
def stream_initial_answer() -> list[str]:
response: list[str] = []
for message in model.stream(
msg,
timeout_override=AGENT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION,
):
# TODO: in principle, the answer here COULD contain images, but we don't support that yet
content = message.content
if not isinstance(content, str):
raise ValueError(
f"Expected content to be a string, but got {type(content)}"
)
start_stream_token = datetime.now()
write_custom_event(
"initial_agent_answer",
AgentAnswerPiece(
answer_piece=content,
level=0,
level_question_num=0,
answer_type="agent_level_answer",
),
writer,
for message in model.stream(msg):
# TODO: in principle, the answer here COULD contain images, but we don't support that yet
content = message.content
if not isinstance(content, str):
raise ValueError(
f"Expected content to be a string, but got {type(content)}"
)
end_stream_token = datetime.now()
dispatch_timings.append(
(end_stream_token - start_stream_token).microseconds
)
response.append(content)
return response
start_stream_token = datetime.now()
try:
streamed_tokens = run_with_timeout(
AGENT_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION,
stream_initial_answer,
)
except (LLMTimeoutError, TimeoutError):
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,
error_result=_llm_node_error_strings.timeout,
)
logger.error("LLM Timeout Error - generate initial answer")
except LLMRateLimitError:
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.RATE_LIMIT,
error_message=AGENT_LLM_RATELIMIT_MESSAGE,
error_result=_llm_node_error_strings.rate_limit,
)
logger.error("LLM Rate Limit Error - generate initial answer")
if agent_error:
write_custom_event(
"initial_agent_answer",
StreamingError(
error=AGENT_LLM_TIMEOUT_MESSAGE,
AgentAnswerPiece(
answer_piece=content,
level=0,
level_question_num=0,
answer_type="agent_level_answer",
),
writer,
)
return InitialAnswerUpdate(
initial_answer=None,
answer_error=AgentErrorLog(
error_message=agent_error.error_message or "An LLM error occurred",
error_type=agent_error.error_type,
error_result=agent_error.error_result,
),
initial_agent_stats=None,
generated_sub_questions=sub_questions,
agent_base_end_time=None,
agent_base_metrics=None,
log_messages=[
get_langgraph_node_log_string(
graph_component="initial - generate initial answer",
node_name="generate initial answer",
node_start_time=node_start_time,
result=agent_error.error_result or "An LLM error occurred",
)
],
end_stream_token = datetime.now()
dispatch_timings.append(
(end_stream_token - start_stream_token).microseconds
)
streamed_tokens.append(content)
logger.debug(
f"Average dispatch time for initial answer: {sum(dispatch_timings) / len(dispatch_timings)}"

View File

@@ -10,10 +10,8 @@ from onyx.agents.agent_search.deep_search.main.states import (
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.utils.timing import log_function_time
@log_function_time(print_only=True)
def validate_initial_answer(
state: SubQuestionRetrievalState,
) -> InitialAnswerQualityUpdate:
@@ -27,7 +25,7 @@ def validate_initial_answer(
f"--------{node_start_time}--------Checking for base answer validity - for not set True/False manually"
)
verdict = True # not actually required as already streamed out. Refinement will do similar
verdict = True
return InitialAnswerQualityUpdate(
initial_answer_quality_eval=verdict,

View File

@@ -23,8 +23,6 @@ from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
build_history_prompt,
)
from onyx.agents.agent_search.shared_graph_utils.models import BaseMessage_Content
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.utils import dispatch_separated
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
@@ -35,34 +33,17 @@ from onyx.chat.models import StreamStopReason
from onyx.chat.models import StreamType
from onyx.chat.models import SubQuestionPiece
from onyx.configs.agent_configs import AGENT_NUM_DOCS_FOR_DECOMPOSITION
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_CONNECT_LLM_SUBQUESTION_GENERATION,
)
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_LLM_SUBQUESTION_GENERATION,
)
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import (
INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH_ASSUMING_REFINEMENT,
INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH,
)
from onyx.prompts.agent_search import (
INITIAL_QUESTION_DECOMPOSITION_PROMPT_ASSUMING_REFINEMENT,
INITIAL_QUESTION_DECOMPOSITION_PROMPT,
)
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_with_timeout
from onyx.utils.timing import log_function_time
logger = setup_logger()
_llm_node_error_strings = LLMNodeErrorStrings(
timeout="LLM Timeout Error. Sub-questions could not be generated.",
rate_limit="LLM Rate Limit Error. Sub-questions could not be generated.",
general_error="General LLM Error. Sub-questions could not be generated.",
)
@log_function_time(print_only=True)
def decompose_orig_question(
state: SubQuestionRetrievalState,
config: RunnableConfig,
@@ -104,15 +85,15 @@ def decompose_orig_question(
]
)
decomposition_prompt = INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH_ASSUMING_REFINEMENT.format(
question=question, sample_doc_str=sample_doc_str, history=history
decomposition_prompt = (
INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH.format(
question=question, sample_doc_str=sample_doc_str, history=history
)
)
else:
decomposition_prompt = (
INITIAL_QUESTION_DECOMPOSITION_PROMPT_ASSUMING_REFINEMENT.format(
question=question, history=history
)
decomposition_prompt = INITIAL_QUESTION_DECOMPOSITION_PROMPT.format(
question=question, history=history
)
# Start decomposition
@@ -131,44 +112,32 @@ def decompose_orig_question(
)
# dispatches custom events for subquestion tokens, adding in subquestion ids.
streamed_tokens = dispatch_separated(
model.stream(msg),
dispatch_subquestion(0, writer),
sep_callback=dispatch_subquestion_sep(0, writer),
)
streamed_tokens: list[BaseMessage_Content] = []
stop_event = StreamStopInfo(
stop_reason=StreamStopReason.FINISHED,
stream_type=StreamType.SUB_QUESTIONS,
level=0,
)
write_custom_event("stream_finished", stop_event, writer)
try:
streamed_tokens = run_with_timeout(
AGENT_TIMEOUT_LLM_SUBQUESTION_GENERATION,
dispatch_separated,
model.stream(
msg,
timeout_override=AGENT_TIMEOUT_CONNECT_LLM_SUBQUESTION_GENERATION,
),
dispatch_subquestion(0, writer),
sep_callback=dispatch_subquestion_sep(0, writer),
)
deomposition_response = merge_content(*streamed_tokens)
decomposition_response = merge_content(*streamed_tokens)
# this call should only return strings. Commenting out for efficiency
# assert [type(tok) == str for tok in streamed_tokens]
list_of_subqs = cast(str, decomposition_response).split("\n")
# use no-op cast() instead of str() which runs code
# list_of_subquestions = clean_and_parse_list_string(cast(str, response))
list_of_subqs = cast(str, deomposition_response).split("\n")
initial_sub_questions = [sq.strip() for sq in list_of_subqs if sq.strip() != ""]
log_result = f"decomposed original question into {len(initial_sub_questions)} subquestions"
stop_event = StreamStopInfo(
stop_reason=StreamStopReason.FINISHED,
stream_type=StreamType.SUB_QUESTIONS,
level=0,
)
write_custom_event("stream_finished", stop_event, writer)
except (LLMTimeoutError, TimeoutError) as e:
logger.error("LLM Timeout Error - decompose orig question")
raise e # fail loudly on this critical step
except LLMRateLimitError as e:
logger.error("LLM Rate Limit Error - decompose orig question")
raise e
decomp_list: list[str] = [sq.strip() for sq in list_of_subqs if sq.strip() != ""]
return InitialQuestionDecompositionUpdate(
initial_sub_questions=initial_sub_questions,
initial_sub_questions=decomp_list,
agent_start_time=agent_start_time,
agent_refined_start_time=None,
agent_refined_end_time=None,
@@ -182,7 +151,7 @@ def decompose_orig_question(
graph_component="initial - generate sub answers",
node_name="decompose original question",
node_start_time=node_start_time,
result=log_result,
result=f"decomposed original question into {len(decomp_list)} subquestions",
)
],
)

View File

@@ -25,7 +25,7 @@ logger = setup_logger()
def route_initial_tool_choice(
state: MainState, config: RunnableConfig
) -> Literal["call_tool", "start_agent_search", "logging_node"]:
) -> Literal["tool_call", "start_agent_search", "logging_node"]:
"""
LangGraph edge to route to agent search.
"""
@@ -38,7 +38,7 @@ def route_initial_tool_choice(
):
return "start_agent_search"
else:
return "call_tool"
return "tool_call"
else:
return "logging_node"

View File

@@ -26,8 +26,8 @@ from onyx.agents.agent_search.deep_search.main.nodes.decide_refinement_need impo
from onyx.agents.agent_search.deep_search.main.nodes.extract_entities_terms import (
extract_entities_terms,
)
from onyx.agents.agent_search.deep_search.main.nodes.generate_validate_refined_answer import (
generate_validate_refined_answer,
from onyx.agents.agent_search.deep_search.main.nodes.generate_refined_answer import (
generate_refined_answer,
)
from onyx.agents.agent_search.deep_search.main.nodes.ingest_refined_sub_answers import (
ingest_refined_sub_answers,
@@ -43,14 +43,14 @@ from onyx.agents.agent_search.deep_search.main.states import MainState
from onyx.agents.agent_search.deep_search.refinement.consolidate_sub_answers.graph_builder import (
answer_refined_query_graph_builder,
)
from onyx.agents.agent_search.orchestration.nodes.call_tool import call_tool
from onyx.agents.agent_search.orchestration.nodes.choose_tool import choose_tool
from onyx.agents.agent_search.orchestration.nodes.basic_use_tool_response import (
basic_use_tool_response,
)
from onyx.agents.agent_search.orchestration.nodes.llm_tool_choice import llm_tool_choice
from onyx.agents.agent_search.orchestration.nodes.prepare_tool_input import (
prepare_tool_input,
)
from onyx.agents.agent_search.orchestration.nodes.use_tool_response import (
basic_use_tool_response,
)
from onyx.agents.agent_search.orchestration.nodes.tool_call import tool_call
from onyx.agents.agent_search.shared_graph_utils.utils import get_test_config
from onyx.utils.logger import setup_logger
@@ -77,13 +77,13 @@ def main_graph_builder(test_mode: bool = False) -> StateGraph:
# Choose the initial tool
graph.add_node(
node="initial_tool_choice",
action=choose_tool,
action=llm_tool_choice,
)
# Call the tool, if required
graph.add_node(
node="call_tool",
action=call_tool,
node="tool_call",
action=tool_call,
)
# Use the tool response
@@ -126,8 +126,8 @@ def main_graph_builder(test_mode: bool = False) -> StateGraph:
# Node to generate the refined answer
graph.add_node(
node="generate_validate_refined_answer",
action=generate_validate_refined_answer,
node="generate_refined_answer",
action=generate_refined_answer,
)
# Early node to extract the entities and terms from the initial answer,
@@ -168,11 +168,11 @@ def main_graph_builder(test_mode: bool = False) -> StateGraph:
graph.add_conditional_edges(
"initial_tool_choice",
route_initial_tool_choice,
["call_tool", "start_agent_search", "logging_node"],
["tool_call", "start_agent_search", "logging_node"],
)
graph.add_edge(
start_key="call_tool",
start_key="tool_call",
end_key="basic_use_tool_response",
)
graph.add_edge(
@@ -215,11 +215,11 @@ def main_graph_builder(test_mode: bool = False) -> StateGraph:
graph.add_edge(
start_key="ingest_refined_sub_answers",
end_key="generate_validate_refined_answer",
end_key="generate_refined_answer",
)
graph.add_edge(
start_key="generate_validate_refined_answer",
start_key="generate_refined_answer",
end_key="compare_answers",
)
graph.add_edge(
@@ -252,7 +252,9 @@ if __name__ == "__main__":
db_session, primary_llm, fast_llm, search_request
)
inputs = MainInput(log_messages=[])
inputs = MainInput(
base_question=graph_config.inputs.search_request.query, log_messages=[]
)
for thing in compiled_graph.stream(
input=inputs,

View File

@@ -1,7 +1,6 @@
from datetime import datetime
from typing import cast
from langchain_core.messages import BaseMessage
from langchain_core.messages import HumanMessage
from langchain_core.runnables import RunnableConfig
from langgraph.types import StreamWriter
@@ -11,53 +10,16 @@ from onyx.agents.agent_search.deep_search.main.states import (
)
from onyx.agents.agent_search.deep_search.main.states import MainState
from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
binary_string_test,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_RATELIMIT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_TIMEOUT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_POSITIVE_VALUE_STR,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AgentLLMErrorType,
)
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import RefinedAnswerImprovement
from onyx.configs.agent_configs import AGENT_TIMEOUT_CONNECT_LLM_COMPARE_ANSWERS
from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_COMPARE_ANSWERS
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import (
INITIAL_REFINED_ANSWER_COMPARISON_PROMPT,
)
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_with_timeout
from onyx.utils.timing import log_function_time
logger = setup_logger()
_llm_node_error_strings = LLMNodeErrorStrings(
timeout="The LLM timed out, and the answers could not be compared.",
rate_limit="The LLM encountered a rate limit, and the answers could not be compared.",
general_error="The LLM encountered an error, and the answers could not be compared.",
)
_ANSWER_QUALITY_NOT_SUFFICIENT_MESSAGE = (
"Answer quality is not sufficient, so stay with the initial answer."
)
@log_function_time(print_only=True)
def compare_answers(
state: MainState, config: RunnableConfig, writer: StreamWriter = lambda _: None
) -> InitialRefinedAnswerComparisonUpdate:
@@ -72,78 +34,21 @@ def compare_answers(
initial_answer = state.initial_answer
refined_answer = state.refined_answer
# if answer quality is not sufficient, then stay with the initial answer
if not state.refined_answer_quality:
write_custom_event(
"refined_answer_improvement",
RefinedAnswerImprovement(
refined_answer_improvement=False,
),
writer,
)
return InitialRefinedAnswerComparisonUpdate(
refined_answer_improvement_eval=False,
log_messages=[
get_langgraph_node_log_string(
graph_component="main",
node_name="compare answers",
node_start_time=node_start_time,
result=_ANSWER_QUALITY_NOT_SUFFICIENT_MESSAGE,
)
],
)
compare_answers_prompt = INITIAL_REFINED_ANSWER_COMPARISON_PROMPT.format(
question=question, initial_answer=initial_answer, refined_answer=refined_answer
)
msg = [HumanMessage(content=compare_answers_prompt)]
agent_error: AgentErrorLog | None = None
# Get the rewritten queries in a defined format
model = graph_config.tooling.fast_llm
resp: BaseMessage | None = None
refined_answer_improvement: bool | None = None
# no need to stream this
try:
resp = run_with_timeout(
AGENT_TIMEOUT_LLM_COMPARE_ANSWERS,
model.invoke,
prompt=msg,
timeout_override=AGENT_TIMEOUT_CONNECT_LLM_COMPARE_ANSWERS,
)
resp = model.invoke(msg)
except (LLMTimeoutError, TimeoutError):
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,
error_result=_llm_node_error_strings.timeout,
)
logger.error("LLM Timeout Error - compare answers")
# continue as True in this support step
except LLMRateLimitError:
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.RATE_LIMIT,
error_message=AGENT_LLM_RATELIMIT_MESSAGE,
error_result=_llm_node_error_strings.rate_limit,
)
logger.error("LLM Rate Limit Error - compare answers")
# continue as True in this support step
if agent_error or resp is None:
refined_answer_improvement = True
if agent_error:
log_result = agent_error.error_result
else:
log_result = "An answer could not be generated."
else:
refined_answer_improvement = binary_string_test(
text=cast(str, resp.content),
positive_value=AGENT_POSITIVE_VALUE_STR,
)
log_result = f"Answer comparison: {refined_answer_improvement}"
refined_answer_improvement = (
isinstance(resp.content, str) and "yes" in resp.content.lower()
)
write_custom_event(
"refined_answer_improvement",
@@ -160,7 +65,7 @@ def compare_answers(
graph_component="main",
node_name="compare answers",
node_start_time=node_start_time,
result=log_result,
result=f"Answer comparison: {refined_answer_improvement}",
)
],
)

View File
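The compare_answers hunk above swaps an inline `"yes" in resp.content.lower()` check for a shared `binary_string_test(text=..., positive_value=AGENT_POSITIVE_VALUE_STR)` helper. A sketch of what such a yes/no verdict test could look like is below; the real binary_string_test in onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops may be stricter.

```python
def binary_string_test_sketch(text: str, positive_value: str = "yes") -> bool:
    # Treat the LLM verdict as positive when the positive marker appears anywhere
    # in the lower-cased response, mirroring the inline check from the diff above.
    return positive_value.lower() in text.lower()


print(binary_string_test_sketch("Yes, the refined answer is better."))  # True
print(binary_string_test_sketch("No, keep the initial answer."))        # False
```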

@@ -21,18 +21,6 @@ from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
build_history_prompt,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_RATELIMIT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_TIMEOUT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AgentLLMErrorType,
)
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
from onyx.agents.agent_search.shared_graph_utils.models import BaseMessage_Content
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.utils import dispatch_separated
from onyx.agents.agent_search.shared_graph_utils.utils import (
format_entity_term_extraction,
@@ -42,35 +30,12 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
)
from onyx.agents.agent_search.shared_graph_utils.utils import make_question_id
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import StreamingError
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_CONNECT_LLM_REFINED_SUBQUESTION_GENERATION,
)
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_LLM_REFINED_SUBQUESTION_GENERATION,
)
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import (
REFINEMENT_QUESTION_DECOMPOSITION_PROMPT_W_INITIAL_SUBQUESTION_ANSWERS,
REFINEMENT_QUESTION_DECOMPOSITION_PROMPT,
)
from onyx.tools.models import ToolCallKickoff
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_with_timeout
from onyx.utils.timing import log_function_time
logger = setup_logger()
_ANSWERED_SUBQUESTIONS_DIVIDER = "\n\n---\n\n"
_llm_node_error_strings = LLMNodeErrorStrings(
timeout="The LLM timed out. The sub-questions could not be generated.",
rate_limit="The LLM encountered a rate limit. The sub-questions could not be generated.",
general_error="The LLM encountered an error. The sub-questions could not be generated.",
)
@log_function_time(print_only=True)
def create_refined_sub_questions(
state: MainState, config: RunnableConfig, writer: StreamWriter = lambda _: None
) -> RefinedQuestionDecompositionUpdate:
@@ -107,10 +72,8 @@ def create_refined_sub_questions(
initial_question_answers = state.sub_question_results
addressed_subquestions_with_answers = [
f"Subquestion: {x.question}\nSubanswer:\n{x.answer}"
for x in initial_question_answers
if x.verified_high_quality and x.answer
addressed_question_list = [
x.question for x in initial_question_answers if x.verified_high_quality
]
failed_question_list = [
@@ -119,14 +82,12 @@ def create_refined_sub_questions(
msg = [
HumanMessage(
content=REFINEMENT_QUESTION_DECOMPOSITION_PROMPT_W_INITIAL_SUBQUESTION_ANSWERS.format(
content=REFINEMENT_QUESTION_DECOMPOSITION_PROMPT.format(
question=question,
history=history,
entity_term_extraction_str=entity_term_extraction_str,
base_answer=base_answer,
answered_subquestions_with_answers=_ANSWERED_SUBQUESTIONS_DIVIDER.join(
addressed_subquestions_with_answers
),
answered_sub_questions="\n - ".join(addressed_question_list),
failed_sub_questions="\n - ".join(failed_question_list),
),
)
@@ -135,67 +96,29 @@ def create_refined_sub_questions(
# Grader
model = graph_config.tooling.fast_llm
agent_error: AgentErrorLog | None = None
streamed_tokens: list[BaseMessage_Content] = []
try:
streamed_tokens = run_with_timeout(
AGENT_TIMEOUT_LLM_REFINED_SUBQUESTION_GENERATION,
dispatch_separated,
model.stream(
msg,
timeout_override=AGENT_TIMEOUT_CONNECT_LLM_REFINED_SUBQUESTION_GENERATION,
),
dispatch_subquestion(1, writer),
sep_callback=dispatch_subquestion_sep(1, writer),
)
except (LLMTimeoutError, TimeoutError):
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,
error_result=_llm_node_error_strings.timeout,
)
logger.error("LLM Timeout Error - create refined sub questions")
except LLMRateLimitError:
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.RATE_LIMIT,
error_message=AGENT_LLM_RATELIMIT_MESSAGE,
error_result=_llm_node_error_strings.rate_limit,
)
logger.error("LLM Rate Limit Error - create refined sub questions")
if agent_error:
refined_sub_question_dict: dict[int, RefinementSubQuestion] = {}
log_result = agent_error.error_result
write_custom_event(
"refined_sub_question_creation_error",
StreamingError(
error="Your LLM was not able to create refined sub questions in time and timed out. Please try again.",
),
writer,
)
streamed_tokens = dispatch_separated(
model.stream(msg),
dispatch_subquestion(1, writer),
sep_callback=dispatch_subquestion_sep(1, writer),
)
response = merge_content(*streamed_tokens)
if isinstance(response, str):
parsed_response = [q for q in response.split("\n") if q.strip() != ""]
else:
response = merge_content(*streamed_tokens)
raise ValueError("LLM response is not a string")
if isinstance(response, str):
parsed_response = [q for q in response.split("\n") if q.strip() != ""]
else:
raise ValueError("LLM response is not a string")
refined_sub_question_dict = {}
for sub_question_num, sub_question in enumerate(parsed_response):
refined_sub_question = RefinementSubQuestion(
sub_question=sub_question,
sub_question_id=make_question_id(1, sub_question_num + 1),
verified=False,
answered=False,
answer="",
)
refined_sub_question_dict = {}
for sub_question_num, sub_question in enumerate(parsed_response):
refined_sub_question = RefinementSubQuestion(
sub_question=sub_question,
sub_question_id=make_question_id(1, sub_question_num + 1),
verified=False,
answered=False,
answer="",
)
refined_sub_question_dict[sub_question_num + 1] = refined_sub_question
log_result = f"Created {len(refined_sub_question_dict)} refined sub questions"
refined_sub_question_dict[sub_question_num + 1] = refined_sub_question
return RefinedQuestionDecompositionUpdate(
refined_sub_questions=refined_sub_question_dict,
@@ -205,7 +128,7 @@ def create_refined_sub_questions(
graph_component="main",
node_name="create refined sub questions",
node_start_time=node_start_time,
result=log_result,
result=f"Created {len(refined_sub_question_dict)} refined sub questions",
)
],
)
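For readers skimming the diff above: the node now wraps the streaming LLM call in a timeout guard and converts timeout/rate-limit failures into an error result instead of raising. Below is a minimal, self-contained sketch of that pattern. `run_with_timeout` here is a simplified assumption built on concurrent.futures (the real helper lives in onyx.utils.threadpool_concurrency), and the LLM error classes are stand-ins for onyx.llm.chat_llm, not the actual implementations.

import concurrent.futures
from collections.abc import Callable, Iterator
from typing import Any, TypeVar

R = TypeVar("R")


class LLMTimeoutError(Exception):
    """Stand-in for onyx.llm.chat_llm.LLMTimeoutError."""


class LLMRateLimitError(Exception):
    """Stand-in for onyx.llm.chat_llm.LLMRateLimitError."""


def run_with_timeout(timeout: float, func: Callable[..., R], *args: Any, **kwargs: Any) -> R:
    # Run func in a worker thread; raise TimeoutError if it takes longer than `timeout` seconds.
    # The worker thread is abandoned (not killed) on timeout.
    pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
    future = pool.submit(func, *args, **kwargs)
    try:
        return future.result(timeout=timeout)
    except concurrent.futures.TimeoutError as e:
        raise TimeoutError(f"timed out after {timeout}s") from e
    finally:
        pool.shutdown(wait=False)


def stream_sub_questions(token_stream: Iterator[str], timeout_s: float) -> list[str]:
    tokens: list[str] = []

    def _consume() -> list[str]:
        for token in token_stream:
            tokens.append(token)
        return tokens

    try:
        return run_with_timeout(timeout_s, _consume)
    except (LLMTimeoutError, TimeoutError):
        # Mirror the node above: record the error and fall back instead of raising.
        return []
    except LLMRateLimitError:
        return []


if __name__ == "__main__":
    print(stream_sub_questions(iter(["What is X?", "How does Y relate to X?"]), timeout_s=5.0))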

View File

@@ -11,10 +11,8 @@ from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.utils.timing import log_function_time
@log_function_time(print_only=True)
def decide_refinement_need(
state: MainState, config: RunnableConfig
) -> RequireRefinemenEvalUpdate:
@@ -28,19 +26,6 @@ def decide_refinement_need(
decision = True # TODO: just for current testing purposes
if state.answer_error:
return RequireRefinemenEvalUpdate(
require_refined_answer_eval=False,
log_messages=[
get_langgraph_node_log_string(
graph_component="main",
node_name="decide refinement need",
node_start_time=node_start_time,
result="Timeout Error",
)
],
)
log_messages = [
get_langgraph_node_log_string(
graph_component="main",

View File

@@ -21,22 +21,11 @@ from onyx.agents.agent_search.shared_graph_utils.utils import format_docs
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_CONNECT_LLM_ENTITY_TERM_EXTRACTION,
)
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_LLM_ENTITY_TERM_EXTRACTION,
)
from onyx.configs.constants import NUM_EXPLORATORY_DOCS
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import ENTITY_TERM_EXTRACTION_PROMPT
from onyx.prompts.agent_search import ENTITY_TERM_EXTRACTION_PROMPT_JSON_EXAMPLE
from onyx.utils.threadpool_concurrency import run_with_timeout
from onyx.utils.timing import log_function_time
@log_function_time(print_only=True)
def extract_entities_terms(
state: MainState, config: RunnableConfig
) -> EntityTermExtractionUpdate:
@@ -90,42 +79,29 @@ def extract_entities_terms(
]
fast_llm = graph_config.tooling.fast_llm
# Grader
llm_response = fast_llm.invoke(
prompt=msg,
)
cleaned_response = (
str(llm_response.content).replace("```json\n", "").replace("\n```", "")
)
first_bracket = cleaned_response.find("{")
last_bracket = cleaned_response.rfind("}")
cleaned_response = cleaned_response[first_bracket : last_bracket + 1]
try:
llm_response = run_with_timeout(
AGENT_TIMEOUT_LLM_ENTITY_TERM_EXTRACTION,
fast_llm.invoke,
prompt=msg,
timeout_override=AGENT_TIMEOUT_CONNECT_LLM_ENTITY_TERM_EXTRACTION,
entity_extraction_result = EntityExtractionResult.model_validate_json(
cleaned_response
)
cleaned_response = (
str(llm_response.content).replace("```json\n", "").replace("\n```", "")
)
first_bracket = cleaned_response.find("{")
last_bracket = cleaned_response.rfind("}")
cleaned_response = cleaned_response[first_bracket : last_bracket + 1]
try:
entity_extraction_result = EntityExtractionResult.model_validate_json(
cleaned_response
)
except ValueError:
logger.error(
"Failed to parse LLM response as JSON in Entity-Term Extraction"
)
entity_extraction_result = EntityExtractionResult(
retrieved_entities_relationships=EntityRelationshipTermExtraction(),
)
except (LLMTimeoutError, TimeoutError):
logger.error("LLM Timeout Error - extract entities terms")
except ValueError:
logger.error("Failed to parse LLM response as JSON in Entity-Term Extraction")
entity_extraction_result = EntityExtractionResult(
retrieved_entities_relationships=EntityRelationshipTermExtraction(),
)
except LLMRateLimitError:
logger.error("LLM Rate Limit Error - extract entities terms")
entity_extraction_result = EntityExtractionResult(
retrieved_entities_relationships=EntityRelationshipTermExtraction(),
retrieved_entities_relationships=EntityRelationshipTermExtraction(
entities=[],
relationships=[],
terms=[],
),
)
return EntityTermExtractionUpdate(
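For context, the entity-term extraction above cleans the raw LLM output (strips markdown code fences, slices to the outermost braces) before validating it as JSON, and falls back to an empty result on parse failure. A minimal sketch of that cleaning/validation step follows; the EntityExtractionResult fields here are an assumed, simplified shape for illustration, not the real onyx model.

from pydantic import BaseModel, ValidationError


class EntityExtractionResult(BaseModel):
    # Assumed, simplified shape for illustration only; the real model lives in onyx.
    entities: list[str] = []
    relationships: list[str] = []
    terms: list[str] = []


def parse_entity_extraction(raw_llm_output: str) -> EntityExtractionResult:
    # Strip markdown code fences the LLM may wrap around the JSON,
    # then slice down to the outermost braces before validating.
    cleaned = raw_llm_output.replace("```json\n", "").replace("\n```", "")
    first_bracket = cleaned.find("{")
    last_bracket = cleaned.rfind("}")
    cleaned = cleaned[first_bracket : last_bracket + 1]
    try:
        return EntityExtractionResult.model_validate_json(cleaned)
    except (ValidationError, ValueError):
        # Fall back to an empty result rather than failing the whole node.
        return EntityExtractionResult()


if __name__ == "__main__":
    raw = '```json\n{"entities": ["Onyx"], "terms": ["agent search"]}\n```'
    print(parse_entity_extraction(raw))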

View File

@@ -1,4 +1,5 @@
from datetime import datetime
from typing import Any
from typing import cast
from langchain_core.messages import HumanMessage
@@ -10,49 +11,27 @@ from onyx.agents.agent_search.deep_search.main.models import (
AgentRefinedMetrics,
)
from onyx.agents.agent_search.deep_search.main.operations import get_query_info
from onyx.agents.agent_search.deep_search.main.operations import logger
from onyx.agents.agent_search.deep_search.main.states import MainState
from onyx.agents.agent_search.deep_search.main.states import (
RefinedAnswerUpdate,
)
from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
binary_string_test_after_answer_separator,
)
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
get_prompt_enrichment_components,
)
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
trim_prompt_piece,
)
from onyx.agents.agent_search.shared_graph_utils.calculations import (
get_answer_generation_documents,
)
from onyx.agents.agent_search.shared_graph_utils.constants import AGENT_ANSWER_SEPARATOR
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_RATELIMIT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_TIMEOUT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_POSITIVE_VALUE_STR,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AgentLLMErrorType,
)
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.models import InferenceSection
from onyx.agents.agent_search.shared_graph_utils.models import RefinedAgentStats
from onyx.agents.agent_search.shared_graph_utils.operators import (
dedup_inference_section_list,
dedup_inference_sections,
)
from onyx.agents.agent_search.shared_graph_utils.utils import (
dispatch_main_answer_stop_info,
)
from onyx.agents.agent_search.shared_graph_utils.utils import format_docs
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_deduplicated_structured_subquestion_documents,
)
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
@@ -64,58 +43,26 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import AgentAnswerPiece
from onyx.chat.models import ExtendedToolResponse
from onyx.chat.models import StreamingError
from onyx.configs.agent_configs import AGENT_ANSWER_GENERATION_BY_FAST_LLM
from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
from onyx.configs.agent_configs import AGENT_MAX_STREAMED_DOCS_FOR_REFINED_ANSWER
from onyx.configs.agent_configs import AGENT_MIN_ORIG_QUESTION_DOCS
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION,
)
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_VALIDATION,
)
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION,
)
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_LLM_REFINED_ANSWER_VALIDATION,
)
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import (
REFINED_ANSWER_PROMPT_W_SUB_QUESTIONS,
)
from onyx.prompts.agent_search import (
REFINED_ANSWER_PROMPT_WO_SUB_QUESTIONS,
)
from onyx.prompts.agent_search import (
REFINED_ANSWER_VALIDATION_PROMPT,
)
from onyx.prompts.agent_search import (
SUB_QUESTION_ANSWER_TEMPLATE_REFINED,
)
from onyx.prompts.agent_search import UNKNOWN_ANSWER
from onyx.tools.tool_implementations.search.search_tool import yield_search_responses
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_with_timeout
from onyx.utils.timing import log_function_time
logger = setup_logger()
_llm_node_error_strings = LLMNodeErrorStrings(
timeout="The LLM timed out. The refined answer could not be generated.",
rate_limit="The LLM encountered a rate limit. The refined answer could not be generated.",
general_error="The LLM encountered an error. The refined answer could not be generated.",
)
@log_function_time(print_only=True)
def generate_validate_refined_answer(
def generate_refined_answer(
state: MainState, config: RunnableConfig, writer: StreamWriter = lambda _: None
) -> RefinedAnswerUpdate:
"""
LangGraph node to generate the refined answer and validate it.
LangGraph node to generate the refined answer.
"""
node_start_time = datetime.now()
@@ -129,24 +76,19 @@ def generate_validate_refined_answer(
)
verified_reranked_documents = state.verified_reranked_documents
# get all documents cited in sub-questions
structured_subquestion_docs = get_deduplicated_structured_subquestion_documents(
state.sub_question_results
)
sub_questions_cited_documents = state.cited_documents
original_question_verified_documents = (
state.orig_question_verified_reranked_documents
)
original_question_retrieved_documents = state.orig_question_retrieved_documents
consolidated_context_docs = structured_subquestion_docs.cited_documents
consolidated_context_docs: list[InferenceSection] = sub_questions_cited_documents
counter = 0
for original_doc_number, original_doc in enumerate(
original_question_verified_documents
):
if original_doc_number not in structured_subquestion_docs.cited_documents:
if original_doc_number not in sub_questions_cited_documents:
if (
counter <= AGENT_MIN_ORIG_QUESTION_DOCS
or len(consolidated_context_docs)
@@ -157,16 +99,14 @@ def generate_validate_refined_answer(
counter += 1
# sort docs by their scores - though the scores refer to different questions
relevant_docs = dedup_inference_section_list(consolidated_context_docs)
relevant_docs = dedup_inference_sections(
consolidated_context_docs, consolidated_context_docs
)
# Create the list of documents to stream out. Start with the
# ones that will be in the context (or, if len == 0, use docs
# that were retrieved for the original question)
answer_generation_documents = get_answer_generation_documents(
relevant_docs=relevant_docs,
context_documents=structured_subquestion_docs.context_documents,
original_question_docs=original_question_retrieved_documents,
max_docs=AGENT_MAX_STREAMED_DOCS_FOR_REFINED_ANSWER,
streaming_docs = (
relevant_docs
if len(relevant_docs) > 0
else original_question_retrieved_documents[:15]
)
query_info = get_query_info(state.orig_question_sub_query_retrieval_results)
@@ -174,13 +114,11 @@ def generate_validate_refined_answer(
graph_config.tooling.search_tool
), "search_tool must be provided for agentic search"
# stream refined answer docs, or original question docs if no relevant docs are found
relevance_list = relevance_from_docs(
answer_generation_documents.streaming_documents
)
relevance_list = relevance_from_docs(relevant_docs)
for tool_response in yield_search_responses(
query=question,
reranked_sections=answer_generation_documents.streaming_documents,
final_context_sections=answer_generation_documents.context_documents,
reranked_sections=streaming_docs,
final_context_sections=streaming_docs,
search_query_info=query_info,
get_section_relevance=lambda: relevance_list,
search_tool=graph_config.tooling.search_tool,
@@ -260,13 +198,8 @@ def generate_validate_refined_answer(
else REFINED_ANSWER_PROMPT_WO_SUB_QUESTIONS
)
model = (
graph_config.tooling.fast_llm
if AGENT_ANSWER_GENERATION_BY_FAST_LLM
else graph_config.tooling.primary_llm
)
relevant_docs_str = format_docs(answer_generation_documents.context_documents)
model = graph_config.tooling.fast_llm
relevant_docs_str = format_docs(relevant_docs)
relevant_docs_str = trim_prompt_piece(
model.config,
relevant_docs_str,
@@ -296,89 +229,30 @@ def generate_validate_refined_answer(
)
]
streamed_tokens: list[str] = [""]
streamed_tokens: list[str | list[str | dict[str, Any]]] = [""]
dispatch_timings: list[float] = []
agent_error: AgentErrorLog | None = None
def stream_refined_answer() -> list[str]:
for message in model.stream(
msg, timeout_override=AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION
):
# TODO: in principle, the answer here COULD contain images, but we don't support that yet
content = message.content
if not isinstance(content, str):
raise ValueError(
f"Expected content to be a string, but got {type(content)}"
)
start_stream_token = datetime.now()
write_custom_event(
"refined_agent_answer",
AgentAnswerPiece(
answer_piece=content,
level=1,
level_question_num=0,
answer_type="agent_level_answer",
),
writer,
for message in model.stream(msg):
# TODO: in principle, the answer here COULD contain images, but we don't support that yet
content = message.content
if not isinstance(content, str):
raise ValueError(
f"Expected content to be a string, but got {type(content)}"
)
end_stream_token = datetime.now()
dispatch_timings.append(
(end_stream_token - start_stream_token).microseconds
)
streamed_tokens.append(content)
return streamed_tokens
try:
streamed_tokens = run_with_timeout(
AGENT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION,
stream_refined_answer,
)
except (LLMTimeoutError, TimeoutError):
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,
error_result=_llm_node_error_strings.timeout,
)
logger.error("LLM Timeout Error - generate refined answer")
except LLMRateLimitError:
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.RATE_LIMIT,
error_message=AGENT_LLM_RATELIMIT_MESSAGE,
error_result=_llm_node_error_strings.rate_limit,
)
logger.error("LLM Rate Limit Error - generate refined answer")
if agent_error:
start_stream_token = datetime.now()
write_custom_event(
"initial_agent_answer",
StreamingError(
error=AGENT_LLM_TIMEOUT_MESSAGE,
"refined_agent_answer",
AgentAnswerPiece(
answer_piece=content,
level=1,
level_question_num=0,
answer_type="agent_level_answer",
),
writer,
)
return RefinedAnswerUpdate(
refined_answer=None,
refined_answer_quality=False, # TODO: replace this with the actual check value
refined_agent_stats=None,
agent_refined_end_time=None,
agent_refined_metrics=AgentRefinedMetrics(
refined_doc_boost_factor=0.0,
refined_question_boost_factor=0.0,
duration_s=None,
),
log_messages=[
get_langgraph_node_log_string(
graph_component="main",
node_name="generate refined answer",
node_start_time=node_start_time,
result=agent_error.error_result or "An LLM error occurred",
)
],
)
end_stream_token = datetime.now()
dispatch_timings.append((end_stream_token - start_stream_token).microseconds)
streamed_tokens.append(content)
logger.debug(
f"Average dispatch time for refined answer: {sum(dispatch_timings) / len(dispatch_timings)}"
@@ -387,47 +261,54 @@ def generate_validate_refined_answer(
response = merge_content(*streamed_tokens)
answer = cast(str, response)
# run a validation step for the refined answer only
msg = [
HumanMessage(
content=REFINED_ANSWER_VALIDATION_PROMPT.format(
question=question,
history=prompt_enrichment_components.history,
answered_sub_questions=sub_question_answer_str,
relevant_docs=relevant_docs_str,
proposed_answer=answer,
persona_specification=persona_contextualized_prompt,
)
)
]
validation_model = graph_config.tooling.fast_llm
try:
validation_response = run_with_timeout(
AGENT_TIMEOUT_LLM_REFINED_ANSWER_VALIDATION,
validation_model.invoke,
prompt=msg,
timeout_override=AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_VALIDATION,
)
refined_answer_quality = binary_string_test_after_answer_separator(
text=cast(str, validation_response.content),
positive_value=AGENT_POSITIVE_VALUE_STR,
separator=AGENT_ANSWER_SEPARATOR,
)
except (LLMTimeoutError, TimeoutError):
refined_answer_quality = True
logger.error("LLM Timeout Error - validate refined answer")
except LLMRateLimitError:
refined_answer_quality = True
logger.error("LLM Rate Limit Error - validate refined answer")
refined_agent_stats = RefinedAgentStats(
revision_doc_efficiency=refined_doc_effectiveness,
revision_question_efficiency=revision_question_efficiency,
)
logger.debug(f"\n\n---INITIAL ANSWER ---\n\n Answer:\n Agent: {initial_answer}")
logger.debug("-" * 10)
logger.debug(f"\n\n---REVISED AGENT ANSWER ---\n\n Answer:\n Agent: {answer}")
logger.debug("-" * 100)
if state.initial_agent_stats:
initial_doc_boost_factor = state.initial_agent_stats.agent_effectiveness.get(
"utilized_chunk_ratio", "--"
)
initial_support_boost_factor = (
state.initial_agent_stats.agent_effectiveness.get("support_ratio", "--")
)
num_initial_verified_docs = state.initial_agent_stats.original_question.get(
"num_verified_documents", "--"
)
initial_verified_docs_avg_score = (
state.initial_agent_stats.original_question.get("verified_avg_score", "--")
)
initial_sub_questions_verified_docs = (
state.initial_agent_stats.sub_questions.get("num_verified_documents", "--")
)
logger.debug("INITIAL AGENT STATS")
logger.debug(f"Document Boost Factor: {initial_doc_boost_factor}")
logger.debug(f"Support Boost Factor: {initial_support_boost_factor}")
logger.debug(f"Originally Verified Docs: {num_initial_verified_docs}")
logger.debug(
f"Originally Verified Docs Avg Score: {initial_verified_docs_avg_score}"
)
logger.debug(
f"Sub-Questions Verified Docs: {initial_sub_questions_verified_docs}"
)
if refined_agent_stats:
logger.debug("-" * 10)
logger.debug("REFINED AGENT STATS")
logger.debug(
f"Revision Doc Factor: {refined_agent_stats.revision_doc_efficiency}"
)
logger.debug(
f"Revision Question Factor: {refined_agent_stats.revision_question_efficiency}"
)
agent_refined_end_time = datetime.now()
if state.agent_refined_start_time:
agent_refined_duration = (
@@ -444,7 +325,7 @@ def generate_validate_refined_answer(
return RefinedAnswerUpdate(
refined_answer=answer,
refined_answer_quality=refined_answer_quality,
refined_answer_quality=True, # TODO: replace this with the actual check value
refined_agent_stats=refined_agent_stats,
agent_refined_end_time=agent_refined_end_time,
agent_refined_metrics=agent_refined_metrics,

View File

@@ -17,7 +17,6 @@ from onyx.agents.agent_search.orchestration.states import ToolCallUpdate
from onyx.agents.agent_search.orchestration.states import ToolChoiceInput
from onyx.agents.agent_search.orchestration.states import ToolChoiceUpdate
from onyx.agents.agent_search.shared_graph_utils.models import AgentChunkRetrievalStats
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
from onyx.agents.agent_search.shared_graph_utils.models import (
EntityRelationshipTermExtraction,
)
@@ -77,7 +76,6 @@ class InitialAnswerUpdate(LoggerUpdate):
"""
initial_answer: str | None = None
answer_error: AgentErrorLog | None = None
initial_agent_stats: InitialAgentResultStats | None = None
generated_sub_questions: list[str] = []
agent_base_end_time: datetime | None = None
@@ -90,7 +88,6 @@ class RefinedAnswerUpdate(RefinedAgentEndStats, LoggerUpdate):
"""
refined_answer: str | None = None
answer_error: AgentErrorLog | None = None
refined_agent_stats: RefinedAgentStats | None = None
refined_answer_quality: bool = False

View File

@@ -16,46 +16,16 @@ from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states impor
QueryExpansionUpdate,
)
from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_RATELIMIT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_TIMEOUT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AgentLLMErrorType,
)
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
from onyx.agents.agent_search.shared_graph_utils.models import BaseMessage_Content
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.utils import dispatch_separated
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_CONNECT_LLM_QUERY_REWRITING_GENERATION,
)
from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_QUERY_REWRITING_GENERATION
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import (
QUERY_REWRITING_PROMPT,
)
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_with_timeout
from onyx.utils.timing import log_function_time
logger = setup_logger()
_llm_node_error_strings = LLMNodeErrorStrings(
timeout="Query rewriting failed due to LLM timeout - the original question will be used.",
rate_limit="Query rewriting failed due to LLM rate limit - the original question will be used.",
general_error="Query rewriting failed due to LLM error - the original question will be used.",
)
@log_function_time(print_only=True)
def expand_queries(
state: ExpandedRetrievalInput,
config: RunnableConfig,
@@ -71,7 +41,7 @@ def expand_queries(
node_start_time = datetime.now()
question = state.question
model = graph_config.tooling.fast_llm
llm = graph_config.tooling.fast_llm
sub_question_id = state.sub_question_id
if sub_question_id is None:
level, question_num = 0, 0
@@ -84,45 +54,13 @@ def expand_queries(
)
]
agent_error: AgentErrorLog | None = None
llm_response_list: list[BaseMessage_Content] = []
llm_response = ""
rewritten_queries = []
llm_response_list = dispatch_separated(
llm.stream(prompt=msg), dispatch_subquery(level, question_num, writer)
)
try:
llm_response_list = run_with_timeout(
AGENT_TIMEOUT_LLM_QUERY_REWRITING_GENERATION,
dispatch_separated,
model.stream(
prompt=msg,
timeout_override=AGENT_TIMEOUT_CONNECT_LLM_QUERY_REWRITING_GENERATION,
),
dispatch_subquery(level, question_num, writer),
)
llm_response = merge_message_runs(llm_response_list, chunk_separator="")[
0
].content
rewritten_queries = llm_response.split("\n")
log_result = f"Number of expanded queries: {len(rewritten_queries)}"
llm_response = merge_message_runs(llm_response_list, chunk_separator="")[0].content
except (LLMTimeoutError, TimeoutError):
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,
error_result=_llm_node_error_strings.timeout,
)
logger.error("LLM Timeout Error - expand queries")
log_result = agent_error.error_result
except LLMRateLimitError:
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.RATE_LIMIT,
error_message=AGENT_LLM_RATELIMIT_MESSAGE,
error_result=_llm_node_error_strings.rate_limit,
)
logger.error("LLM Rate Limit Error - expand queries")
log_result = agent_error.error_result
# use subquestion as query if query generation fails
rewritten_queries = llm_response.split("\n")
return QueryExpansionUpdate(
expanded_queries=rewritten_queries,
@@ -131,7 +69,7 @@ def expand_queries(
graph_component="shared - expanded retrieval",
node_name="expand queries",
node_start_time=node_start_time,
result=log_result,
result=f"Number of expanded queries: {len(rewritten_queries)}",
)
],
)

View File

@@ -21,15 +21,12 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
from onyx.configs.agent_configs import AGENT_RERANKING_MAX_QUERY_RETRIEVAL_RESULTS
from onyx.configs.agent_configs import AGENT_RERANKING_STATS
from onyx.context.search.models import InferenceSection
from onyx.context.search.models import RerankingDetails
from onyx.context.search.models import SearchRequest
from onyx.context.search.pipeline import retrieval_preprocessing
from onyx.context.search.postprocessing.postprocessing import rerank_sections
from onyx.context.search.postprocessing.postprocessing import should_rerank
from onyx.db.engine import get_session_context_manager
from onyx.db.search_settings import get_current_search_settings
from onyx.utils.timing import log_function_time
@log_function_time(print_only=True)
def rerank_documents(
state: ExpandedRetrievalState, config: RunnableConfig
) -> DocRerankingUpdate:
@@ -42,8 +39,6 @@ def rerank_documents(
# Rerank post retrieval and verification. First, create a search query
# then create the list of reranked sections
# If no question defined/question is None in the state, use the original
# question from the search request as query
graph_config = cast(GraphConfig, config["metadata"]["config"])
question = (
@@ -52,42 +47,44 @@ def rerank_documents(
assert (
graph_config.tooling.search_tool
), "search_tool must be provided for agentic search"
with get_session_context_manager() as db_session:
# we ignore some of the user specified fields since this search is
# internal to agentic search, but we still want to pass through
# persona (for stuff like document sets) and rerank settings
# (to not make an unnecessary db call).
search_request = SearchRequest(
query=question,
persona=graph_config.inputs.search_request.persona,
rerank_settings=graph_config.inputs.search_request.rerank_settings,
)
_search_query = retrieval_preprocessing(
search_request=search_request,
user=graph_config.tooling.search_tool.user, # bit of a hack
llm=graph_config.tooling.fast_llm,
db_session=db_session,
)
# Note that these are values passed in from the API; they are overrides and are typically None
rerank_settings = graph_config.inputs.search_request.rerank_settings
allow_agent_reranking = graph_config.behavior.allow_agent_reranking
# skip section filtering
if rerank_settings is None:
with get_session_context_manager() as db_session:
search_settings = get_current_search_settings(db_session)
if not search_settings.disable_rerank_for_streaming:
rerank_settings = RerankingDetails.from_db_model(search_settings)
# Initial default: no reranking. Will be overwritten below if reranking is warranted
reranked_documents = verified_documents
if should_rerank(rerank_settings) and len(verified_documents) > 0:
if (
_search_query.rerank_settings
and _search_query.rerank_settings.rerank_model_name
and _search_query.rerank_settings.num_rerank > 0
and len(verified_documents) > 0
):
if len(verified_documents) > 1:
if not allow_agent_reranking:
logger.info("Use of local rerank model without GPU, skipping reranking")
# No reranking, stay with verified_documents as default
else:
# Reranking is warranted, use the rerank_sections function
reranked_documents = rerank_sections(
query_str=question,
# if runnable, then rerank_settings is not None
rerank_settings=cast(RerankingDetails, rerank_settings),
sections_to_rerank=verified_documents,
)
else:
logger.warning(
f"{len(verified_documents)} verified document(s) found, skipping reranking"
reranked_documents = rerank_sections(
_search_query,
verified_documents,
)
# No reranking, stay with verified_documents as default
else:
num = "No" if len(verified_documents) == 0 else "One"
logger.warning(f"{num} verified document(s) found, skipping reranking")
reranked_documents = verified_documents
else:
logger.warning("No reranking settings found, using unranked documents")
# No reranking, stay with verified_documents as default
reranked_documents = verified_documents
if AGENT_RERANKING_STATS:
fit_scores = get_fit_scores(verified_documents, reranked_documents)
else:
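To summarize the new control flow above in one self-contained place: reranking only happens when the resolved rerank settings name a model with a positive num_rerank, more than one document survived verification, and agent reranking is allowed; otherwise the verified documents pass through unchanged. The sketch below illustrates that decision; RerankingDetails, should_rerank, and rerank_sections are simplified stand-ins, not the real onyx APIs.

from dataclasses import dataclass


@dataclass
class RerankingDetails:
    # Simplified stand-in; the real model is onyx.context.search.models.RerankingDetails.
    rerank_model_name: str | None = None
    num_rerank: int = 0


@dataclass
class Doc:
    content: str
    score: float = 0.0


def should_rerank(settings: RerankingDetails | None) -> bool:
    return bool(settings and settings.rerank_model_name and settings.num_rerank > 0)


def rerank_sections(query: str, settings: RerankingDetails, docs: list[Doc]) -> list[Doc]:
    # Stand-in "reranker": keep the top num_rerank docs by existing score.
    return sorted(docs, key=lambda d: d.score, reverse=True)[: settings.num_rerank]


def maybe_rerank(
    query: str,
    verified_docs: list[Doc],
    settings: RerankingDetails | None,
    allow_agent_reranking: bool,
) -> list[Doc]:
    reranked = verified_docs  # default: no reranking
    if (
        should_rerank(settings)
        and settings is not None
        and len(verified_docs) > 1
        and allow_agent_reranking
    ):
        reranked = rerank_sections(query, settings, verified_docs)
    return reranked


if __name__ == "__main__":
    docs = [Doc("a", 0.1), Doc("b", 0.9), Doc("c", 0.5)]
    print(maybe_rerank("q", docs, RerankingDetails("cross-encoder", 2), True))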

View File

@@ -23,15 +23,12 @@ from onyx.configs.agent_configs import AGENT_RETRIEVAL_STATS
from onyx.context.search.models import InferenceSection
from onyx.db.engine import get_session_context_manager
from onyx.tools.models import SearchQueryInfo
from onyx.tools.models import SearchToolOverrideKwargs
from onyx.tools.tool_implementations.search.search_tool import (
SEARCH_RESPONSE_SUMMARY_ID,
)
from onyx.tools.tool_implementations.search.search_tool import SearchResponseSummary
from onyx.utils.timing import log_function_time
@log_function_time(print_only=True)
def retrieve_documents(
state: RetrievalInput, config: RunnableConfig
) -> DocRetrievalUpdate:
@@ -70,12 +67,9 @@ def retrieve_documents(
with get_session_context_manager() as db_session:
for tool_response in search_tool.run(
query=query_to_retrieve,
override_kwargs=SearchToolOverrideKwargs(
force_no_rerank=True,
alternate_db_session=db_session,
retrieved_sections_callback=callback_container.append,
skip_query_analysis=not state.base_search,
),
force_no_rerank=True,
alternate_db_session=db_session,
retrieved_sections_callback=callback_container.append,
):
# get retrieved docs to send to the rest of the graph
if tool_response.id == SEARCH_RESPONSE_SUMMARY_ID:

View File

@@ -1,7 +1,5 @@
from datetime import datetime
from typing import cast
from langchain_core.messages import BaseMessage
from langchain_core.messages import HumanMessage
from langchain_core.runnables.config import RunnableConfig
@@ -12,40 +10,14 @@ from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states impor
DocVerificationUpdate,
)
from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
binary_string_test,
)
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
trim_prompt_piece,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_POSITIVE_VALUE_STR,
)
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.configs.agent_configs import AGENT_TIMEOUT_CONNECT_LLM_DOCUMENT_VERIFICATION
from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_DOCUMENT_VERIFICATION
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import (
DOCUMENT_VERIFICATION_PROMPT,
)
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_with_timeout
from onyx.utils.timing import log_function_time
logger = setup_logger()
_llm_node_error_strings = LLMNodeErrorStrings(
timeout="The LLM timed out. The document could not be verified. The document will be treated as 'relevant'",
rate_limit="The LLM encountered a rate limit. The document could not be verified. The document will be treated as 'relevant'",
general_error="The LLM encountered an error. The document could not be verified. The document will be treated as 'relevant'",
)
@log_function_time(print_only=True)
def verify_documents(
state: DocVerificationInput, config: RunnableConfig
) -> DocVerificationUpdate:
@@ -54,14 +26,12 @@ def verify_documents(
Args:
state (DocVerificationInput): The current state
config (RunnableConfig): Configuration containing AgentSearchConfig
config (RunnableConfig): Configuration containing ProSearchConfig
Updates:
verified_documents: list[InferenceSection]
"""
node_start_time = datetime.now()
question = state.question
retrieved_document_to_verify = state.retrieved_document_to_verify
document_content = retrieved_document_to_verify.combined_content
@@ -81,43 +51,12 @@ def verify_documents(
)
]
response: BaseMessage | None = None
response = fast_llm.invoke(msg)
verified_documents = [
retrieved_document_to_verify
] # default is to treat document as relevant
try:
response = run_with_timeout(
AGENT_TIMEOUT_LLM_DOCUMENT_VERIFICATION,
fast_llm.invoke,
prompt=msg,
timeout_override=AGENT_TIMEOUT_CONNECT_LLM_DOCUMENT_VERIFICATION,
)
assert isinstance(response.content, str)
if not binary_string_test(
text=response.content, positive_value=AGENT_POSITIVE_VALUE_STR
):
verified_documents = []
except (LLMTimeoutError, TimeoutError):
# In this case, we decide to continue and don't raise an error, as there is
# little harm in letting some docs through that are less relevant.
logger.error("LLM Timeout Error - verify documents")
except LLMRateLimitError:
# In this case, we decide to continue and don't raise an error, as there is
# little harm in letting some docs through that are less relevant.
logger.error("LLM Rate Limit Error - verify documents")
verified_documents = []
if isinstance(response.content, str) and "yes" in response.content.lower():
verified_documents.append(retrieved_document_to_verify)
return DocVerificationUpdate(
verified_documents=verified_documents,
log_messages=[
get_langgraph_node_log_string(
graph_component="shared - expanded retrieval",
node_name="verify documents",
node_start_time=node_start_time,
)
],
)

View File

@@ -21,13 +21,9 @@ from onyx.context.search.models import InferenceSection
class ExpandedRetrievalInput(SubgraphCoreState):
# exception from 'no default value' for LangGraph input states
# Here, sub_question_id default None implies usage for the
# original question. This is sometimes needed for nested sub-graphs
question: str = ""
base_search: bool = False
sub_question_id: str | None = None
question: str
base_search: bool
## Update/Return States
@@ -38,7 +34,7 @@ class QueryExpansionUpdate(LoggerUpdate, BaseModel):
log_messages: list[str] = []
class DocVerificationUpdate(LoggerUpdate, BaseModel):
class DocVerificationUpdate(BaseModel):
verified_documents: Annotated[list[InferenceSection], dedup_inference_sections] = []
@@ -92,4 +88,4 @@ class DocVerificationInput(ExpandedRetrievalInput):
class RetrievalInput(ExpandedRetrievalInput):
query_to_retrieve: str
query_to_retrieve: str = ""

View File

@@ -67,7 +67,6 @@ class GraphSearchConfig(BaseModel):
# Whether to allow creation of refinement questions (and entity extraction, etc.)
allow_refinement: bool = True
skip_gen_ai_answer_generation: bool = False
allow_agent_reranking: bool = False
class GraphConfig(BaseModel):

View File

@@ -25,7 +25,7 @@ logger = setup_logger()
# and a function that handles extracting the necessary fields
# from the state and config
# TODO: fan-out to multiple tool call nodes? Make this configurable?
def choose_tool(
def llm_tool_choice(
state: ToolChoiceState,
config: RunnableConfig,
writer: StreamWriter = lambda _: None,

View File

@@ -28,7 +28,7 @@ def emit_packet(packet: AnswerPacket, writer: StreamWriter) -> None:
write_custom_event("basic_response", packet, writer)
def call_tool(
def tool_call(
state: ToolChoiceUpdate,
config: RunnableConfig,
writer: StreamWriter = lambda _: None,

View File

@@ -12,7 +12,7 @@ from onyx.agents.agent_search.deep_search.main.graph_builder import (
main_graph_builder as main_graph_builder_a,
)
from onyx.agents.agent_search.deep_search.main.states import (
MainInput as MainInput,
MainInput as MainInput_a,
)
from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.utils import get_test_config
@@ -21,7 +21,6 @@ from onyx.chat.models import AnswerPacket
from onyx.chat.models import AnswerStream
from onyx.chat.models import ExtendedToolResponse
from onyx.chat.models import RefinedAnswerImprovement
from onyx.chat.models import StreamingError
from onyx.chat.models import StreamStopInfo
from onyx.chat.models import SubQueryPiece
from onyx.chat.models import SubQuestionPiece
@@ -34,7 +33,6 @@ from onyx.llm.factory import get_default_llms
from onyx.tools.tool_runner import ToolCallKickoff
from onyx.utils.logger import setup_logger
logger = setup_logger()
_COMPILED_GRAPH: CompiledStateGraph | None = None
@@ -74,15 +72,13 @@ def _parse_agent_event(
return cast(AnswerPacket, event["data"])
elif event["name"] == "refined_answer_improvement":
return cast(RefinedAnswerImprovement, event["data"])
elif event["name"] == "refined_sub_question_creation_error":
return cast(StreamingError, event["data"])
return None
def manage_sync_streaming(
compiled_graph: CompiledStateGraph,
config: GraphConfig,
graph_input: BasicInput | MainInput,
graph_input: BasicInput | MainInput_a,
) -> Iterable[StreamEvent]:
message_id = config.persistence.message_id if config.persistence else None
for event in compiled_graph.stream(
@@ -96,7 +92,7 @@ def manage_sync_streaming(
def run_graph(
compiled_graph: CompiledStateGraph,
config: GraphConfig,
input: BasicInput | MainInput,
input: BasicInput | MainInput_a,
) -> AnswerStream:
config.behavior.perform_initial_search_decomposition = (
INITIAL_SEARCH_DECOMPOSITION_ENABLED
@@ -127,7 +123,9 @@ def run_main_graph(
) -> AnswerStream:
compiled_graph = load_compiled_graph()
input = MainInput(log_messages=[])
input = MainInput_a(
base_question=config.inputs.search_request.query, log_messages=[]
)
# Agent search is not a Tool per se, but this is helpful for the frontend
yield ToolCallKickoff(
@@ -142,7 +140,7 @@ def run_basic_graph(
) -> AnswerStream:
graph = basic_graph_builder()
compiled_graph = graph.compile()
input = BasicInput(unused=True)
input = BasicInput()
return run_graph(compiled_graph, config, input)
@@ -174,7 +172,9 @@ if __name__ == "__main__":
# search_request.persona = get_persona_by_id(1, None, db_session)
# config.perform_initial_search_path_decision = False
config.behavior.perform_initial_search_decomposition = True
input = MainInput(log_messages=[])
input = MainInput_a(
base_question=config.inputs.search_request.query, log_messages=[]
)
tool_responses: list = []
for output in run_graph(compiled_graph, config, input):

View File

@@ -7,7 +7,6 @@ from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.models import (
AgentPromptEnrichmentComponents,
)
from onyx.agents.agent_search.shared_graph_utils.utils import format_docs
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_persona_agent_prompt_expressions,
)
@@ -41,7 +40,13 @@ def build_sub_question_answer_prompt(
date_str = build_date_time_string()
docs_str = format_docs(docs)
# TODO: This should include document metadata and title
docs_format_list = [
f"Document Number: [D{doc_num + 1}]\nContent: {doc.combined_content}\n\n"
for doc_num, doc in enumerate(docs)
]
docs_str = "\n\n".join(docs_format_list)
docs_str = trim_prompt_piece(
config,
@@ -145,38 +150,3 @@ def get_prompt_enrichment_components(
history=history,
date_str=date_str,
)
def binary_string_test(text: str, positive_value: str = "yes") -> bool:
"""
Tests if a string contains a positive value (case-insensitive).
Args:
text: The string to test
positive_value: The value to look for (defaults to "yes")
Returns:
True if the positive value is found in the text
"""
return positive_value.lower() in text.lower()
def binary_string_test_after_answer_separator(
text: str, positive_value: str = "yes", separator: str = "Answer:"
) -> bool:
"""
Tests if a string contains a positive value (case-insensitive), checking only
the text that appears after the given separator.
Args:
text: The string to test
positive_value: The value to look for (defaults to "yes")
separator: Marker after which the text is inspected (defaults to "Answer:")
Returns:
True if the positive value is found in the text after the separator
"""
if separator not in text:
return False
relevant_text = text.split(f"{separator}")[-1]
return binary_string_test(relevant_text, positive_value)
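The two helpers above are self-contained, so a short usage sketch (copied from the definitions in the diff) is enough to show the behavior: only the text after the separator is inspected.

def binary_string_test(text: str, positive_value: str = "yes") -> bool:
    return positive_value.lower() in text.lower()


def binary_string_test_after_answer_separator(
    text: str, positive_value: str = "yes", separator: str = "Answer:"
) -> bool:
    if separator not in text:
        return False
    return binary_string_test(text.split(separator)[-1], positive_value)


if __name__ == "__main__":
    # Only the text after the separator is inspected, so the leading "yes" is ignored.
    print(binary_string_test_after_answer_separator("yes, but... Answer: no"))    # False
    print(binary_string_test_after_answer_separator("reasoning... Answer: Yes"))  # True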

View File

@@ -1,11 +1,7 @@
import numpy as np
from onyx.agents.agent_search.shared_graph_utils.models import AnswerGenerationDocuments
from onyx.agents.agent_search.shared_graph_utils.models import RetrievalFitScoreMetrics
from onyx.agents.agent_search.shared_graph_utils.models import RetrievalFitStats
from onyx.agents.agent_search.shared_graph_utils.operators import (
dedup_inference_section_list,
)
from onyx.chat.models import SectionRelevancePiece
from onyx.context.search.models import InferenceSection
from onyx.utils.logger import setup_logger
@@ -100,106 +96,3 @@ def get_fit_scores(
)
return fit_eval
def get_answer_generation_documents(
relevant_docs: list[InferenceSection],
context_documents: list[InferenceSection],
original_question_docs: list[InferenceSection],
max_docs: int,
) -> AnswerGenerationDocuments:
"""
Create a deduplicated list of documents to stream, prioritizing relevant docs.
Args:
relevant_docs: Primary documents to include
context_documents: Additional context documents to append
original_question_docs: Original question documents to append
max_docs: Maximum number of documents to return
Returns:
AnswerGenerationDocuments with deduplicated streaming and context document lists, each limited to max_docs
"""
# get relevant_doc ids
relevant_doc_ids = [doc.center_chunk.document_id for doc in relevant_docs]
# Start with relevant docs or fall back to original question docs
streaming_documents = relevant_docs.copy()
# Use a set for O(1) lookups of document IDs
seen_doc_ids = {doc.center_chunk.document_id for doc in streaming_documents}
# Combine additional documents to check in one iteration
additional_docs = context_documents + original_question_docs
for doc_idx, doc in enumerate(additional_docs):
doc_id = doc.center_chunk.document_id
if doc_id not in seen_doc_ids:
streaming_documents.append(doc)
seen_doc_ids.add(doc_id)
streaming_documents = dedup_inference_section_list(streaming_documents)
relevant_streaming_docs = [
doc
for doc in streaming_documents
if doc.center_chunk.document_id in relevant_doc_ids
]
relevant_streaming_docs = dedup_sort_inference_section_list(relevant_streaming_docs)
additional_streaming_docs = [
doc
for doc in streaming_documents
if doc.center_chunk.document_id not in relevant_doc_ids
]
additional_streaming_docs = dedup_sort_inference_section_list(
additional_streaming_docs
)
for doc in additional_streaming_docs:
if doc.center_chunk.score:
doc.center_chunk.score += -2.0
else:
doc.center_chunk.score = -2.0
sorted_streaming_documents = relevant_streaming_docs + additional_streaming_docs
return AnswerGenerationDocuments(
streaming_documents=sorted_streaming_documents[:max_docs],
context_documents=relevant_streaming_docs[:max_docs],
)
def dedup_sort_inference_section_list(
sections: list[InferenceSection],
) -> list[InferenceSection]:
"""Deduplicates InferenceSections by document_id and sorts by score.
Args:
sections: List of InferenceSections to deduplicate and sort
Returns:
Deduplicated list of InferenceSections sorted by score in descending order
"""
# dedupe/merge with existing framework
sections = dedup_inference_section_list(sections)
# Use dict to deduplicate by document_id, keeping highest scored version
unique_sections: dict[str, InferenceSection] = {}
for section in sections:
doc_id = section.center_chunk.document_id
if doc_id not in unique_sections:
unique_sections[doc_id] = section
continue
# Keep version with higher score
existing_score = unique_sections[doc_id].center_chunk.score or 0
new_score = section.center_chunk.score or 0
if new_score > existing_score:
unique_sections[doc_id] = section
# Sort by score in descending order, handling None scores
sorted_sections = sorted(
unique_sections.values(), key=lambda x: x.center_chunk.score or 0, reverse=True
)
return sorted_sections
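A minimal, runnable sketch of the dedupe-and-sort idea used by dedup_sort_inference_section_list above: keep the highest-scored entry per document_id, then sort descending by score (treating None as 0). Chunk here is a simplified stand-in for an InferenceSection's center_chunk, not the real onyx model.

from dataclasses import dataclass


@dataclass
class Chunk:
    # Simplified stand-in for InferenceSection.center_chunk.
    document_id: str
    score: float | None = None


def dedup_sort_by_score(chunks: list[Chunk]) -> list[Chunk]:
    # Keep the highest-scored chunk per document_id, then sort descending by score.
    best: dict[str, Chunk] = {}
    for chunk in chunks:
        existing = best.get(chunk.document_id)
        if existing is None or (chunk.score or 0) > (existing.score or 0):
            best[chunk.document_id] = chunk
    return sorted(best.values(), key=lambda c: c.score or 0, reverse=True)


if __name__ == "__main__":
    docs = [Chunk("a", 0.2), Chunk("b", None), Chunk("a", 0.9)]
    print(dedup_sort_by_score(docs))  # highest-scored "a" first, then "b"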

View File

@@ -1,19 +0,0 @@
from enum import Enum
AGENT_LLM_TIMEOUT_MESSAGE = "The agent timed out. Please try again."
AGENT_LLM_ERROR_MESSAGE = "The agent encountered an error. Please try again."
AGENT_LLM_RATELIMIT_MESSAGE = (
"The agent encountered a rate limit error. Please try again."
)
LLM_ANSWER_ERROR_MESSAGE = "The question was not answered due to an LLM error."
AGENT_POSITIVE_VALUE_STR = "yes"
AGENT_NEGATIVE_VALUE_STR = "no"
AGENT_ANSWER_SEPARATOR = "Answer:"
class AgentLLMErrorType(str, Enum):
TIMEOUT = "timeout"
RATE_LIMIT = "rate_limit"
GENERAL_ERROR = "general_error"

View File

@@ -1,5 +1,3 @@
from typing import Any
from pydantic import BaseModel
from onyx.agents.agent_search.deep_search.main.models import (
@@ -58,12 +56,6 @@ class InitialAgentResultStats(BaseModel):
agent_effectiveness: dict[str, float | int | None]
class AgentErrorLog(BaseModel):
error_message: str
error_type: str
error_result: str
class RefinedAgentStats(BaseModel):
revision_doc_efficiency: float | None
revision_question_efficiency: float | None
@@ -118,11 +110,6 @@ class SubQuestionAnswerResults(BaseModel):
sub_question_retrieval_stats: AgentChunkRetrievalStats
class StructuredSubquestionDocuments(BaseModel):
cited_documents: list[InferenceSection]
context_documents: list[InferenceSection]
class CombinedAgentMetrics(BaseModel):
timings: AgentTimings
base_metrics: AgentBaseMetrics | None
@@ -139,17 +126,3 @@ class AgentPromptEnrichmentComponents(BaseModel):
persona_prompts: PersonaPromptExpressions
history: str
date_str: str
class LLMNodeErrorStrings(BaseModel):
timeout: str = "LLM Timeout Error"
rate_limit: str = "LLM Rate Limit Error"
general_error: str = "General LLM Error"
class AnswerGenerationDocuments(BaseModel):
streaming_documents: list[InferenceSection]
context_documents: list[InferenceSection]
BaseMessage_Content = str | list[str | dict[str, Any]]

View File

@@ -12,13 +12,6 @@ def dedup_inference_sections(
return deduped
def dedup_inference_section_list(
list: list[InferenceSection],
) -> list[InferenceSection]:
deduped = _merge_sections(list)
return deduped
def dedup_question_answer_results(
question_answer_results_1: list[SubQuestionAnswerResults],
question_answer_results_2: list[SubQuestionAnswerResults],

View File

@@ -20,18 +20,10 @@ from onyx.agents.agent_search.models import GraphInputs
from onyx.agents.agent_search.models import GraphPersistence
from onyx.agents.agent_search.models import GraphSearchConfig
from onyx.agents.agent_search.models import GraphTooling
from onyx.agents.agent_search.shared_graph_utils.models import BaseMessage_Content
from onyx.agents.agent_search.shared_graph_utils.models import (
EntityRelationshipTermExtraction,
)
from onyx.agents.agent_search.shared_graph_utils.models import PersonaPromptExpressions
from onyx.agents.agent_search.shared_graph_utils.models import (
StructuredSubquestionDocuments,
)
from onyx.agents.agent_search.shared_graph_utils.models import SubQuestionAnswerResults
from onyx.agents.agent_search.shared_graph_utils.operators import (
dedup_inference_section_list,
)
from onyx.chat.models import AnswerPacket
from onyx.chat.models import AnswerStyleConfig
from onyx.chat.models import CitationConfig
@@ -42,10 +34,6 @@ from onyx.chat.models import StreamStopInfo
from onyx.chat.models import StreamStopReason
from onyx.chat.models import StreamType
from onyx.chat.prompt_builder.answer_prompt_builder import AnswerPromptBuilder
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION,
)
from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_HISTORY_SUMMARY_GENERATION
from onyx.configs.chat_configs import CHAT_TARGET_CHUNK_PERCENTAGE
from onyx.configs.chat_configs import MAX_CHUNKS_FED_TO_CHAT
from onyx.configs.constants import DEFAULT_PERSONA_ID
@@ -58,8 +46,6 @@ from onyx.context.search.models import SearchRequest
from onyx.db.engine import get_session_context_manager
from onyx.db.persona import get_persona_by_id
from onyx.db.persona import Persona
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.llm.interfaces import LLM
from onyx.prompts.agent_search import (
ASSISTANT_SYSTEM_PROMPT_DEFAULT,
@@ -72,7 +58,6 @@ from onyx.prompts.agent_search import (
)
from onyx.prompts.prompt_utils import handle_onyx_date_awareness
from onyx.tools.force import ForceUseTool
from onyx.tools.models import SearchToolOverrideKwargs
from onyx.tools.tool_constructor import SearchToolConfig
from onyx.tools.tool_implementations.search.search_tool import (
SEARCH_RESPONSE_SUMMARY_ID,
@@ -80,10 +65,8 @@ from onyx.tools.tool_implementations.search.search_tool import (
from onyx.tools.tool_implementations.search.search_tool import SearchResponseSummary
from onyx.tools.tool_implementations.search.search_tool import SearchTool
from onyx.tools.utils import explicit_tool_calling_supported
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_with_timeout
logger = setup_logger()
BaseMessage_Content = str | list[str | dict[str, Any]]
# Post-processing
@@ -235,10 +218,7 @@ def get_test_config(
using_tool_calling_llm=using_tool_calling_llm,
)
chat_session_id = (
os.environ.get("ONYX_AS_CHAT_SESSION_ID")
or "00000000-0000-0000-0000-000000000000"
)
chat_session_id = os.environ.get("ONYX_AS_CHAT_SESSION_ID")
assert (
chat_session_id is not None
), "ONYX_AS_CHAT_SESSION_ID must be set for backend tests"
@@ -361,12 +341,8 @@ def retrieve_search_docs(
with get_session_context_manager() as db_session:
for tool_response in search_tool.run(
query=question,
override_kwargs=SearchToolOverrideKwargs(
force_no_rerank=True,
alternate_db_session=db_session,
retrieved_sections_callback=None,
skip_query_analysis=False,
),
force_no_rerank=True,
alternate_db_session=db_session,
):
# get retrieved docs to send to the rest of the graph
if tool_response.id == SEARCH_RESPONSE_SUMMARY_ID:
@@ -396,26 +372,8 @@ def summarize_history(
)
)
try:
history_response = run_with_timeout(
AGENT_TIMEOUT_LLM_HISTORY_SUMMARY_GENERATION,
llm.invoke,
history_context_prompt,
timeout_override=AGENT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION,
)
except (LLMTimeoutError, TimeoutError):
logger.error("LLM Timeout Error - summarize history")
return (
history # this is what is done at this point anyway, so we default to this
)
except LLMRateLimitError:
logger.error("LLM Rate Limit Error - summarize history")
return (
history # this is what is done at this point anyway, so we default to this
)
history_response = llm.invoke(history_context_prompt)
assert isinstance(history_response.content, str)
return history_response.content
@@ -481,27 +439,3 @@ def remove_document_citations(text: str) -> str:
# \d+ - one or more digits
# \] - literal ] character
return re.sub(r"\[(?:D|Q)?\d+\]", "", text)
def get_deduplicated_structured_subquestion_documents(
sub_question_results: list[SubQuestionAnswerResults],
) -> StructuredSubquestionDocuments:
"""
Extract and deduplicate all cited documents from sub-question results.
Args:
sub_question_results: List of sub-question results containing cited documents
Returns:
Deduplicated list of cited documents
"""
cited_docs = [
doc for result in sub_question_results for doc in result.cited_documents
]
context_docs = [
doc for result in sub_question_results for doc in result.context_documents
]
return StructuredSubquestionDocuments(
cited_documents=dedup_inference_section_list(cited_docs),
context_documents=dedup_inference_section_list(context_docs),
)
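Since the diff above only shows the regex comment for remove_document_citations, here is the function as a standalone, runnable sketch together with a usage example; the pattern is taken directly from the diff and strips inline markers like [1], [D2], or [Q3].

import re


def remove_document_citations(text: str) -> str:
    # Strips inline citation markers such as [1], [D2], or [Q3] from answer text.
    return re.sub(r"\[(?:D|Q)?\d+\]", "", text)


if __name__ == "__main__":
    print(remove_document_citations("Agent search [D1] reranks documents [Q2] by score [3]."))
    # -> "Agent search  reranks documents  by score ."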

View File

@@ -10,7 +10,6 @@ from pydantic import BaseModel
from onyx.auth.schemas import UserRole
from onyx.configs.app_configs import API_KEY_HASH_ROUNDS
from shared_configs.configs import MULTI_TENANT
_API_KEY_HEADER_NAME = "Authorization"
@@ -36,7 +35,8 @@ class ApiKeyDescriptor(BaseModel):
def generate_api_key(tenant_id: str | None = None) -> str:
if not MULTI_TENANT or not tenant_id:
# For backwards compatibility, if no tenant_id, generate old style key
if not tenant_id:
return _API_KEY_PREFIX + secrets.token_urlsafe(_API_KEY_LEN)
encoded_tenant = quote(tenant_id) # URL encode the tenant ID

View File

@@ -1,9 +1,7 @@
import smtplib
from datetime import datetime
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.utils import formatdate
from email.utils import make_msgid
from textwrap import dedent
from onyx.configs.app_configs import EMAIL_CONFIGURED
from onyx.configs.app_configs import EMAIL_FROM
@@ -12,156 +10,26 @@ from onyx.configs.app_configs import SMTP_PORT
from onyx.configs.app_configs import SMTP_SERVER
from onyx.configs.app_configs import SMTP_USER
from onyx.configs.app_configs import WEB_DOMAIN
from onyx.configs.constants import AuthType
from onyx.configs.constants import TENANT_ID_COOKIE_NAME
from onyx.db.models import User
from shared_configs.configs import MULTI_TENANT
HTML_EMAIL_TEMPLATE = """\
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width" />
<title>{title}</title>
<style>
body, table, td, a {{
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
text-size-adjust: 100%;
margin: 0;
padding: 0;
-webkit-font-smoothing: antialiased;
-webkit-text-size-adjust: none;
}}
body {{
background-color: #f7f7f7;
color: #333;
}}
.body-content {{
color: #333;
}}
.email-container {{
width: 100%;
max-width: 600px;
margin: 0 auto;
background-color: #ffffff;
border-radius: 6px;
overflow: hidden;
border: 1px solid #eaeaea;
}}
.header {{
background-color: #000000;
padding: 20px;
text-align: center;
}}
.header img {{
max-width: 140px;
}}
.body-content {{
padding: 20px 30px;
}}
.title {{
font-size: 20px;
font-weight: bold;
margin: 0 0 10px;
}}
.message {{
font-size: 16px;
line-height: 1.5;
margin: 0 0 20px;
}}
.cta-button {{
display: inline-block;
padding: 12px 20px;
background-color: #000000;
color: #ffffff !important;
text-decoration: none;
border-radius: 4px;
font-weight: 500;
}}
.footer {{
font-size: 13px;
color: #6A7280;
text-align: center;
padding: 20px;
}}
.footer a {{
color: #6b7280;
text-decoration: underline;
}}
</style>
</head>
<body>
<table role="presentation" class="email-container" cellpadding="0" cellspacing="0">
<tr>
<td class="header">
<img
style="background-color: #ffffff; border-radius: 8px;"
src="https://www.onyx.app/logos/customer/onyx.png"
alt="Onyx Logo"
>
</td>
</tr>
<tr>
<td class="body-content">
<h1 class="title">{heading}</h1>
<div class="message">
{message}
</div>
{cta_block}
</td>
</tr>
<tr>
<td class="footer">
© {year} Onyx. All rights reserved.
<br>
Have questions? Join our Slack community <a href="https://join.slack.com/t/onyx-dot-app/shared_invite/zt-2twesxdr6-5iQitKZQpgq~hYIZ~dv3KA">here</a>.
</td>
</tr>
</table>
</body>
</html>
"""
def build_html_email(
heading: str, message: str, cta_text: str | None = None, cta_link: str | None = None
) -> str:
if cta_text and cta_link:
cta_block = f'<a class="cta-button" href="{cta_link}">{cta_text}</a>'
else:
cta_block = ""
return HTML_EMAIL_TEMPLATE.format(
title=heading,
heading=heading,
message=message,
cta_block=cta_block,
year=datetime.now().year,
)
def send_email(
user_email: str,
subject: str,
html_body: str,
text_body: str,
body: str,
mail_from: str = EMAIL_FROM,
) -> None:
if not EMAIL_CONFIGURED:
raise ValueError("Email is not configured.")
msg = MIMEMultipart("alternative")
msg = MIMEMultipart()
msg["Subject"] = subject
msg["To"] = user_email
msg["From"] = mail_from
msg["Date"] = formatdate(localtime=True)
msg["Message-ID"] = make_msgid(domain="onyx.app")
if mail_from:
msg["From"] = mail_from
part_text = MIMEText(text_body, "plain")
part_html = MIMEText(html_body, "html")
msg.attach(part_text)
msg.attach(part_html)
msg.attach(MIMEText(body))
try:
with smtplib.SMTP(SMTP_SERVER, SMTP_PORT) as s:
@@ -172,89 +40,41 @@ def send_email(
raise e
def send_subscription_cancellation_email(user_email: str) -> None:
# Example usage of the reusable HTML email template
subject = "Your Onyx Subscription Has Been Canceled"
heading = "Subscription Canceled"
message = (
"<p>We're sorry to see you go.</p>"
"<p>Your subscription has been canceled and will end on your next billing date.</p>"
"<p>If you change your mind, you can always come back!</p>"
)
cta_text = "Renew Subscription"
cta_link = "https://www.onyx.app/pricing"
html_content = build_html_email(heading, message, cta_text, cta_link)
text_content = (
"We're sorry to see you go.\n"
"Your subscription has been canceled and will end on your next billing date.\n"
"If you change your mind, visit https://www.onyx.app/pricing"
)
send_email(user_email, subject, html_content, text_content)
def send_user_email_invite(
user_email: str, current_user: User, auth_type: AuthType
) -> None:
def send_user_email_invite(user_email: str, current_user: User) -> None:
subject = "Invitation to Join Onyx Organization"
heading = "You've Been Invited!"
body = dedent(
f"""\
Hello,
# the exact action taken by the user, and thus the message, depends on the auth type
message = f"<p>You have been invited by {current_user.email} to join an organization on Onyx.</p>"
if auth_type == AuthType.CLOUD:
message += (
"<p>To join the organization, please click the button below to set a password "
"or login with Google and complete your registration.</p>"
)
elif auth_type == AuthType.BASIC:
message += (
"<p>To join the organization, please click the button below to set a password "
"and complete your registration.</p>"
)
elif auth_type == AuthType.GOOGLE_OAUTH:
message += (
"<p>To join the organization, please click the button below to login with Google "
"and complete your registration.</p>"
)
elif auth_type == AuthType.OIDC or auth_type == AuthType.SAML:
message += (
"<p>To join the organization, please click the button below to"
" complete your registration.</p>"
)
else:
raise ValueError(f"Invalid auth type: {auth_type}")
You have been invited to join an organization on Onyx.
cta_text = "Join Organization"
cta_link = f"{WEB_DOMAIN}/auth/signup?email={user_email}"
html_content = build_html_email(heading, message, cta_text, cta_link)
To join the organization, please visit the following link:
# text content is the fallback for clients that don't support HTML
# it's less critical, so we don't add special cases for each auth type
text_content = (
f"You have been invited by {current_user.email} to join an organization on Onyx.\n"
"To join the organization, please visit the following link:\n"
f"{WEB_DOMAIN}/auth/signup?email={user_email}\n"
{WEB_DOMAIN}/auth/signup?email={user_email}
You'll be asked to set a password or login with Google to complete your registration.
Best regards,
The Onyx Team
"""
)
if auth_type == AuthType.CLOUD:
text_content += "You'll be asked to set a password or login with Google to complete your registration."
send_email(user_email, subject, html_content, text_content)
send_email(user_email, subject, body, current_user.email)
def send_forgot_password_email(
user_email: str,
token: str,
tenant_id: str,
mail_from: str = EMAIL_FROM,
tenant_id: str | None = None,
) -> None:
# Builds a forgot password email with or without fancy HTML
subject = "Onyx Forgot Password"
link = f"{WEB_DOMAIN}/auth/reset-password?token={token}"
if MULTI_TENANT:
if tenant_id:
link += f"&{TENANT_ID_COOKIE_NAME}={tenant_id}"
message = f"<p>Click the following link to reset your password:</p><p>{link}</p>"
html_content = build_html_email("Reset Your Password", message)
text_content = f"Click the following link to reset your password: {link}"
send_email(user_email, subject, html_content, text_content, mail_from)
# Keep the search param name the same as the cookie name for simplicity
body = f"Click the following link to reset your password: {link}"
send_email(user_email, subject, body, mail_from)
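# Illustrative example of the assembled reset link (token and tenant values are made up):
#   {WEB_DOMAIN}/auth/reset-password?token=abc123&{TENANT_ID_COOKIE_NAME}=tenant_42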
def send_user_verification_email(
@@ -262,12 +82,7 @@ def send_user_verification_email(
token: str,
mail_from: str = EMAIL_FROM,
) -> None:
# Builds a verification email
subject = "Onyx Email Verification"
link = f"{WEB_DOMAIN}/auth/verify-email?token={token}"
message = (
f"<p>Click the following link to verify your email address:</p><p>{link}</p>"
)
html_content = build_html_email("Verify Your Email", message)
text_content = f"Click the following link to verify your email address: {link}"
send_email(user_email, subject, html_content, text_content, mail_from)
body = f"Click the following link to verify your email address: {link}"
send_email(user_email, subject, body, mail_from)

View File

@@ -42,5 +42,4 @@ def fetch_no_auth_user(
role=UserRole.BASIC if anonymous_user_enabled else UserRole.ADMIN,
preferences=load_no_auth_user_preferences(store),
is_anonymous_user=anonymous_user_enabled,
password_configured=False,
)

View File

@@ -1,7 +1,5 @@
import json
import random
import secrets
import string
import uuid
from collections.abc import AsyncGenerator
from datetime import datetime
@@ -88,6 +86,7 @@ from onyx.db.auth import get_user_db
from onyx.db.auth import SQLAlchemyUserAdminDB
from onyx.db.engine import get_async_session
from onyx.db.engine import get_async_session_with_tenant
from onyx.db.engine import get_current_tenant_id
from onyx.db.engine import get_session_with_tenant
from onyx.db.models import AccessToken
from onyx.db.models import OAuthAccount
@@ -95,7 +94,6 @@ from onyx.db.models import User
from onyx.db.users import get_user_by_email
from onyx.redis.redis_pool import get_async_redis_connection
from onyx.redis.redis_pool import get_redis_client
from onyx.server.utils import BasicAuthenticationError
from onyx.utils.logger import setup_logger
from onyx.utils.telemetry import create_milestone_and_report
from onyx.utils.telemetry import optional_telemetry
@@ -105,11 +103,15 @@ from onyx.utils.variable_functionality import fetch_versioned_implementation
from shared_configs.configs import async_return_default_schema
from shared_configs.configs import MULTI_TENANT
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
from shared_configs.contextvars import get_current_tenant_id
logger = setup_logger()
class BasicAuthenticationError(HTTPException):
def __init__(self, detail: str):
super().__init__(status_code=status.HTTP_403_FORBIDDEN, detail=detail)
def is_user_admin(user: User | None) -> bool:
if AUTH_TYPE == AuthType.DISABLED:
return True
@@ -141,30 +143,6 @@ def get_display_email(email: str | None, space_less: bool = False) -> str:
return email or ""
def generate_password() -> str:
lowercase_letters = string.ascii_lowercase
uppercase_letters = string.ascii_uppercase
digits = string.digits
special_characters = string.punctuation
# Ensure at least one of each required character type
password = [
secrets.choice(uppercase_letters),
secrets.choice(digits),
secrets.choice(special_characters),
]
# Fill the rest with a mix of characters
remaining_length = 12 - len(password)
all_characters = lowercase_letters + uppercase_letters + digits + special_characters
password.extend(secrets.choice(all_characters) for _ in range(remaining_length))
# Shuffle the password to randomize the position of the required characters
random.shuffle(password)
return "".join(password)
def user_needs_to_be_verified() -> bool:
if AUTH_TYPE == AuthType.BASIC or AUTH_TYPE == AuthType.CLOUD:
return REQUIRE_EMAIL_VERIFICATION
@@ -214,8 +192,8 @@ def verify_email_is_invited(email: str) -> None:
raise PermissionError("User not on allowed user whitelist")
def verify_email_in_whitelist(email: str, tenant_id: str) -> None:
with get_session_with_tenant(tenant_id=tenant_id) as db_session:
def verify_email_in_whitelist(email: str, tenant_id: str | None = None) -> None:
with get_session_with_tenant(tenant_id) as db_session:
if not get_user_by_email(email, db_session):
verify_email_is_invited(email)
@@ -411,7 +389,7 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
"refresh_token": refresh_token,
}
user: User | None = None
user: User
try:
# Attempt to get user by OAuth account
@@ -420,20 +398,15 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
except exceptions.UserNotExists:
try:
# Attempt to get user by email
user = await self.user_db.get_by_email(account_email)
user = await self.get_by_email(account_email)
if not associate_by_email:
raise exceptions.UserAlreadyExists()
# Make sure user is not None before adding OAuth account
if user is not None:
user = await self.user_db.add_oauth_account(
user, oauth_account_dict
)
else:
# This shouldn't happen since get_by_email would raise UserNotExists
# but adding as a safeguard
raise exceptions.UserNotExists()
user = await self.user_db.add_oauth_account(
user, oauth_account_dict
)
# If user not found by OAuth account or email, create a new user
except exceptions.UserNotExists:
password = self.password_helper.generate()
user_dict = {
@@ -444,36 +417,26 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
user = await self.user_db.create(user_dict)
# Add OAuth account only if user creation was successful
if user is not None:
await self.user_db.add_oauth_account(user, oauth_account_dict)
await self.on_after_register(user, request)
else:
raise HTTPException(
status_code=500, detail="Failed to create user account"
)
# Explicitly set the Postgres schema for this session to ensure
# OAuth account creation happens in the correct tenant schema
# Add OAuth account
await self.user_db.add_oauth_account(user, oauth_account_dict)
await self.on_after_register(user, request)
else:
# User exists, update OAuth account if needed
if user is not None: # Add explicit check
for existing_oauth_account in user.oauth_accounts:
if (
existing_oauth_account.account_id == account_id
and existing_oauth_account.oauth_name == oauth_name
):
user = await self.user_db.update_oauth_account(
user,
# NOTE: OAuthAccount DOES implement the OAuthAccountProtocol
# but the type checker doesn't know that :(
existing_oauth_account, # type: ignore
oauth_account_dict,
)
# Ensure user is not None before proceeding
if user is None:
raise HTTPException(
status_code=500, detail="Failed to authenticate or create user"
)
for existing_oauth_account in user.oauth_accounts:
if (
existing_oauth_account.account_id == account_id
and existing_oauth_account.oauth_name == oauth_name
):
user = await self.user_db.update_oauth_account(
user,
# NOTE: OAuthAccount DOES implement the OAuthAccountProtocol
# but the type checker doesn't know that :(
existing_oauth_account, # type: ignore
oauth_account_dict,
)
# NOTE: Most IdPs have very short expiry times, and we don't want to force the user to
# re-authenticate that frequently, so by default this is disabled
@@ -568,7 +531,7 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
async_return_default_schema,
)(email=user.email)
send_forgot_password_email(user.email, tenant_id=tenant_id, token=token)
send_forgot_password_email(user.email, token, tenant_id=tenant_id)
async def on_after_request_verify(
self, user: User, token: str, request: Optional[Request] = None
@@ -632,39 +595,6 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):
return user
async def reset_password_as_admin(self, user_id: uuid.UUID) -> str:
"""Admin-only. Generate a random password for a user and return it."""
user = await self.get(user_id)
new_password = generate_password()
await self._update(user, {"password": new_password})
return new_password
async def change_password_if_old_matches(
self, user: User, old_password: str, new_password: str
) -> None:
"""
For normal users to change password if they know the old one.
Raises 400 if old password doesn't match.
"""
verified, updated_password_hash = self.password_helper.verify_and_update(
old_password, user.hashed_password
)
if not verified:
# Raise some HTTPException (or your custom exception) if old password is invalid:
from fastapi import HTTPException, status
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Invalid current password",
)
# If the hash was upgraded behind the scenes, we can keep it before setting the new password:
if updated_password_hash:
user.hashed_password = updated_password_hash
# Now apply and validate the new password
await self._update(user, {"password": new_password})
async def get_user_manager(
user_db: SQLAlchemyUserDatabase = Depends(get_user_db),
@@ -889,9 +819,8 @@ async def current_limited_user(
async def current_chat_accesssible_user(
user: User | None = Depends(optional_user),
tenant_id: str | None = Depends(get_current_tenant_id),
) -> User | None:
tenant_id = get_current_tenant_id()
return await double_check_user(
user, allow_anonymous_access=anonymous_user_enabled(tenant_id=tenant_id)
)

View File

@@ -2,7 +2,6 @@ import logging
import multiprocessing
import time
from typing import Any
from typing import cast
import sentry_sdk
from celery import Task
@@ -34,7 +33,6 @@ from onyx.redis.redis_connector_ext_group_sync import RedisConnectorExternalGrou
from onyx.redis.redis_connector_prune import RedisConnectorPrune
from onyx.redis.redis_document_set import RedisDocumentSet
from onyx.redis.redis_pool import get_redis_client
from onyx.redis.redis_pool import get_shared_redis_client
from onyx.redis.redis_usergroup import RedisUserGroup
from onyx.utils.logger import ColoredFormatter
from onyx.utils.logger import PlainFormatter
@@ -60,35 +58,13 @@ else:
logger.debug("Sentry DSN not provided, skipping Sentry initialization")
class TenantAwareTask(Task):
"""A custom base Task that sets tenant_id in a contextvar before running."""
abstract = True # So Celery knows not to register this as a real task.
def __call__(self, *args: Any, **kwargs: Any) -> Any:
# Grab tenant_id from the kwargs, or fall back to the default if missing.
tenant_id = kwargs.get("tenant_id", None) or POSTGRES_DEFAULT_SCHEMA
# Set the context var
CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
# Actually run the task now
try:
return super().__call__(*args, **kwargs)
finally:
# Clear or reset after the task runs
# so it does not leak into any subsequent tasks on the same worker process
CURRENT_TENANT_ID_CONTEXTVAR.set(None)
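# Illustrative sketch only: with TenantAwareTask as the task base, a task body can read the
# tenant id from the contextvar instead of threading it through every helper call. The task
# name and the shared_task import are assumptions made for the example; the other names are
# the module-level ones already used above.
from celery import shared_task

@shared_task(name="example_tenant_aware_task", base=TenantAwareTask, bind=True)
def example_tenant_aware_task(self: Task, *, tenant_id: str | None = None) -> None:
    # TenantAwareTask.__call__ has already copied the tenant_id kwarg into the contextvar
    current_tenant = CURRENT_TENANT_ID_CONTEXTVAR.get() or POSTGRES_DEFAULT_SCHEMA
    task_logger.info(f"example task running for tenant={current_tenant}")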
@task_prerun.connect
def on_task_prerun(
sender: Any | None = None,
task_id: str | None = None,
task: Task | None = None,
args: tuple[Any, ...] | None = None,
kwargs: dict[str, Any] | None = None,
**other_kwargs: Any,
**kwds: Any,
) -> None:
pass
@@ -132,9 +108,9 @@ def on_task_postrun(
# Get tenant_id directly from kwargs - each celery task has a tenant_id kwarg
if not kwargs:
logger.error(f"Task {task.name} (ID: {task_id}) is missing kwargs")
tenant_id = POSTGRES_DEFAULT_SCHEMA
tenant_id = None
else:
tenant_id = cast(str, kwargs.get("tenant_id", POSTGRES_DEFAULT_SCHEMA))
tenant_id = kwargs.get("tenant_id")
task_logger.debug(
f"Task {task.name} (ID: {task_id}) completed with state: {state} "
@@ -225,7 +201,7 @@ def wait_for_redis(sender: Any, **kwargs: Any) -> None:
Will raise WorkerShutdown to kill the celery worker if the timeout
is reached."""
r = get_shared_redis_client()
r = get_redis_client(tenant_id=None)
WAIT_INTERVAL = 5
WAIT_LIMIT = 60
@@ -311,7 +287,7 @@ def on_secondary_worker_init(sender: Any, **kwargs: Any) -> None:
# Set up variables for waiting on primary worker
WAIT_INTERVAL = 5
WAIT_LIMIT = 60
r = get_shared_redis_client()
r = get_redis_client(tenant_id=None)
time_start = time.monotonic()
logger.info("Waiting for primary worker to be ready...")
@@ -463,6 +439,24 @@ class TenantContextFilter(logging.Filter):
return True
@task_prerun.connect
def set_tenant_id(
sender: Any | None = None,
task_id: str | None = None,
task: Task | None = None,
args: tuple[Any, ...] | None = None,
kwargs: dict[str, Any] | None = None,
**other_kwargs: Any,
) -> None:
"""Signal handler to set tenant ID in context var before task starts."""
tenant_id = (
kwargs.get("tenant_id", POSTGRES_DEFAULT_SCHEMA)
if kwargs
else POSTGRES_DEFAULT_SCHEMA
)
CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
@task_postrun.connect
def reset_tenant_id(
sender: Any | None = None,

View File

@@ -132,7 +132,6 @@ class DynamicTenantScheduler(PersistentScheduler):
f"Adding options to task {tenant_task_name}: {options}"
)
tenant_task["options"] = options
new_schedule[tenant_task_name] = tenant_task
return new_schedule
@@ -257,4 +256,3 @@ def on_setup_logging(
celery_app.conf.beat_scheduler = DynamicTenantScheduler
celery_app.conf.task_default_base = app_base.TenantAwareTask

View File

@@ -20,7 +20,6 @@ logger = setup_logger()
celery_app = Celery(__name__)
celery_app.config_from_object("onyx.background.celery.configs.heavy")
celery_app.Task = app_base.TenantAwareTask # type: ignore [misc]
@signals.task_prerun.connect

View File

@@ -21,7 +21,6 @@ logger = setup_logger()
celery_app = Celery(__name__)
celery_app.config_from_object("onyx.background.celery.configs.indexing")
celery_app.Task = app_base.TenantAwareTask # type: ignore [misc]
@signals.task_prerun.connect

View File

@@ -23,7 +23,6 @@ logger = setup_logger()
celery_app = Celery(__name__)
celery_app.config_from_object("onyx.background.celery.configs.light")
celery_app.Task = app_base.TenantAwareTask # type: ignore [misc]
@signals.task_prerun.connect

View File

@@ -20,7 +20,6 @@ logger = setup_logger()
celery_app = Celery(__name__)
celery_app.config_from_object("onyx.background.celery.configs.monitoring")
celery_app.Task = app_base.TenantAwareTask # type: ignore [misc]
@signals.task_prerun.connect

View File

@@ -24,7 +24,7 @@ from onyx.configs.constants import CELERY_PRIMARY_WORKER_LOCK_TIMEOUT
from onyx.configs.constants import OnyxRedisConstants
from onyx.configs.constants import OnyxRedisLocks
from onyx.configs.constants import POSTGRES_CELERY_WORKER_PRIMARY_APP_NAME
from onyx.db.engine import get_session_with_current_tenant
from onyx.db.engine import get_session_with_default_tenant
from onyx.db.engine import SqlEngine
from onyx.db.index_attempt import get_index_attempt
from onyx.db.index_attempt import mark_attempt_canceled
@@ -38,7 +38,7 @@ from onyx.redis.redis_connector_index import RedisConnectorIndex
from onyx.redis.redis_connector_prune import RedisConnectorPrune
from onyx.redis.redis_connector_stop import RedisConnectorStop
from onyx.redis.redis_document_set import RedisDocumentSet
from onyx.redis.redis_pool import get_shared_redis_client
from onyx.redis.redis_pool import get_redis_client
from onyx.redis.redis_usergroup import RedisUserGroup
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
@@ -47,7 +47,6 @@ logger = setup_logger()
celery_app = Celery(__name__)
celery_app.config_from_object("onyx.background.celery.configs.primary")
celery_app.Task = app_base.TenantAwareTask # type: ignore [misc]
@signals.task_prerun.connect
@@ -102,7 +101,7 @@ def on_worker_init(sender: Worker, **kwargs: Any) -> None:
# This is singleton work that should be done on startup exactly once
# by the primary worker. This is unnecessary in the multi tenant scenario
r = get_shared_redis_client()
r = get_redis_client(tenant_id=None)
# Log the role and slave count - being connected to a slave or slave count > 0 could be problematic
info: dict[str, Any] = cast(dict, r.info("replication"))
@@ -145,6 +144,7 @@ def on_worker_init(sender: Worker, **kwargs: Any) -> None:
# As currently designed, when this worker starts as "primary", we reinitialize redis
# to a clean state (for our purposes, anyway)
r.delete(OnyxRedisLocks.CHECK_VESPA_SYNC_BEAT_LOCK)
r.delete(OnyxRedisLocks.MONITOR_VESPA_SYNC_BEAT_LOCK)
r.delete(OnyxRedisConstants.ACTIVE_FENCES)
@@ -159,7 +159,7 @@ def on_worker_init(sender: Worker, **kwargs: Any) -> None:
RedisConnectorExternalGroupSync.reset_all(r)
# mark orphaned index attempts as failed
with get_session_with_current_tenant() as db_session:
with get_session_with_default_tenant() as db_session:
unfenced_attempt_ids = get_unfenced_index_attempt_ids(db_session, r)
for attempt_id in unfenced_attempt_ids:
attempt = get_index_attempt(db_session, attempt_id)
@@ -235,7 +235,7 @@ class HubPeriodicTask(bootsteps.StartStopStep):
lock: RedisLock = worker.primary_worker_lock
r = get_shared_redis_client()
r = get_redis_client(tenant_id=None)
if lock.owned():
task_logger.debug("Reacquiring primary worker lock.")

View File

@@ -92,8 +92,7 @@ def celery_find_task(task_id: str, queue: str, r: Redis) -> int:
def celery_get_queued_task_ids(queue: str, r: Redis) -> set[str]:
"""This is a redis specific way to build a list of tasks in a queue and return them
as a set.
"""This is a redis specific way to build a list of tasks in a queue.
This helps us read the queue once and then efficiently look for missing tasks
in the queue.
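# Illustrative usage sketch only: read the queue once with the helper above, then do cheap
# membership checks while validating many fences. The function name and its inputs are
# assumptions made for the example.
def find_missing_task_ids(taskset_task_ids: set[str], queue: str, r: Redis) -> set[str]:
    # one pass over the queue, then O(1) lookups per taskset entry
    queued_ids = celery_get_queued_task_ids(queue, r)
    return {task_id for task_id in taskset_task_ids if task_id not in queued_ids}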

View File

@@ -34,7 +34,7 @@ def _get_deletion_status(
connector_id: int,
credential_id: int,
db_session: Session,
tenant_id: str,
tenant_id: str | None = None,
) -> TaskQueueState | None:
"""We no longer store TaskQueueState in the DB for a deletion attempt.
This function populates TaskQueueState by just checking redis.
@@ -67,7 +67,7 @@ def get_deletion_attempt_snapshot(
connector_id: int,
credential_id: int,
db_session: Session,
tenant_id: str,
tenant_id: str | None = None,
) -> DeletionAttemptSnapshot | None:
deletion_task = _get_deletion_status(
connector_id, credential_id, db_session, tenant_id

View File

@@ -19,7 +19,6 @@ BEAT_EXPIRES_DEFAULT = 15 * 60 # 15 minutes (in seconds)
# hack to slow down task dispatch in the cloud until
# we have a better implementation (backpressure, etc)
# Note that DynamicTenantScheduler can adjust the runtime value for this via Redis
CLOUD_BEAT_MULTIPLIER_DEFAULT = 8.0
# tasks that run in either self-hosted or cloud
@@ -36,15 +35,6 @@ beat_task_templates.extend(
"expires": BEAT_EXPIRES_DEFAULT,
},
},
{
"name": "check-for-checkpoint-cleanup",
"task": OnyxCeleryTask.CHECK_FOR_CHECKPOINT_CLEANUP,
"schedule": timedelta(hours=1),
"options": {
"priority": OnyxCeleryPriority.LOW,
"expires": BEAT_EXPIRES_DEFAULT,
},
},
{
"name": "check-for-connector-deletion",
"task": OnyxCeleryTask.CHECK_FOR_CONNECTOR_DELETION,
@@ -66,7 +56,16 @@ beat_task_templates.extend(
{
"name": "check-for-pruning",
"task": OnyxCeleryTask.CHECK_FOR_PRUNING,
"schedule": timedelta(seconds=20),
"schedule": timedelta(hours=1),
"options": {
"priority": OnyxCeleryPriority.MEDIUM,
"expires": BEAT_EXPIRES_DEFAULT,
},
},
{
"name": "monitor-vespa-sync",
"task": OnyxCeleryTask.MONITOR_VESPA_SYNC,
"schedule": timedelta(seconds=5),
"options": {
"priority": OnyxCeleryPriority.MEDIUM,
"expires": BEAT_EXPIRES_DEFAULT,
@@ -142,14 +141,14 @@ def make_cloud_generator_task(task: dict[str, Any]) -> dict[str, Any]:
return cloud_task
# tasks that only run in the cloud and are system wide
# tasks that only run in the cloud
# the name attribute must start with ONYX_CLOUD_CELERY_TASK_PREFIX = "cloud" to be seen
# by the DynamicTenantScheduler as a system-wide task and not a per-tenant task
beat_cloud_tasks: list[dict] = [
beat_system_tasks: list[dict] = [
# cloud specific tasks
{
"name": f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_monitor-alembic",
"task": OnyxCeleryTask.CLOUD_MONITOR_ALEMBIC,
"name": f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_check-alembic",
"task": OnyxCeleryTask.CLOUD_CHECK_ALEMBIC,
"schedule": timedelta(hours=1),
"options": {
"queue": OnyxCeleryQueues.MONITORING,
@@ -157,37 +156,11 @@ beat_cloud_tasks: list[dict] = [
"expires": BEAT_EXPIRES_DEFAULT,
},
},
{
"name": f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_monitor-celery-queues",
"task": OnyxCeleryTask.CLOUD_MONITOR_CELERY_QUEUES,
"schedule": timedelta(seconds=30),
"options": {
"queue": OnyxCeleryQueues.MONITORING,
"priority": OnyxCeleryPriority.HIGH,
"expires": BEAT_EXPIRES_DEFAULT,
},
},
]
# tasks that only run when self-hosted
tasks_to_schedule: list[dict] = []
if not MULTI_TENANT:
tasks_to_schedule.extend(
[
{
"name": "monitor-celery-queues",
"task": OnyxCeleryTask.MONITOR_CELERY_QUEUES,
"schedule": timedelta(seconds=10),
"options": {
"priority": OnyxCeleryPriority.MEDIUM,
"expires": BEAT_EXPIRES_DEFAULT,
"queue": OnyxCeleryQueues.MONITORING,
},
},
]
)
tasks_to_schedule.extend(beat_task_templates)
tasks_to_schedule = beat_task_templates
def generate_cloud_tasks(
@@ -207,24 +180,23 @@ def generate_cloud_tasks(
if beat_multiplier <= 0:
raise ValueError("beat_multiplier must be positive!")
cloud_tasks: list[dict] = []
# start with the incoming beat tasks
cloud_tasks: list[dict] = copy.deepcopy(beat_tasks)
# generate our tenant aware cloud tasks from the templates
# generate our cloud tasks from the templates
for beat_template in beat_templates:
cloud_task = make_cloud_generator_task(beat_template)
cloud_tasks.append(cloud_task)
# factor in the cloud multiplier for the above
# factor in the cloud multiplier
for cloud_task in cloud_tasks:
cloud_task["schedule"] = cloud_task["schedule"] * beat_multiplier
# add the fixed cloud/system beat tasks. No multiplier for these.
cloud_tasks.extend(copy.deepcopy(beat_tasks))
return cloud_tasks
def get_cloud_tasks_to_schedule(beat_multiplier: float) -> list[dict[str, Any]]:
return generate_cloud_tasks(beat_cloud_tasks, beat_task_templates, beat_multiplier)
return generate_cloud_tasks(beat_system_tasks, beat_task_templates, beat_multiplier)
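# Worked example (illustrative): with CLOUD_BEAT_MULTIPLIER_DEFAULT = 8.0, a per-tenant
# template scheduled at timedelta(seconds=20) is dispatched roughly every 160 seconds in
# the cloud, while the fixed system-wide tasks appended without the multiplier keep their
# original schedule:
#
#   timedelta(seconds=20) * 8.0 == timedelta(seconds=160)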
def get_tasks_to_schedule() -> list[dict[str, Any]]:

View File

@@ -1,55 +1,29 @@
import traceback
from datetime import datetime
from datetime import timezone
from typing import Any
from typing import cast
from celery import Celery
from celery import shared_task
from celery import Task
from celery.exceptions import SoftTimeLimitExceeded
from pydantic import ValidationError
from redis import Redis
from redis.lock import Lock as RedisLock
from sqlalchemy.orm import Session
from onyx.background.celery.apps.app_base import task_logger
from onyx.background.celery.celery_redis import celery_get_queue_length
from onyx.background.celery.celery_redis import celery_get_queued_task_ids
from onyx.configs.app_configs import JOB_TIMEOUT
from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisConstants
from onyx.configs.constants import OnyxRedisLocks
from onyx.configs.constants import OnyxRedisSignals
from onyx.db.connector import fetch_connector_by_id
from onyx.db.connector_credential_pair import add_deletion_failure_message
from onyx.db.connector_credential_pair import (
delete_connector_credential_pair__no_commit,
)
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
from onyx.db.connector_credential_pair import get_connector_credential_pairs
from onyx.db.document import get_document_ids_for_connector_credential_pair
from onyx.db.document_set import delete_document_set_cc_pair_relationship__no_commit
from onyx.db.engine import get_session_with_current_tenant
from onyx.db.engine import get_session_with_tenant
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import SyncStatus
from onyx.db.enums import SyncType
from onyx.db.index_attempt import delete_index_attempts
from onyx.db.search_settings import get_all_search_settings
from onyx.db.sync_record import cleanup_sync_records
from onyx.db.sync_record import insert_sync_record
from onyx.db.sync_record import update_sync_record_status
from onyx.redis.redis_connector import RedisConnector
from onyx.redis.redis_connector_delete import RedisConnectorDelete
from onyx.redis.redis_connector_delete import RedisConnectorDeletePayload
from onyx.redis.redis_pool import get_redis_client
from onyx.redis.redis_pool import get_redis_replica_client
from onyx.utils.variable_functionality import (
fetch_versioned_implementation_with_fallback,
)
from onyx.utils.variable_functionality import noop_fallback
class TaskDependencyError(RuntimeError):
@@ -57,51 +31,6 @@ class TaskDependencyError(RuntimeError):
with connector deletion."""
def revoke_tasks_blocking_deletion(
redis_connector: RedisConnector, db_session: Session, app: Celery
) -> None:
search_settings_list = get_all_search_settings(db_session)
for search_settings in search_settings_list:
redis_connector_index = redis_connector.new_index(search_settings.id)
try:
index_payload = redis_connector_index.payload
if index_payload and index_payload.celery_task_id:
app.control.revoke(index_payload.celery_task_id)
task_logger.info(
f"Revoked indexing task {index_payload.celery_task_id}."
)
except Exception:
task_logger.exception("Exception while revoking indexing task")
try:
permissions_sync_payload = redis_connector.permissions.payload
if permissions_sync_payload and permissions_sync_payload.celery_task_id:
app.control.revoke(permissions_sync_payload.celery_task_id)
task_logger.info(
f"Revoked permissions sync task {permissions_sync_payload.celery_task_id}."
)
except Exception:
task_logger.exception("Exception while revoking pruning task")
try:
prune_payload = redis_connector.prune.payload
if prune_payload and prune_payload.celery_task_id:
app.control.revoke(prune_payload.celery_task_id)
task_logger.info(f"Revoked pruning task {prune_payload.celery_task_id}.")
except Exception:
task_logger.exception("Exception while revoking permissions sync task")
try:
external_group_sync_payload = redis_connector.external_group_sync.payload
if external_group_sync_payload and external_group_sync_payload.celery_task_id:
app.control.revoke(external_group_sync_payload.celery_task_id)
task_logger.info(
f"Revoked external group sync task {external_group_sync_payload.celery_task_id}."
)
except Exception:
task_logger.exception("Exception while revoking external group sync task")
@shared_task(
name=OnyxCeleryTask.CHECK_FOR_CONNECTOR_DELETION,
ignore_result=True,
@@ -109,46 +38,31 @@ def revoke_tasks_blocking_deletion(
trail=False,
bind=True,
)
def check_for_connector_deletion_task(self: Task, *, tenant_id: str) -> bool | None:
r = get_redis_client()
r_replica = get_redis_replica_client()
r_celery: Redis = self.app.broker_connection().channel().client # type: ignore
def check_for_connector_deletion_task(
self: Task, *, tenant_id: str | None
) -> bool | None:
r = get_redis_client(tenant_id=tenant_id)
lock_beat: RedisLock = r.lock(
OnyxRedisLocks.CHECK_CONNECTOR_DELETION_BEAT_LOCK,
timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,
)
# Prevent this task from overlapping with itself
# these tasks should never overlap
if not lock_beat.acquire(blocking=False):
return None
try:
# we want to run this less frequently than the overall task
lock_beat.reacquire()
if not r.exists(OnyxRedisSignals.BLOCK_VALIDATE_CONNECTOR_DELETION_FENCES):
# clear fences that don't have associated celery tasks in progress
try:
validate_connector_deletion_fences(
tenant_id, r, r_replica, r_celery, lock_beat
)
except Exception:
task_logger.exception(
"Exception while validating connector deletion fences"
)
r.set(OnyxRedisSignals.BLOCK_VALIDATE_CONNECTOR_DELETION_FENCES, 1, ex=300)
# collect cc_pair_ids
cc_pair_ids: list[int] = []
with get_session_with_current_tenant() as db_session:
with get_session_with_tenant(tenant_id) as db_session:
cc_pairs = get_connector_credential_pairs(db_session)
for cc_pair in cc_pairs:
cc_pair_ids.append(cc_pair.id)
# try running cleanup on the cc_pair_ids
for cc_pair_id in cc_pair_ids:
with get_session_with_current_tenant() as db_session:
with get_session_with_tenant(tenant_id) as db_session:
redis_connector = RedisConnector(tenant_id, cc_pair_id)
try:
try_generate_document_cc_pair_cleanup_tasks(
@@ -156,54 +70,13 @@ def check_for_connector_deletion_task(self: Task, *, tenant_id: str) -> bool | N
)
except TaskDependencyError as e:
# this means we wanted to start deleting but dependent tasks were running
# on the first error, we set a stop signal and revoke the dependent tasks
# on subsequent errors, we hard reset blocking fences after our specified timeout
# is exceeded
# Leave a stop signal to clear indexing and pruning tasks more quickly
task_logger.info(str(e))
if not redis_connector.stop.fenced:
# one time revoke of celery tasks
task_logger.info("Revoking any tasks blocking deletion.")
revoke_tasks_blocking_deletion(
redis_connector, db_session, self.app
)
redis_connector.stop.set_fence(True)
redis_connector.stop.set_timeout()
else:
# stop signal already set
if redis_connector.stop.timed_out:
# waiting too long, just reset blocking fences
task_logger.info(
"Timed out waiting for tasks blocking deletion. Resetting blocking fences."
)
search_settings_list = get_all_search_settings(db_session)
for search_settings in search_settings_list:
redis_connector_index = redis_connector.new_index(
search_settings.id
)
redis_connector_index.reset()
redis_connector.prune.reset()
redis_connector.permissions.reset()
redis_connector.external_group_sync.reset()
else:
# just wait
pass
redis_connector.stop.set_fence(True)
else:
# clear the stop signal if it exists ... no longer needed
redis_connector.stop.set_fence(False)
lock_beat.reacquire()
keys = cast(set[Any], r_replica.smembers(OnyxRedisConstants.ACTIVE_FENCES))
for key in keys:
key_bytes = cast(bytes, key)
if not r.exists(key_bytes):
r.srem(OnyxRedisConstants.ACTIVE_FENCES, key_bytes)
continue
key_str = key_bytes.decode("utf-8")
if key_str.startswith(RedisConnectorDelete.FENCE_PREFIX):
monitor_connector_deletion_taskset(tenant_id, key_bytes, r)
except SoftTimeLimitExceeded:
task_logger.info(
"Soft time limit exceeded, task is being terminated gracefully."
@@ -222,7 +95,7 @@ def try_generate_document_cc_pair_cleanup_tasks(
cc_pair_id: int,
db_session: Session,
lock_beat: RedisLock,
tenant_id: str,
tenant_id: str | None,
) -> int | None:
"""Returns an int if syncing is needed. The int represents the number of sync tasks generated.
Note that syncing can still be required even if the number of sync tasks generated is zero.
@@ -262,7 +135,6 @@ def try_generate_document_cc_pair_cleanup_tasks(
return None
# set a basic fence to start
redis_connector.delete.set_active()
fence_payload = RedisConnectorDeletePayload(
num_tasks=None,
submitted=datetime.now(timezone.utc),
@@ -340,326 +212,3 @@ def try_generate_document_cc_pair_cleanup_tasks(
redis_connector.delete.set_fence(fence_payload)
return tasks_generated
def monitor_connector_deletion_taskset(
tenant_id: str, key_bytes: bytes, r: Redis
) -> None:
fence_key = key_bytes.decode("utf-8")
cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)
if cc_pair_id_str is None:
task_logger.warning(f"could not parse cc_pair_id from {fence_key}")
return
cc_pair_id = int(cc_pair_id_str)
redis_connector = RedisConnector(tenant_id, cc_pair_id)
fence_data = redis_connector.delete.payload
if not fence_data:
task_logger.warning(
f"Connector deletion - fence payload invalid: cc_pair={cc_pair_id}"
)
return
if fence_data.num_tasks is None:
# the fence is setting up but isn't ready yet
return
remaining = redis_connector.delete.get_remaining()
task_logger.info(
f"Connector deletion progress: cc_pair={cc_pair_id} remaining={remaining} initial={fence_data.num_tasks}"
)
if remaining > 0:
with get_session_with_current_tenant() as db_session:
update_sync_record_status(
db_session=db_session,
entity_id=cc_pair_id,
sync_type=SyncType.CONNECTOR_DELETION,
sync_status=SyncStatus.IN_PROGRESS,
num_docs_synced=remaining,
)
return
with get_session_with_current_tenant() as db_session:
cc_pair = get_connector_credential_pair_from_id(
db_session=db_session,
cc_pair_id=cc_pair_id,
)
if not cc_pair:
task_logger.warning(
f"Connector deletion - cc_pair not found: cc_pair={cc_pair_id}"
)
return
try:
doc_ids = get_document_ids_for_connector_credential_pair(
db_session, cc_pair.connector_id, cc_pair.credential_id
)
if len(doc_ids) > 0:
# NOTE(rkuo): if this happens, documents somehow got added while
# deletion was in progress. Likely a bug in the logic that gates off pruning and indexing
# work before deletion starts.
task_logger.warning(
"Connector deletion - documents still found after taskset completion. "
"Clearing the current deletion attempt and allowing deletion to restart: "
f"cc_pair={cc_pair_id} "
f"docs_deleted={fence_data.num_tasks} "
f"docs_remaining={len(doc_ids)}"
)
# We don't want to gloss over how we got into this state, but resetting
# our attempt and letting the deletion restart is a good way to recover
redis_connector.delete.reset()
raise RuntimeError(
"Connector deletion - documents still found after taskset completion"
)
# clean up the rest of the related Postgres entities
# index attempts
delete_index_attempts(
db_session=db_session,
cc_pair_id=cc_pair_id,
)
# document sets
delete_document_set_cc_pair_relationship__no_commit(
db_session=db_session,
connector_id=cc_pair.connector_id,
credential_id=cc_pair.credential_id,
)
# user groups
cleanup_user_groups = fetch_versioned_implementation_with_fallback(
"onyx.db.user_group",
"delete_user_group_cc_pair_relationship__no_commit",
noop_fallback,
)
cleanup_user_groups(
cc_pair_id=cc_pair_id,
db_session=db_session,
)
# finally, delete the cc-pair
delete_connector_credential_pair__no_commit(
db_session=db_session,
connector_id=cc_pair.connector_id,
credential_id=cc_pair.credential_id,
)
# if there are no credentials left, delete the connector
connector = fetch_connector_by_id(
db_session=db_session,
connector_id=cc_pair.connector_id,
)
if not connector or not len(connector.credentials):
task_logger.info(
"Connector deletion - Found no credentials left for connector, deleting connector"
)
db_session.delete(connector)
db_session.commit()
update_sync_record_status(
db_session=db_session,
entity_id=cc_pair_id,
sync_type=SyncType.CONNECTOR_DELETION,
sync_status=SyncStatus.SUCCESS,
num_docs_synced=fence_data.num_tasks,
)
except Exception as e:
db_session.rollback()
stack_trace = traceback.format_exc()
error_message = f"Error: {str(e)}\n\nStack Trace:\n{stack_trace}"
add_deletion_failure_message(db_session, cc_pair_id, error_message)
update_sync_record_status(
db_session=db_session,
entity_id=cc_pair_id,
sync_type=SyncType.CONNECTOR_DELETION,
sync_status=SyncStatus.FAILED,
num_docs_synced=fence_data.num_tasks,
)
task_logger.exception(
f"Connector deletion exceptioned: "
f"cc_pair={cc_pair_id} connector={cc_pair.connector_id} credential={cc_pair.credential_id}"
)
raise e
task_logger.info(
f"Connector deletion succeeded: "
f"cc_pair={cc_pair_id} "
f"connector={cc_pair.connector_id} "
f"credential={cc_pair.credential_id} "
f"docs_deleted={fence_data.num_tasks}"
)
redis_connector.delete.reset()
def validate_connector_deletion_fences(
tenant_id: str,
r: Redis,
r_replica: Redis,
r_celery: Redis,
lock_beat: RedisLock,
) -> None:
# building the lookup table can be expensive, so we won't bother
# validating until the queue is small
CONNECTION_DELETION_VALIDATION_MAX_QUEUE_LEN = 1024
queue_len = celery_get_queue_length(OnyxCeleryQueues.CONNECTOR_DELETION, r_celery)
if queue_len > CONNECTION_DELETION_VALIDATION_MAX_QUEUE_LEN:
return
queued_upsert_tasks = celery_get_queued_task_ids(
OnyxCeleryQueues.CONNECTOR_DELETION, r_celery
)
# validate all existing connector deletion jobs
lock_beat.reacquire()
keys = cast(set[Any], r_replica.smembers(OnyxRedisConstants.ACTIVE_FENCES))
for key in keys:
key_bytes = cast(bytes, key)
key_str = key_bytes.decode("utf-8")
if not key_str.startswith(RedisConnectorDelete.FENCE_PREFIX):
continue
validate_connector_deletion_fence(
tenant_id,
key_bytes,
queued_upsert_tasks,
r,
)
lock_beat.reacquire()
return
def validate_connector_deletion_fence(
tenant_id: str,
key_bytes: bytes,
queued_tasks: set[str],
r: Redis,
) -> None:
"""Checks for the error condition where an indexing fence is set but the associated celery tasks don't exist.
This can happen if the indexing worker hard crashes or is terminated.
Being in this bad state means the fence will never clear without help, so this function
gives the help.
How this works:
1. This function renews the active signal with a 5 minute TTL under the following conditions
1.1. When the task is seen in the redis queue
1.2. When the task is seen in the reserved / prefetched list
2. Externally, the active signal is renewed when:
2.1. The fence is created
2.2. The indexing watchdog checks the spawned task.
3. The TTL allows us to get through the transitions on fence startup
and when the task starts executing.
More TTL clarification: it is seemingly impossible to exactly query Celery for
whether a task is in the queue or currently executing.
1. An unknown task id is always returned as state PENDING.
2. Redis can be inspected for the task id, but the task id is gone between the time a worker receives the task
and the time it actually starts on the worker.
queued_tasks: the task ids currently queued in the celery connector deletion queue
"""
# if the fence doesn't exist, there's nothing to do
fence_key = key_bytes.decode("utf-8")
cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)
if cc_pair_id_str is None:
task_logger.warning(
f"validate_connector_deletion_fence - could not parse id from {fence_key}"
)
return
cc_pair_id = int(cc_pair_id_str)
# parse out metadata and initialize the helper class with it
redis_connector = RedisConnector(tenant_id, int(cc_pair_id))
# check to see if the fence/payload exists
if not redis_connector.delete.fenced:
return
# in the cloud, the payload format may have changed ...
# it's a little sloppy, but just reset the fence for now if that happens
# TODO: add intentional cleanup/abort logic
try:
payload = redis_connector.delete.payload
except ValidationError:
task_logger.exception(
"validate_connector_deletion_fence - "
"Resetting fence because fence schema is out of date: "
f"cc_pair={cc_pair_id} "
f"fence={fence_key}"
)
redis_connector.delete.reset()
return
if not payload:
return
# OK, there's actually something for us to validate
# look up every task in the current taskset in the celery queue
# every entry in the taskset should have an associated entry in the celery task queue
# because we get the celery tasks first, the entries in our own deletion taskset
# should be roughly a subset of the tasks in celery
# this check isn't very exact, but should be sufficient over a period of time
# A single successful check over some number of attempts is sufficient.
# TODO: if the number of tasks in celery is much lower than the taskset length
# we might be able to shortcut the lookup since by definition some of the tasks
# must not exist in celery.
tasks_scanned = 0
tasks_not_in_celery = 0 # a non-zero number after completing our check is bad
for member in r.sscan_iter(redis_connector.delete.taskset_key):
tasks_scanned += 1
member_bytes = cast(bytes, member)
member_str = member_bytes.decode("utf-8")
if member_str in queued_tasks:
continue
tasks_not_in_celery += 1
task_logger.info(
"validate_connector_deletion_fence task check: "
f"tasks_scanned={tasks_scanned} tasks_not_in_celery={tasks_not_in_celery}"
)
# we're active if there are still tasks to run and those tasks all exist in celery
if tasks_scanned > 0 and tasks_not_in_celery == 0:
redis_connector.delete.set_active()
return
# we may want to enable this check if using the active task list somehow isn't good enough
# if redis_connector_index.generator_locked():
# logger.info(f"{payload.celery_task_id} is currently executing.")
# if we get here, we didn't find any direct indication that the associated celery tasks exist,
# but they still might be there due to gaps in our ability to check states during transitions
# Checking the active signal safeguards us against these transition periods
# (which has a duration that allows us to bridge those gaps)
if redis_connector.delete.active():
return
# celery tasks don't exist and the active signal has expired, possibly due to a crash. Clean it up.
task_logger.warning(
"validate_connector_deletion_fence - "
"Resetting fence because no associated celery tasks were found: "
f"cc_pair={cc_pair_id} "
f"fence={fence_key}"
)
redis_connector.delete.reset()
return
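# Minimal sketch of the active-signal pattern described in the docstring above. Illustrative
# only: the key suffix and TTL are assumptions made for the example; the real logic lives
# behind RedisConnectorDelete's set_active() / active() helpers.
ACTIVE_SIGNAL_TTL = 300  # seconds; long enough to bridge queue/execution transitions

def example_set_active(r: Redis, fence_key: str) -> None:
    # refreshed whenever the fence is created or its tasks are observed in celery
    r.set(f"{fence_key}_active", 1, ex=ACTIVE_SIGNAL_TTL)

def example_is_active(r: Redis, fence_key: str) -> bool:
    # if this is False and no celery tasks were found, the fence is safe to reset
    return bool(r.exists(f"{fence_key}_active"))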

View File

@@ -30,7 +30,6 @@ from onyx.background.celery.celery_redis import celery_find_task
from onyx.background.celery.celery_redis import celery_get_queue_length
from onyx.background.celery.celery_redis import celery_get_queued_task_ids
from onyx.background.celery.celery_redis import celery_get_unacked_task_ids
from onyx.background.celery.tasks.shared.tasks import OnyxCeleryTaskCompletionStatus
from onyx.configs.app_configs import JOB_TIMEOUT
from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import CELERY_PERMISSIONS_SYNC_LOCK_TIMEOUT
@@ -43,12 +42,10 @@ from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisConstants
from onyx.configs.constants import OnyxRedisLocks
from onyx.configs.constants import OnyxRedisSignals
from onyx.connectors.factory import validate_ccpair_for_user
from onyx.db.connector import mark_cc_pair_as_permissions_synced
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
from onyx.db.connector_credential_pair import update_connector_credential_pair
from onyx.db.document import upsert_document_by_connector_credential_pair
from onyx.db.engine import get_session_with_current_tenant
from onyx.db.engine import get_session_with_tenant
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import SyncStatus
@@ -66,7 +63,6 @@ from onyx.redis.redis_pool import get_redis_replica_client
from onyx.redis.redis_pool import redis_lock_dump
from onyx.server.utils import make_short_id
from onyx.utils.logger import doc_permission_sync_ctx
from onyx.utils.logger import format_error_for_logging
from onyx.utils.logger import LoggerContextVars
from onyx.utils.logger import setup_logger
@@ -123,13 +119,13 @@ def _is_external_doc_permissions_sync_due(cc_pair: ConnectorCredentialPair) -> b
soft_time_limit=JOB_TIMEOUT,
bind=True,
)
def check_for_doc_permissions_sync(self: Task, *, tenant_id: str) -> bool | None:
def check_for_doc_permissions_sync(self: Task, *, tenant_id: str | None) -> bool | None:
# TODO(rkuo): merge into check function after lookup table for fences is added
# we need to use celery's redis client to access its redis data
# (which lives on a different db number)
r = get_redis_client()
r_replica = get_redis_replica_client()
r = get_redis_client(tenant_id=tenant_id)
r_replica = get_redis_replica_client(tenant_id=tenant_id)
r_celery: Redis = self.app.broker_connection().channel().client # type: ignore
lock_beat: RedisLock = r.lock(
@@ -144,7 +140,7 @@ def check_for_doc_permissions_sync(self: Task, *, tenant_id: str) -> bool | None
try:
# get all cc pairs that need to be synced
cc_pair_ids_to_sync: list[int] = []
with get_session_with_current_tenant() as db_session:
with get_session_with_tenant(tenant_id) as db_session:
cc_pairs = get_all_auto_sync_cc_pairs(db_session)
for cc_pair in cc_pairs:
@@ -179,37 +175,12 @@ def check_for_doc_permissions_sync(self: Task, *, tenant_id: str) -> bool | None
)
r.set(OnyxRedisSignals.BLOCK_VALIDATE_PERMISSION_SYNC_FENCES, 1, ex=300)
# use a lookup table to find active fences. We still have to verify the fence
# exists since it is an optimization and not the source of truth.
lock_beat.reacquire()
keys = cast(set[Any], r_replica.smembers(OnyxRedisConstants.ACTIVE_FENCES))
for key in keys:
key_bytes = cast(bytes, key)
if not r.exists(key_bytes):
r.srem(OnyxRedisConstants.ACTIVE_FENCES, key_bytes)
continue
key_str = key_bytes.decode("utf-8")
if key_str.startswith(RedisConnectorPermissionSync.FENCE_PREFIX):
with get_session_with_current_tenant() as db_session:
monitor_ccpair_permissions_taskset(
tenant_id, key_bytes, r, db_session
)
task_logger.info(f"check_for_doc_permissions_sync finished: tenant={tenant_id}")
except SoftTimeLimitExceeded:
task_logger.info(
"Soft time limit exceeded, task is being terminated gracefully."
)
except Exception as e:
error_msg = format_error_for_logging(e)
task_logger.warning(
f"Unexpected check_for_doc_permissions_sync exception: tenant={tenant_id} {error_msg}"
)
task_logger.exception(
f"Unexpected check_for_doc_permissions_sync exception: tenant={tenant_id}"
)
except Exception:
task_logger.exception(f"Unexpected exception: tenant={tenant_id}")
finally:
if lock_beat.owned():
lock_beat.release()
@@ -221,7 +192,7 @@ def try_creating_permissions_sync_task(
app: Celery,
cc_pair_id: int,
r: Redis,
tenant_id: str,
tenant_id: str | None,
) -> str | None:
"""Returns a randomized payload id on success.
Returns None if no syncing is required."""
@@ -258,7 +229,7 @@ def try_creating_permissions_sync_task(
# create before setting fence to avoid race condition where the monitoring
# task updates the sync record before it is created
try:
with get_session_with_current_tenant() as db_session:
with get_session_with_tenant(tenant_id) as db_session:
insert_sync_record(
db_session=db_session,
entity_id=cc_pair_id,
@@ -293,19 +264,13 @@ def try_creating_permissions_sync_task(
redis_connector.permissions.set_fence(payload)
payload_id = payload.id
except Exception as e:
error_msg = format_error_for_logging(e)
task_logger.warning(
f"Unexpected try_creating_permissions_sync_task exception: cc_pair={cc_pair_id} {error_msg}"
)
except Exception:
task_logger.exception(f"Unexpected exception: cc_pair={cc_pair_id}")
return None
finally:
if lock.owned():
lock.release()
task_logger.info(
f"try_creating_permissions_sync_task finished: cc_pair={cc_pair_id} payload_id={payload_id}"
)
return payload_id
@@ -320,7 +285,7 @@ def try_creating_permissions_sync_task(
def connector_permission_sync_generator_task(
self: Task,
cc_pair_id: int,
tenant_id: str,
tenant_id: str | None,
) -> None:
"""
Permission sync task that handles document permission syncing for a given connector credential pair
@@ -338,7 +303,7 @@ def connector_permission_sync_generator_task(
redis_connector = RedisConnector(tenant_id, cc_pair_id)
r = get_redis_client()
r = get_redis_client(tenant_id=tenant_id)
# this wait is needed to avoid a race condition where
# the primary worker sends the task and it is immediately executed
@@ -384,7 +349,6 @@ def connector_permission_sync_generator_task(
OnyxRedisLocks.CONNECTOR_DOC_PERMISSIONS_SYNC_LOCK_PREFIX
+ f"_{redis_connector.id}",
timeout=CELERY_PERMISSIONS_SYNC_LOCK_TIMEOUT,
thread_local=False,
)
acquired = lock.acquire(blocking=False)
@@ -395,7 +359,7 @@ def connector_permission_sync_generator_task(
return None
try:
with get_session_with_current_tenant() as db_session:
with get_session_with_tenant(tenant_id) as db_session:
cc_pair = get_connector_credential_pair_from_id(
db_session=db_session,
cc_pair_id=cc_pair_id,
@@ -405,29 +369,6 @@ def connector_permission_sync_generator_task(
f"No connector credential pair found for id: {cc_pair_id}"
)
try:
created = validate_ccpair_for_user(
cc_pair.connector.id,
cc_pair.credential.id,
db_session,
enforce_creation=False,
)
if not created:
task_logger.warning(
f"Unable to create connector credential pair for id: {cc_pair_id}"
)
except Exception:
task_logger.exception(
f"validate_ccpair_permissions_sync exceptioned: cc_pair={cc_pair_id}"
)
update_connector_credential_pair(
db_session=db_session,
connector_id=cc_pair.connector.id,
credential_id=cc_pair.credential.id,
status=ConnectorCredentialPairStatus.INVALID,
)
raise
source_type = cc_pair.connector.source
doc_sync_func = DOC_PERMISSIONS_FUNC_MAP.get(source_type)
@@ -479,10 +420,6 @@ def connector_permission_sync_generator_task(
redis_connector.permissions.generator_complete = tasks_generated
except Exception as e:
error_msg = format_error_for_logging(e)
task_logger.warning(
f"Permission sync exceptioned: cc_pair={cc_pair_id} payload_id={payload_id} {error_msg}"
)
task_logger.exception(
f"Permission sync exceptioned: cc_pair={cc_pair_id} payload_id={payload_id}"
)
@@ -509,7 +446,7 @@ def connector_permission_sync_generator_task(
)
def update_external_document_permissions_task(
self: Task,
tenant_id: str,
tenant_id: str | None,
serialized_doc_external_access: dict,
source_string: str,
connector_id: int,
@@ -517,23 +454,19 @@ def update_external_document_permissions_task(
) -> bool:
start = time.monotonic()
completion_status = OnyxCeleryTaskCompletionStatus.UNDEFINED
document_external_access = DocExternalAccess.from_dict(
serialized_doc_external_access
)
doc_id = document_external_access.doc_id
external_access = document_external_access.external_access
try:
with get_session_with_current_tenant() as db_session:
with get_session_with_tenant(tenant_id) as db_session:
# Add the users to the DB if they don't exist
batch_add_ext_perm_user_if_not_exists(
db_session=db_session,
emails=list(external_access.external_user_emails),
continue_on_error=True,
)
# Then upsert the document's external permissions
# Then we upsert the document's external permissions in postgres
created_new_doc = upsert_document_external_perms(
db_session=db_session,
doc_id=doc_id,
@@ -557,34 +490,19 @@ def update_external_document_permissions_task(
f"action=update_permissions "
f"elapsed={elapsed:.2f}"
)
completion_status = OnyxCeleryTaskCompletionStatus.SUCCEEDED
except Exception as e:
error_msg = format_error_for_logging(e)
task_logger.warning(
f"Exception in update_external_document_permissions_task: connector_id={connector_id} doc_id={doc_id} {error_msg}"
)
except Exception:
task_logger.exception(
f"update_external_document_permissions_task exceptioned: "
f"connector_id={connector_id} doc_id={doc_id}"
f"Exception in update_external_document_permissions_task: "
f"connector_id={connector_id} "
f"doc_id={doc_id}"
)
completion_status = OnyxCeleryTaskCompletionStatus.NON_RETRYABLE_EXCEPTION
finally:
task_logger.info(
f"update_external_document_permissions_task completed: status={completion_status.value} doc={doc_id}"
)
if completion_status != OnyxCeleryTaskCompletionStatus.SUCCEEDED:
return False
task_logger.info(
f"update_external_document_permissions_task finished: connector_id={connector_id} doc_id={doc_id}"
)
return True
def validate_permission_sync_fences(
tenant_id: str,
tenant_id: str | None,
r: Redis,
r_replica: Redis,
r_celery: Redis,
@@ -631,7 +549,7 @@ def validate_permission_sync_fences(
def validate_permission_sync_fence(
tenant_id: str,
tenant_id: str | None,
key_bytes: bytes,
queued_tasks: set[str],
reserved_tasks: set[str],
@@ -837,11 +755,11 @@ class PermissionSyncCallback(IndexingHeartbeatInterface):
raise
"""Monitoring CCPair permissions utils"""
"""Monitoring CCPair permissions utils, called in monitor_vespa_sync"""
def monitor_ccpair_permissions_taskset(
tenant_id: str, key_bytes: bytes, r: Redis, db_session: Session
tenant_id: str | None, key_bytes: bytes, r: Redis, db_session: Session
) -> None:
fence_key = key_bytes.decode("utf-8")
cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)

View File

@@ -26,23 +26,20 @@ from ee.onyx.external_permissions.sync_params import (
from onyx.background.celery.apps.app_base import task_logger
from onyx.background.celery.celery_redis import celery_find_task
from onyx.background.celery.celery_redis import celery_get_unacked_task_ids
from onyx.background.error_logging import emit_background_error
from onyx.configs.app_configs import JOB_TIMEOUT
from onyx.configs.constants import CELERY_EXTERNAL_GROUP_SYNC_LOCK_TIMEOUT
from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import CELERY_TASK_WAIT_FOR_FENCE_TIMEOUT
from onyx.configs.constants import DANSWER_REDIS_FUNCTION_LOCK_PREFIX
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisConstants
from onyx.configs.constants import OnyxRedisLocks
from onyx.configs.constants import OnyxRedisSignals
from onyx.connectors.exceptions import ConnectorValidationError
from onyx.connectors.factory import validate_ccpair_for_user
from onyx.db.connector import mark_cc_pair_as_external_group_synced
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
from onyx.db.connector_credential_pair import update_connector_credential_pair
from onyx.db.engine import get_session_with_current_tenant
from onyx.db.engine import get_session_with_tenant
from onyx.db.enums import AccessType
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import SyncStatus
@@ -58,7 +55,6 @@ from onyx.redis.redis_connector_ext_group_sync import (
from onyx.redis.redis_pool import get_redis_client
from onyx.redis.redis_pool import get_redis_replica_client
from onyx.server.utils import make_short_id
from onyx.utils.logger import format_error_for_logging
from onyx.utils.logger import setup_logger
logger = setup_logger()
@@ -76,26 +72,18 @@ def _is_external_group_sync_due(cc_pair: ConnectorCredentialPair) -> bool:
"""Returns boolean indicating if external group sync is due."""
if cc_pair.access_type != AccessType.SYNC:
task_logger.error(
f"Recieved non-sync CC Pair {cc_pair.id} for external "
f"group sync. Actual access type: {cc_pair.access_type}"
)
return False
# skip external group sync if not active
if cc_pair.status != ConnectorCredentialPairStatus.ACTIVE:
return False
if cc_pair.status == ConnectorCredentialPairStatus.DELETING:
task_logger.debug(
f"Skipping group sync for CC Pair {cc_pair.id} - "
f"CC Pair is being deleted"
)
return False
# If there is no group sync function for the connector, we don't run the sync
# This is fine because not all sources have a concept of groups
if not GROUP_PERMISSIONS_FUNC_MAP.get(cc_pair.connector.source):
task_logger.debug(
f"Skipping group sync for CC Pair {cc_pair.id} - "
f"no group sync function for {cc_pair.connector.source}"
)
return False
# If the last sync is None, it has never been run so we run the sync
@@ -123,11 +111,11 @@ def _is_external_group_sync_due(cc_pair: ConnectorCredentialPair) -> bool:
soft_time_limit=JOB_TIMEOUT,
bind=True,
)
def check_for_external_group_sync(self: Task, *, tenant_id: str) -> bool | None:
def check_for_external_group_sync(self: Task, *, tenant_id: str | None) -> bool | None:
# we need to use celery's redis client to access its redis data
# (which lives on a different db number)
r = get_redis_client()
r_replica = get_redis_replica_client()
r = get_redis_client(tenant_id=tenant_id)
r_replica = get_redis_replica_client(tenant_id=tenant_id)
r_celery: Redis = self.app.broker_connection().channel().client # type: ignore
lock_beat: RedisLock = r.lock(
@@ -137,14 +125,11 @@ def check_for_external_group_sync(self: Task, *, tenant_id: str) -> bool | None:
# these tasks should never overlap
if not lock_beat.acquire(blocking=False):
task_logger.warning(
f"Failed to acquire beat lock for external group sync: {tenant_id}"
)
return None
try:
cc_pair_ids_to_sync: list[int] = []
with get_session_with_current_tenant() as db_session:
with get_session_with_tenant(tenant_id) as db_session:
cc_pairs = get_all_auto_sync_cc_pairs(db_session)
# We only want to sync one cc_pair per source type in
@@ -152,10 +137,7 @@ def check_for_external_group_sync(self: Task, *, tenant_id: str) -> bool | None:
for source in GROUP_PERMISSIONS_IS_CC_PAIR_AGNOSTIC:
# These are ordered by cc_pair id so the first one is the one we want
cc_pairs_to_dedupe = get_cc_pairs_by_source(
db_session,
source,
access_type=AccessType.SYNC,
status=ConnectorCredentialPairStatus.ACTIVE,
db_session, source, only_sync=True
)
# We only want to sync one cc_pair per source type
# in GROUP_PERMISSIONS_IS_CC_PAIR_AGNOSTIC so we dedupe here
@@ -202,17 +184,12 @@ def check_for_external_group_sync(self: Task, *, tenant_id: str) -> bool | None:
task_logger.info(
"Soft time limit exceeded, task is being terminated gracefully."
)
except Exception as e:
error_msg = format_error_for_logging(e)
task_logger.warning(
f"Unexpected check_for_external_group_sync exception: tenant={tenant_id} {error_msg}"
)
except Exception:
task_logger.exception(f"Unexpected exception: tenant={tenant_id}")
finally:
if lock_beat.owned():
lock_beat.release()
task_logger.info(f"check_for_external_group_sync finished: tenant={tenant_id}")
return True
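For readers skimming the diff: the beat task above follows a standard non-overlapping pattern — grab a Redis lock without blocking, bail out if another run already holds it, and release only if the lock is still owned. A minimal sketch of that shape (not the project's actual helper), assuming only a redis-py client:

```python
# Illustrative sketch of the non-overlapping beat-task pattern: acquire a Redis
# lock without blocking, skip the run if another worker holds it, and release
# only if we still own it at the end.
from redis import Redis
from redis.lock import Lock as RedisLock

LOCK_TIMEOUT = 120  # assumed value; the real code uses a shared constant


def run_beat_task_once(r: Redis, lock_name: str) -> bool | None:
    lock: RedisLock = r.lock(lock_name, timeout=LOCK_TIMEOUT)
    if not lock.acquire(blocking=False):
        # another beat invocation is still running; skip this cycle
        return None
    try:
        # ... do the periodic work, calling lock.reacquire() between long steps ...
        return True
    finally:
        if lock.owned():
            lock.release()
```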
@@ -220,7 +197,7 @@ def try_creating_external_group_sync_task(
app: Celery,
cc_pair_id: int,
r: Redis,
tenant_id: str,
tenant_id: str | None,
) -> str | None:
"""Returns an int if syncing is needed. The int represents the number of sync tasks generated.
Returns None if no syncing is required."""
@@ -228,12 +205,20 @@ def try_creating_external_group_sync_task(
redis_connector = RedisConnector(tenant_id, cc_pair_id)
LOCK_TIMEOUT = 30
lock: RedisLock = r.lock(
DANSWER_REDIS_FUNCTION_LOCK_PREFIX + "try_generate_external_group_sync_tasks",
timeout=LOCK_TIMEOUT,
)
acquired = lock.acquire(blocking_timeout=LOCK_TIMEOUT / 2)
if not acquired:
return None
try:
# Don't kick off a new sync if the previous one is still running
if redis_connector.external_group_sync.fenced:
logger.warning(
f"Skipping external group sync for CC Pair {cc_pair_id} - already running."
)
return None
redis_connector.external_group_sync.generator_clear()
@@ -242,7 +227,7 @@ def try_creating_external_group_sync_task(
# create before setting fence to avoid race condition where the monitoring
# task updates the sync record before it is created
try:
with get_session_with_current_tenant() as db_session:
with get_session_with_tenant(tenant_id) as db_session:
insert_sync_record(
db_session=db_session,
entity_id=cc_pair_id,
@@ -279,19 +264,15 @@ def try_creating_external_group_sync_task(
redis_connector.external_group_sync.set_fence(payload)
payload_id = payload.id
except Exception as e:
error_msg = format_error_for_logging(e)
task_logger.warning(
f"Unexpected try_creating_external_group_sync_task exception: cc_pair={cc_pair_id} {error_msg}"
)
except Exception:
task_logger.exception(
f"Unexpected exception while trying to create external group sync task: cc_pair={cc_pair_id}"
)
return None
finally:
if lock.owned():
lock.release()
task_logger.info(
f"try_creating_external_group_sync_task finished: cc_pair={cc_pair_id} payload_id={payload_id}"
)
return payload_id
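The kickoff helper above takes a short-lived Redis function lock (waiting at most half its timeout) and refuses to queue a new sync while a fence for the same connector is still set. A condensed sketch with hypothetical fence_exists / set_fence stand-ins for the RedisConnector fence helpers:

```python
# Condensed sketch of the fence-guarded kickoff; fence_exists and set_fence are
# hypothetical stand-ins for the RedisConnector external_group_sync helpers.
from collections.abc import Callable
from uuid import uuid4

from redis import Redis
from redis.lock import Lock as RedisLock

LOCK_TIMEOUT = 30


def try_kickoff(
    r: Redis,
    lock_key: str,
    fence_exists: Callable[[], bool],
    set_fence: Callable[[str], None],
) -> str | None:
    lock: RedisLock = r.lock(lock_key, timeout=LOCK_TIMEOUT)
    # wait at most half the lock timeout so the calling beat task can't stall here
    if not lock.acquire(blocking_timeout=LOCK_TIMEOUT / 2):
        return None
    try:
        if fence_exists():
            return None  # previous sync still running; don't double-queue
        payload_id = uuid4().hex[:8]  # placeholder; the real code uses make_short_id()
        set_fence(payload_id)
        return payload_id
    finally:
        if lock.owned():
            lock.release()
```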
@@ -306,7 +287,7 @@ def try_creating_external_group_sync_task(
def connector_external_group_sync_generator_task(
self: Task,
cc_pair_id: int,
tenant_id: str,
tenant_id: str | None,
) -> None:
"""
External group sync task for a given connector credential pair
@@ -315,7 +296,7 @@ def connector_external_group_sync_generator_task(
redis_connector = RedisConnector(tenant_id, cc_pair_id)
r = get_redis_client()
r = get_redis_client(tenant_id=tenant_id)
# this wait is needed to avoid a race condition where
# the primary worker sends the task and it is immediately executed
@@ -323,26 +304,22 @@ def connector_external_group_sync_generator_task(
start = time.monotonic()
while True:
if time.monotonic() - start > CELERY_TASK_WAIT_FOR_FENCE_TIMEOUT:
msg = (
raise ValueError(
f"connector_external_group_sync_generator_task - timed out waiting for fence to be ready: "
f"fence={redis_connector.external_group_sync.fence_key}"
)
emit_background_error(msg, cc_pair_id=cc_pair_id)
raise ValueError(msg)
if not redis_connector.external_group_sync.fenced: # The fence must exist
msg = (
raise ValueError(
f"connector_external_group_sync_generator_task - fence not found: "
f"fence={redis_connector.external_group_sync.fence_key}"
)
emit_background_error(msg, cc_pair_id=cc_pair_id)
raise ValueError(msg)
payload = redis_connector.external_group_sync.payload # The payload must exist
if not payload:
msg = "connector_external_group_sync_generator_task: payload invalid or not found"
emit_background_error(msg, cc_pair_id=cc_pair_id)
raise ValueError(msg)
raise ValueError(
"connector_external_group_sync_generator_task: payload invalid or not found"
)
if payload.celery_task_id is None:
logger.info(
@@ -367,77 +344,42 @@ def connector_external_group_sync_generator_task(
acquired = lock.acquire(blocking=False)
if not acquired:
msg = f"External group sync task already running, exiting...: cc_pair={cc_pair_id}"
emit_background_error(msg, cc_pair_id=cc_pair_id)
task_logger.error(msg)
task_logger.warning(
f"External group sync task already running, exiting...: cc_pair={cc_pair_id}"
)
return None
try:
payload.started = datetime.now(timezone.utc)
redis_connector.external_group_sync.set_fence(payload)
with get_session_with_current_tenant() as db_session:
with get_session_with_tenant(tenant_id) as db_session:
cc_pair = get_connector_credential_pair_from_id(
db_session=db_session,
cc_pair_id=cc_pair_id,
eager_load_credential=True,
)
if cc_pair is None:
raise ValueError(
f"No connector credential pair found for id: {cc_pair_id}"
)
try:
created = validate_ccpair_for_user(
cc_pair.connector.id,
cc_pair.credential.id,
db_session,
enforce_creation=False,
)
if not created:
task_logger.warning(
f"Unable to create connector credential pair for id: {cc_pair_id}"
)
except Exception:
task_logger.exception(
f"validate_ccpair_permissions_sync exceptioned: cc_pair={cc_pair_id}"
)
update_connector_credential_pair(
db_session=db_session,
connector_id=cc_pair.connector.id,
credential_id=cc_pair.credential.id,
status=ConnectorCredentialPairStatus.INVALID,
)
raise
source_type = cc_pair.connector.source
ext_group_sync_func = GROUP_PERMISSIONS_FUNC_MAP.get(source_type)
if ext_group_sync_func is None:
msg = f"No external group sync func found for {source_type} for cc_pair: {cc_pair_id}"
emit_background_error(msg, cc_pair_id=cc_pair_id)
raise ValueError(msg)
raise ValueError(
f"No external group sync func found for {source_type} for cc_pair: {cc_pair_id}"
)
logger.info(
f"Syncing external groups for {source_type} for cc_pair: {cc_pair_id}"
)
external_user_groups: list[ExternalUserGroup] = []
try:
external_user_groups = ext_group_sync_func(cc_pair)
except ConnectorValidationError as e:
msg = f"Error syncing external groups for {source_type} for cc_pair: {cc_pair_id} {e}"
update_connector_credential_pair(
db_session=db_session,
connector_id=cc_pair.connector.id,
credential_id=cc_pair.credential.id,
status=ConnectorCredentialPairStatus.INVALID,
)
raise e
external_user_groups: list[ExternalUserGroup] = ext_group_sync_func(cc_pair)
logger.info(
f"Syncing {len(external_user_groups)} external user groups for {source_type}"
)
logger.debug(f"New external user groups: {external_user_groups}")
replace_user__ext_group_for_cc_pair(
db_session=db_session,
@@ -458,19 +400,11 @@ def connector_external_group_sync_generator_task(
sync_status=SyncStatus.SUCCESS,
)
except Exception as e:
error_msg = format_error_for_logging(e)
task_logger.warning(
f"External group sync exceptioned: cc_pair={cc_pair_id} payload_id={payload.id} {error_msg}"
)
task_logger.exception(
f"External group sync exceptioned: cc_pair={cc_pair_id} payload_id={payload.id}"
)
msg = f"External group sync exceptioned: cc_pair={cc_pair_id} payload_id={payload.id}"
task_logger.exception(msg)
emit_background_error(msg + f"\n\n{e}", cc_pair_id=cc_pair_id)
with get_session_with_current_tenant() as db_session:
with get_session_with_tenant(tenant_id) as db_session:
update_sync_record_status(
db_session=db_session,
entity_id=cc_pair_id,
@@ -493,7 +427,7 @@ def connector_external_group_sync_generator_task(
def validate_external_group_sync_fences(
tenant_id: str,
tenant_id: str | None,
celery_app: Celery,
r: Redis,
r_replica: Redis,
@@ -521,11 +455,12 @@ def validate_external_group_sync_fences(
)
lock_beat.reacquire()
return
def validate_external_group_sync_fence(
tenant_id: str,
tenant_id: str | None,
key_bytes: bytes,
reserved_tasks: set[str],
r_celery: Redis,
@@ -557,11 +492,9 @@ def validate_external_group_sync_fence(
fence_key = key_bytes.decode("utf-8")
cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)
if cc_pair_id_str is None:
msg = (
task_logger.warning(
f"validate_external_group_sync_fence - could not parse id from {fence_key}"
)
emit_background_error(msg)
task_logger.error(msg)
return
cc_pair_id = int(cc_pair_id_str)
@@ -576,14 +509,12 @@ def validate_external_group_sync_fence(
try:
payload = redis_connector.external_group_sync.payload
except ValidationError:
msg = (
task_logger.exception(
"validate_external_group_sync_fence - "
"Resetting fence because fence schema is out of date: "
f"cc_pair={cc_pair_id} "
f"fence={fence_key}"
)
task_logger.exception(msg)
emit_background_error(msg, cc_pair_id=cc_pair_id)
redis_connector.external_group_sync.reset()
return
@@ -620,15 +551,12 @@ def validate_external_group_sync_fence(
# return
# celery tasks don't exist and the active signal has expired, possibly due to a crash. Clean it up.
emit_background_error(
message=(
"validate_external_group_sync_fence - "
"Resetting fence because no associated celery tasks were found: "
f"cc_pair={cc_pair_id} "
f"fence={fence_key} "
f"payload_id={payload.id}"
),
cc_pair_id=cc_pair_id,
logger.warning(
"validate_external_group_sync_fence - "
"Resetting fence because no associated celery tasks were found: "
f"cc_pair={cc_pair_id} "
f"fence={fence_key} "
f"payload_id={payload.id}"
)
redis_connector.external_group_sync.reset()
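The validation logic in this hunk resets a fence when no Celery task references it anymore, on the assumption that the worker crashed. A stripped-down sketch of that decision, using a hypothetical FencePayload type in place of the real payload model:

```python
# Sketch of the orphaned-fence cleanup decision; FencePayload is a hypothetical
# stand-in, and the real code also checks an "active" signal with a TTL before
# concluding the task crashed.
from dataclasses import dataclass


@dataclass
class FencePayload:
    id: str
    celery_task_id: str | None


def should_reset_fence(
    payload: FencePayload,
    reserved_tasks: set[str],
    queued_tasks: set[str],
) -> bool:
    if payload.celery_task_id is None:
        return False  # fence is still being set up; leave it alone
    if payload.celery_task_id in reserved_tasks or payload.celery_task_id in queued_tasks:
        return False  # a worker still knows about the task
    # no celery task anywhere references this fence: likely a crash, clean it up
    return True
```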

File diff suppressed because it is too large

View File

@@ -23,7 +23,7 @@ from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisConstants
from onyx.db.engine import get_db_current_time
from onyx.db.engine import get_session_with_current_tenant
from onyx.db.engine import get_session_with_tenant
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import IndexingStatus
from onyx.db.enums import IndexModelStatus
@@ -93,25 +93,27 @@ def get_unfenced_index_attempt_ids(db_session: Session, r: redis.Redis) -> list[
return unfenced_attempts
class IndexingCallbackBase(IndexingHeartbeatInterface):
class IndexingCallback(IndexingHeartbeatInterface):
PARENT_CHECK_INTERVAL = 60
def __init__(
self,
parent_pid: int,
redis_connector: RedisConnector,
redis_connector_index: RedisConnectorIndex,
redis_lock: RedisLock,
redis_client: Redis,
):
super().__init__()
self.parent_pid = parent_pid
self.redis_connector: RedisConnector = redis_connector
self.redis_connector_index: RedisConnectorIndex = redis_connector_index
self.redis_lock: RedisLock = redis_lock
self.redis_client = redis_client
self.started: datetime = datetime.now(timezone.utc)
self.redis_lock.reacquire()
self.last_tag: str = f"{self.__class__.__name__}.__init__"
self.last_tag: str = "IndexingCallback.__init__"
self.last_lock_reacquire: datetime = datetime.now(timezone.utc)
self.last_lock_monotonic = time.monotonic()
@@ -125,8 +127,8 @@ class IndexingCallbackBase(IndexingHeartbeatInterface):
def progress(self, tag: str, amount: int) -> None:
# rkuo: this shouldn't be necessary yet because we spawn the process this runs inside
# with daemon=True. It seems likely some indexing tasks will need to spawn other processes
# eventually, which daemon=True prevents, so leave this code in until we're ready to test it.
# with daemon = True. It seems likely some indexing tasks will need to spawn other processes eventually
# so leave this code in until we're ready to test it.
# if self.parent_pid:
# # check if the parent pid is alive so we aren't running as a zombie
@@ -141,6 +143,8 @@ class IndexingCallbackBase(IndexingHeartbeatInterface):
# self.last_parent_check = now
try:
self.redis_connector.prune.set_active()
current_time = time.monotonic()
if current_time - self.last_lock_monotonic >= (
CELERY_GENERIC_BEAT_LOCK_TIMEOUT / 4
@@ -152,7 +156,7 @@ class IndexingCallbackBase(IndexingHeartbeatInterface):
self.last_tag = tag
except LockError:
logger.exception(
f"{self.__class__.__name__} - lock.reacquire exceptioned: "
f"IndexingCallback - lock.reacquire exceptioned: "
f"lock_timeout={self.redis_lock.timeout} "
f"start={self.started} "
f"last_tag={self.last_tag} "
@@ -163,31 +167,13 @@ class IndexingCallbackBase(IndexingHeartbeatInterface):
redis_lock_dump(self.redis_lock, self.redis_client)
raise
class IndexingCallback(IndexingCallbackBase):
def __init__(
self,
parent_pid: int,
redis_connector: RedisConnector,
redis_lock: RedisLock,
redis_client: Redis,
redis_connector_index: RedisConnectorIndex,
):
super().__init__(parent_pid, redis_connector, redis_lock, redis_client)
self.redis_connector_index: RedisConnectorIndex = redis_connector_index
def progress(self, tag: str, amount: int) -> None:
self.redis_connector_index.set_active()
self.redis_connector_index.set_connector_active()
super().progress(tag, amount)
self.redis_client.incrby(
self.redis_connector_index.generator_progress_key, amount
)
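This hunk shows the callback refactor: the shared lock-keepalive logic lives in IndexingCallbackBase, and subclasses such as IndexingCallback (and PruneCallback later in this diff) layer their own activity markers on top of progress(). A condensed sketch of that shape, not the exact project code:

```python
# Condensed sketch of the callback split: the base class keeps the Redis lock
# alive, subclasses add task-specific bookkeeping in progress().
class CallbackBaseSketch:
    def __init__(self, redis_lock, redis_client):
        self.redis_lock = redis_lock
        self.redis_client = redis_client

    def progress(self, tag: str, amount: int) -> None:
        # periodically refresh the beat lock so long-running work doesn't lose it
        self.redis_lock.reacquire()


class IndexingCallbackSketch(CallbackBaseSketch):
    def __init__(self, redis_lock, redis_client, progress_key: str):
        super().__init__(redis_lock, redis_client)
        self.progress_key = progress_key

    def progress(self, tag: str, amount: int) -> None:
        super().progress(tag, amount)
        # indexing additionally counts how many documents were processed
        self.redis_client.incrby(self.progress_key, amount)
```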
def validate_indexing_fence(
tenant_id: str,
tenant_id: str | None,
key_bytes: bytes,
reserved_tasks: set[str],
r_celery: Redis,
@@ -254,8 +240,7 @@ def validate_indexing_fence(
# it would be odd to get here as there isn't that much that can go wrong during
# initial fence setup, but it's still worth making sure we can recover
logger.info(
f"validate_indexing_fence - "
f"Resetting fence in basic state without any activity: fence={fence_key}"
f"validate_indexing_fence - Resetting fence in basic state without any activity: fence={fence_key}"
)
redis_connector_index.reset()
return
@@ -311,7 +296,7 @@ def validate_indexing_fence(
def validate_indexing_fences(
tenant_id: str,
tenant_id: str | None,
r_replica: Redis,
r_celery: Redis,
lock_beat: RedisLock,
@@ -332,7 +317,7 @@ def validate_indexing_fences(
if not key_str.startswith(RedisConnectorIndex.FENCE_PREFIX):
continue
with get_session_with_current_tenant() as db_session:
with get_session_with_tenant(tenant_id) as db_session:
validate_indexing_fence(
tenant_id,
key_bytes,
@@ -442,7 +427,7 @@ def try_creating_indexing_task(
reindex: bool,
db_session: Session,
r: Redis,
tenant_id: str,
tenant_id: str | None,
) -> int | None:
"""Checks for any conditions that should block the indexing task from being
created, then creates the task.

View File

@@ -8,7 +8,7 @@ from onyx.background.celery.apps.app_base import task_logger
from onyx.configs.app_configs import JOB_TIMEOUT
from onyx.configs.app_configs import LLM_MODEL_UPDATE_API_URL
from onyx.configs.constants import OnyxCeleryTask
from onyx.db.engine import get_session_with_current_tenant
from onyx.db.engine import get_session_with_tenant
from onyx.db.models import LLMProvider
@@ -59,7 +59,7 @@ def _process_model_list_response(model_list_json: Any) -> list[str]:
trail=False,
bind=True,
)
def check_for_llm_model_update(self: Task, *, tenant_id: str) -> bool | None:
def check_for_llm_model_update(self: Task, *, tenant_id: str | None) -> bool | None:
if not LLM_MODEL_UPDATE_API_URL:
raise ValueError("LLM model update API URL not configured")
@@ -75,7 +75,7 @@ def check_for_llm_model_update(self: Task, *, tenant_id: str) -> bool | None:
return None
# Then update the database with the fetched models
with get_session_with_current_tenant() as db_session:
with get_session_with_tenant(tenant_id) as db_session:
# Get the default LLM provider
default_provider = (
db_session.query(LLMProvider)

View File

@@ -17,8 +17,7 @@ from sqlalchemy import text
from sqlalchemy.orm import Session
from onyx.background.celery.apps.app_base import task_logger
from onyx.background.celery.celery_redis import celery_get_queue_length
from onyx.background.celery.celery_redis import celery_get_unacked_task_ids
from onyx.background.celery.tasks.vespa.tasks import celery_get_queue_length
from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import ONYX_CLOUD_TENANT_ID
from onyx.configs.constants import OnyxCeleryQueues
@@ -26,8 +25,7 @@ from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisLocks
from onyx.db.engine import get_all_tenant_ids
from onyx.db.engine import get_db_current_time
from onyx.db.engine import get_session_with_current_tenant
from onyx.db.engine import get_session_with_shared_schema
from onyx.db.engine import get_session_with_tenant
from onyx.db.enums import IndexingStatus
from onyx.db.enums import SyncStatus
from onyx.db.enums import SyncType
@@ -43,6 +41,7 @@ from onyx.utils.telemetry import optional_telemetry
from onyx.utils.telemetry import RecordType
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
_MONITORING_SOFT_TIME_LIMIT = 60 * 5 # 5 minutes
_MONITORING_TIME_LIMIT = _MONITORING_SOFT_TIME_LIMIT + 60 # 6 minutes
@@ -91,7 +90,7 @@ class Metric(BaseModel):
}
task_logger.info(json.dumps(data))
def emit(self, tenant_id: str) -> None:
def emit(self, tenant_id: str | None) -> None:
# Convert value to appropriate type based on the input value
bool_value = None
float_value = None
@@ -190,9 +189,9 @@ def _build_connector_start_latency_metric(
desired_start_time = cc_pair.connector.time_created
else:
if not cc_pair.connector.refresh_freq:
task_logger.debug(
"Connector has no refresh_freq and this is a non-initial index attempt. "
"Assuming user manually triggered indexing, so we'll skip start latency metric."
task_logger.error(
"Found non-initial index attempt for connector "
"without refresh_freq. This should never happen."
)
return None
@@ -421,7 +420,6 @@ def _collect_sync_metrics(db_session: Session, redis_std: Redis) -> list[Metric]
- Throughput (docs/min) (only if success)
- Raw start/end times for each sync
"""
one_hour_ago = get_db_current_time(db_session) - timedelta(hours=1)
# Get all sync records that ended in the last hour
@@ -589,10 +587,6 @@ def _collect_sync_metrics(db_session: Session, redis_std: Redis) -> list[Metric]
entity = db_session.scalar(
select(UserGroup).where(UserGroup.id == sync_record.entity_id)
)
else:
# Only user groups and document set sync records have
# an associated entity we can use for latency metrics
continue
if entity is None:
task_logger.error(
@@ -656,7 +650,7 @@ def build_job_id(
queue=OnyxCeleryQueues.MONITORING,
bind=True,
)
def monitor_background_processes(self: Task, *, tenant_id: str) -> None:
def monitor_background_processes(self: Task, *, tenant_id: str | None) -> None:
"""Collect and emit metrics about background processes.
This task runs periodically to gather metrics about:
- Queue lengths for different Celery queues
@@ -668,7 +662,7 @@ def monitor_background_processes(self: Task, *, tenant_id: str) -> None:
CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
task_logger.info("Starting background monitoring")
r = get_redis_client()
r = get_redis_client(tenant_id=tenant_id)
lock_monitoring: RedisLock = r.lock(
OnyxRedisLocks.MONITOR_BACKGROUND_PROCESSES_LOCK,
@@ -683,7 +677,7 @@ def monitor_background_processes(self: Task, *, tenant_id: str) -> None:
try:
# Get Redis client for Celery broker
redis_celery = self.app.broker_connection().channel().client # type: ignore
redis_std = get_redis_client()
redis_std = get_redis_client(tenant_id=tenant_id)
# Define metric collection functions and their dependencies
metric_functions: list[Callable[[], list[Metric]]] = [
@@ -693,7 +687,7 @@ def monitor_background_processes(self: Task, *, tenant_id: str) -> None:
]
# Collect and log each metric
with get_session_with_current_tenant() as db_session:
with get_session_with_tenant(tenant_id) as db_session:
for metric_fn in metric_functions:
metrics = metric_fn()
for metric in metrics:
@@ -723,7 +717,7 @@ def monitor_background_processes(self: Task, *, tenant_id: str) -> None:
@shared_task(
name=OnyxCeleryTask.CLOUD_MONITOR_ALEMBIC,
name=OnyxCeleryTask.CLOUD_CHECK_ALEMBIC,
)
def cloud_check_alembic() -> bool | None:
"""A task to verify that all tenants are on the same alembic revision.
@@ -771,18 +765,19 @@ def cloud_check_alembic() -> bool | None:
if tenant_id is None:
continue
with get_session_with_shared_schema() as session:
with get_session_with_tenant(tenant_id=None) as session:
try:
result = session.execute(
text(f'SELECT * FROM "{tenant_id}".alembic_version LIMIT 1')
)
result_scalar: str | None = result.scalar_one_or_none()
if result_scalar is None:
raise ValueError("Alembic version should not be None.")
tenant_to_revision[tenant_id] = result_scalar
except Exception:
task_logger.error(f"Tenant {tenant_id} has no revision!")
task_logger.warning(f"Tenant {tenant_id} has no revision!")
tenant_to_revision[tenant_id] = ALEMBIC_NULL_REVISION
# get the total count of each revision
@@ -852,55 +847,3 @@ def cloud_check_alembic() -> bool | None:
f"cloud_check_alembic finished: num_tenants={len(tenant_ids)} elapsed={time_elapsed:.2f}"
)
return True
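cloud_check_alembic boils down to reading each tenant schema's alembic_version table and confirming every tenant reports the same revision. A trimmed sketch, assuming a SQLAlchemy session factory and Alembic's standard version_num column:

```python
# Trimmed sketch of the per-tenant revision check; session_factory is assumed
# to return a SQLAlchemy Session bound to the shared database.
from collections import Counter
from collections.abc import Callable

from sqlalchemy import text
from sqlalchemy.orm import Session


def collect_revisions(
    session_factory: Callable[[], Session], tenant_ids: list[str]
) -> dict[str, str | None]:
    tenant_to_revision: dict[str, str | None] = {}
    for tenant_id in tenant_ids:
        # one session per tenant so a failed query doesn't poison later reads
        with session_factory() as session:
            try:
                result = session.execute(
                    text(f'SELECT version_num FROM "{tenant_id}".alembic_version LIMIT 1')
                )
                tenant_to_revision[tenant_id] = result.scalar_one_or_none()
            except Exception:
                tenant_to_revision[tenant_id] = None  # tenant has no revision row
    return tenant_to_revision


def all_on_same_revision(tenant_to_revision: dict[str, str | None]) -> bool:
    return len(Counter(tenant_to_revision.values())) == 1
```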
@shared_task(
name=OnyxCeleryTask.CLOUD_MONITOR_CELERY_QUEUES, ignore_result=True, bind=True
)
def cloud_monitor_celery_queues(
self: Task,
) -> None:
return monitor_celery_queues_helper(self)
@shared_task(name=OnyxCeleryTask.MONITOR_CELERY_QUEUES, ignore_result=True, bind=True)
def monitor_celery_queues(self: Task, *, tenant_id: str) -> None:
return monitor_celery_queues_helper(self)
def monitor_celery_queues_helper(
task: Task,
) -> None:
"""A task to monitor all celery queue lengths."""
r_celery = task.app.broker_connection().channel().client # type: ignore
n_celery = celery_get_queue_length("celery", r_celery)
n_indexing = celery_get_queue_length(OnyxCeleryQueues.CONNECTOR_INDEXING, r_celery)
n_sync = celery_get_queue_length(OnyxCeleryQueues.VESPA_METADATA_SYNC, r_celery)
n_deletion = celery_get_queue_length(OnyxCeleryQueues.CONNECTOR_DELETION, r_celery)
n_pruning = celery_get_queue_length(OnyxCeleryQueues.CONNECTOR_PRUNING, r_celery)
n_permissions_sync = celery_get_queue_length(
OnyxCeleryQueues.CONNECTOR_DOC_PERMISSIONS_SYNC, r_celery
)
n_external_group_sync = celery_get_queue_length(
OnyxCeleryQueues.CONNECTOR_EXTERNAL_GROUP_SYNC, r_celery
)
n_permissions_upsert = celery_get_queue_length(
OnyxCeleryQueues.DOC_PERMISSIONS_UPSERT, r_celery
)
n_indexing_prefetched = celery_get_unacked_task_ids(
OnyxCeleryQueues.CONNECTOR_INDEXING, r_celery
)
task_logger.info(
f"Queue lengths: celery={n_celery} "
f"indexing={n_indexing} "
f"indexing_prefetched={len(n_indexing_prefetched)} "
f"sync={n_sync} "
f"deletion={n_deletion} "
f"pruning={n_pruning} "
f"permissions_sync={n_permissions_sync} "
f"external_group_sync={n_external_group_sync} "
f"permissions_upsert={n_permissions_upsert} "
)
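The queue monitoring helper in this hunk relies on the Redis broker storing each Celery queue as a list, so queue depth is essentially a single LLEN. A simplified sketch; a real implementation may also need to sum priority-suffixed keys and read the broker's unacked structures for prefetched tasks:

```python
# Simplified sketch: with the Redis broker, a Celery queue named "celery"
# (or any OnyxCeleryQueues value) is a Redis list, so its depth is LLEN.
from redis import Redis


def queue_length_sketch(queue_name: str, r_celery: Redis) -> int:
    return int(r_celery.llen(queue_name))
```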

View File

@@ -15,7 +15,7 @@ from onyx.background.celery.apps.app_base import task_logger
from onyx.configs.app_configs import JOB_TIMEOUT
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import PostgresAdvisoryLocks
from onyx.db.engine import get_session_with_current_tenant
from onyx.db.engine import get_session_with_tenant
@shared_task(
@@ -24,7 +24,7 @@ from onyx.db.engine import get_session_with_current_tenant
bind=True,
base=AbortableTask,
)
def kombu_message_cleanup_task(self: Any, tenant_id: str) -> int:
def kombu_message_cleanup_task(self: Any, tenant_id: str | None) -> int:
"""Runs periodically to clean up the kombu_message table"""
# we will select messages older than this amount to clean up
@@ -36,7 +36,7 @@ def kombu_message_cleanup_task(self: Any, tenant_id: str) -> int:
ctx["deleted"] = 0
ctx["cleanup_age"] = KOMBU_MESSAGE_CLEANUP_AGE
ctx["page_limit"] = KOMBU_MESSAGE_CLEANUP_PAGE_LIMIT
with get_session_with_current_tenant() as db_session:
with get_session_with_tenant(tenant_id) as db_session:
# Exit the task if we can't take the advisory lock
result = db_session.execute(
text("SELECT pg_try_advisory_lock(:id)"),

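The kombu cleanup task exits early unless it can take a Postgres advisory lock, which keeps multiple workers from scanning kombu_message at once. A minimal sketch of that guard, assuming an open SQLAlchemy session and an arbitrary integer lock id:

```python
# Minimal sketch of the advisory-lock guard; lock_id is an arbitrary
# application-chosen integer (the real code uses a PostgresAdvisoryLocks constant).
from sqlalchemy import text
from sqlalchemy.orm import Session


def try_advisory_lock(db_session: Session, lock_id: int) -> bool:
    result = db_session.execute(
        text("SELECT pg_try_advisory_lock(:id)"), {"id": lock_id}
    )
    acquired = bool(result.scalar())
    # advisory locks are session-scoped: release with pg_advisory_unlock(:id)
    # when done, or let them drop when the connection closes
    return acquired
```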
View File

@@ -21,7 +21,7 @@ from onyx.background.celery.celery_redis import celery_get_queue_length
from onyx.background.celery.celery_redis import celery_get_queued_task_ids
from onyx.background.celery.celery_redis import celery_get_unacked_task_ids
from onyx.background.celery.celery_utils import extract_ids_from_runnable_connector
from onyx.background.celery.tasks.indexing.utils import IndexingCallbackBase
from onyx.background.celery.tasks.indexing.utils import IndexingCallback
from onyx.configs.app_configs import ALLOW_SIMULTANEOUS_PRUNING
from onyx.configs.app_configs import JOB_TIMEOUT
from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
@@ -41,7 +41,7 @@ from onyx.db.connector_credential_pair import get_connector_credential_pair
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
from onyx.db.connector_credential_pair import get_connector_credential_pairs
from onyx.db.document import get_documents_for_connector_credential_pair
from onyx.db.engine import get_session_with_current_tenant
from onyx.db.engine import get_session_with_tenant
from onyx.db.enums import ConnectorCredentialPairStatus
from onyx.db.enums import SyncStatus
from onyx.db.enums import SyncType
@@ -55,7 +55,6 @@ from onyx.redis.redis_connector_prune import RedisConnectorPrunePayload
from onyx.redis.redis_pool import get_redis_client
from onyx.redis.redis_pool import get_redis_replica_client
from onyx.server.utils import make_short_id
from onyx.utils.logger import format_error_for_logging
from onyx.utils.logger import LoggerContextVars
from onyx.utils.logger import pruning_ctx
from onyx.utils.logger import setup_logger
@@ -63,12 +62,6 @@ from onyx.utils.logger import setup_logger
logger = setup_logger()
class PruneCallback(IndexingCallbackBase):
def progress(self, tag: str, amount: int) -> None:
self.redis_connector.prune.set_active()
super().progress(tag, amount)
"""Jobs / utils for kicking off pruning tasks."""
@@ -114,9 +107,9 @@ def _is_pruning_due(cc_pair: ConnectorCredentialPair) -> bool:
soft_time_limit=JOB_TIMEOUT,
bind=True,
)
def check_for_pruning(self: Task, *, tenant_id: str) -> bool | None:
r = get_redis_client()
r_replica = get_redis_replica_client()
def check_for_pruning(self: Task, *, tenant_id: str | None) -> bool | None:
r = get_redis_client(tenant_id=tenant_id)
r_replica = get_redis_replica_client(tenant_id=tenant_id)
r_celery: Redis = self.app.broker_connection().channel().client # type: ignore
lock_beat: RedisLock = r.lock(
@@ -129,39 +122,34 @@ def check_for_pruning(self: Task, *, tenant_id: str) -> bool | None:
return None
try:
# the entire task needs to run frequently in order to finalize pruning
cc_pair_ids: list[int] = []
with get_session_with_tenant(tenant_id) as db_session:
cc_pairs = get_connector_credential_pairs(db_session)
for cc_pair_entry in cc_pairs:
cc_pair_ids.append(cc_pair_entry.id)
# but pruning only kicks off once per hour
if not r.exists(OnyxRedisSignals.BLOCK_PRUNING):
cc_pair_ids: list[int] = []
with get_session_with_current_tenant() as db_session:
cc_pairs = get_connector_credential_pairs(db_session)
for cc_pair_entry in cc_pairs:
cc_pair_ids.append(cc_pair_entry.id)
for cc_pair_id in cc_pair_ids:
lock_beat.reacquire()
with get_session_with_tenant(tenant_id) as db_session:
cc_pair = get_connector_credential_pair_from_id(
db_session=db_session,
cc_pair_id=cc_pair_id,
)
if not cc_pair:
continue
for cc_pair_id in cc_pair_ids:
lock_beat.reacquire()
with get_session_with_current_tenant() as db_session:
cc_pair = get_connector_credential_pair_from_id(
db_session=db_session,
cc_pair_id=cc_pair_id,
)
if not cc_pair:
continue
if not _is_pruning_due(cc_pair):
continue
if not _is_pruning_due(cc_pair):
continue
payload_id = try_creating_prune_generator_task(
self.app, cc_pair, db_session, r, tenant_id
)
if not payload_id:
continue
payload_id = try_creating_prune_generator_task(
self.app, cc_pair, db_session, r, tenant_id
)
if not payload_id:
continue
task_logger.info(
f"Pruning queued: cc_pair={cc_pair.id} id={payload_id}"
)
r.set(OnyxRedisSignals.BLOCK_PRUNING, 1, ex=3600)
task_logger.info(
f"Pruning queued: cc_pair={cc_pair.id} id={payload_id}"
)
# we want to run this less frequently than the overall task
lock_beat.reacquire()
@@ -175,34 +163,16 @@ def check_for_pruning(self: Task, *, tenant_id: str) -> bool | None:
task_logger.exception("Exception while validating pruning fences")
r.set(OnyxRedisSignals.BLOCK_VALIDATE_PRUNING_FENCES, 1, ex=300)
# use a lookup table to find active fences. We still have to verify the fence
# exists since it is an optimization and not the source of truth.
lock_beat.reacquire()
keys = cast(set[Any], r_replica.smembers(OnyxRedisConstants.ACTIVE_FENCES))
for key in keys:
key_bytes = cast(bytes, key)
if not r.exists(key_bytes):
r.srem(OnyxRedisConstants.ACTIVE_FENCES, key_bytes)
continue
key_str = key_bytes.decode("utf-8")
if key_str.startswith(RedisConnectorPrune.FENCE_PREFIX):
with get_session_with_current_tenant() as db_session:
monitor_ccpair_pruning_taskset(tenant_id, key_bytes, r, db_session)
except SoftTimeLimitExceeded:
task_logger.info(
"Soft time limit exceeded, task is being terminated gracefully."
)
except Exception as e:
error_msg = format_error_for_logging(e)
task_logger.warning(f"Unexpected pruning check exception: {error_msg}")
except Exception:
task_logger.exception("Unexpected exception during pruning check")
finally:
if lock_beat.owned():
lock_beat.release()
task_logger.info(f"check_for_pruning finished: tenant={tenant_id}")
return True
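The hourly gating in check_for_pruning is just a Redis key with a one-hour TTL: the beat task still runs every cycle to finalize work, but the expensive kickoff loop only executes once the signal key has expired. A compact sketch (the key name here is a placeholder; the real one comes from OnyxRedisSignals):

```python
# Compact sketch of the TTL-gated kickoff: the signal key blocks the expensive
# path until it expires, so kickoff happens at most once per window.
from redis import Redis

BLOCK_KEY = "signal:block_pruning"  # placeholder name


def maybe_kick_off(r: Redis, window_seconds: int = 3600) -> bool:
    if r.exists(BLOCK_KEY):
        return False  # kicked off recently; only do the cheap finalization work
    # ... queue the pruning tasks here ...
    r.set(BLOCK_KEY, 1, ex=window_seconds)
    return True
```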
@@ -211,7 +181,7 @@ def try_creating_prune_generator_task(
cc_pair: ConnectorCredentialPair,
db_session: Session,
r: Redis,
tenant_id: str,
tenant_id: str | None,
) -> str | None:
"""Checks for any conditions that should block the pruning generator task from being
created, then creates the task.
@@ -304,19 +274,13 @@ def try_creating_prune_generator_task(
redis_connector.prune.set_fence(payload)
payload_id = payload.id
except Exception as e:
error_msg = format_error_for_logging(e)
task_logger.warning(
f"Unexpected try_creating_prune_generator_task exception: cc_pair={cc_pair.id} {error_msg}"
)
except Exception:
task_logger.exception(f"Unexpected exception: cc_pair={cc_pair.id}")
return None
finally:
if lock.owned():
lock.release()
task_logger.info(
f"try_creating_prune_generator_task finished: cc_pair={cc_pair.id} payload_id={payload_id}"
)
return payload_id
@@ -333,7 +297,7 @@ def connector_pruning_generator_task(
cc_pair_id: int,
connector_id: int,
credential_id: int,
tenant_id: str,
tenant_id: str | None,
) -> None:
"""connector pruning task. For a cc pair, this task pulls all document IDs from the source
and compares those IDs to locally stored documents and deletes all locally stored IDs missing
@@ -352,7 +316,7 @@ def connector_pruning_generator_task(
redis_connector = RedisConnector(tenant_id, cc_pair_id)
r = get_redis_client()
r = get_redis_client(tenant_id=tenant_id)
# this wait is needed to avoid a race condition where
# the primary worker sends the task and it is immediately executed
@@ -410,7 +374,7 @@ def connector_pruning_generator_task(
return None
try:
with get_session_with_current_tenant() as db_session:
with get_session_with_tenant(tenant_id) as db_session:
cc_pair = get_connector_credential_pair(
db_session=db_session,
connector_id=connector_id,
@@ -440,7 +404,6 @@ def connector_pruning_generator_task(
f"cc_pair={cc_pair_id} "
f"connector_source={cc_pair.connector.source}"
)
runnable_connector = instantiate_connector(
db_session,
cc_pair.connector.source,
@@ -450,11 +413,12 @@ def connector_pruning_generator_task(
)
search_settings = get_current_search_settings(db_session)
redis_connector.new_index(search_settings.id)
redis_connector_index = redis_connector.new_index(search_settings.id)
callback = PruneCallback(
callback = IndexingCallback(
0,
redis_connector,
redis_connector_index,
lock,
r,
)
@@ -517,11 +481,11 @@ def connector_pruning_generator_task(
)
"""Monitoring pruning utils"""
"""Monitoring pruning utils, called in monitor_vespa_sync"""
def monitor_ccpair_pruning_taskset(
tenant_id: str, key_bytes: bytes, r: Redis, db_session: Session
tenant_id: str | None, key_bytes: bytes, r: Redis, db_session: Session
) -> None:
fence_key = key_bytes.decode("utf-8")
cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)
@@ -567,7 +531,7 @@ def monitor_ccpair_pruning_taskset(
def validate_pruning_fences(
tenant_id: str,
tenant_id: str | None,
r: Redis,
r_replica: Redis,
r_celery: Redis,
@@ -615,7 +579,7 @@ def validate_pruning_fences(
def validate_pruning_fence(
tenant_id: str,
tenant_id: str | None,
key_bytes: bytes,
reserved_tasks: set[str],
queued_tasks: set[str],

View File

@@ -32,7 +32,7 @@ class RetryDocumentIndex:
self,
doc_id: str,
*,
tenant_id: str,
tenant_id: str | None,
chunk_count: int | None,
) -> int:
return self.index.delete_single(
@@ -50,7 +50,7 @@ class RetryDocumentIndex:
self,
doc_id: str,
*,
tenant_id: str,
tenant_id: str | None,
chunk_count: int | None,
fields: VespaDocumentFields,
) -> int:

View File

@@ -1,5 +1,4 @@
import time
from enum import Enum
from http import HTTPStatus
import httpx
@@ -28,7 +27,7 @@ from onyx.db.document import mark_document_as_modified
from onyx.db.document import mark_document_as_synced
from onyx.db.document_set import fetch_document_sets_for_document
from onyx.db.engine import get_all_tenant_ids
from onyx.db.engine import get_session_with_current_tenant
from onyx.db.engine import get_session_with_tenant
from onyx.db.search_settings import get_active_search_settings
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.interfaces import VespaDocumentFields
@@ -46,24 +45,6 @@ LIGHT_SOFT_TIME_LIMIT = 105
LIGHT_TIME_LIMIT = LIGHT_SOFT_TIME_LIMIT + 15
class OnyxCeleryTaskCompletionStatus(str, Enum):
"""The different statuses the watchdog can finish with.
TODO: create broader success/failure/abort categories
"""
UNDEFINED = "undefined"
SUCCEEDED = "succeeded"
SKIPPED = "skipped"
SOFT_TIME_LIMIT = "soft_time_limit"
NON_RETRYABLE_EXCEPTION = "non_retryable_exception"
RETRYABLE_EXCEPTION = "retryable_exception"
@shared_task(
name=OnyxCeleryTask.DOCUMENT_BY_CC_PAIR_CLEANUP_TASK,
soft_time_limit=LIGHT_SOFT_TIME_LIMIT,
@@ -76,7 +57,7 @@ def document_by_cc_pair_cleanup_task(
document_id: str,
connector_id: int,
credential_id: int,
tenant_id: str,
tenant_id: str | None,
) -> bool:
"""A lightweight subtask used to clean up document to cc pair relationships.
Created by connection deletion and connector pruning parent tasks."""
@@ -97,10 +78,8 @@ def document_by_cc_pair_cleanup_task(
start = time.monotonic()
completion_status = OnyxCeleryTaskCompletionStatus.UNDEFINED
try:
with get_session_with_current_tenant() as db_session:
with get_session_with_tenant(tenant_id) as db_session:
action = "skip"
chunks_affected = 0
@@ -126,14 +105,10 @@ def document_by_cc_pair_cleanup_task(
tenant_id=tenant_id,
chunk_count=chunk_count,
)
delete_documents_complete__no_commit(
db_session=db_session,
document_ids=[document_id],
)
db_session.commit()
completion_status = OnyxCeleryTaskCompletionStatus.SUCCEEDED
elif count > 1:
action = "update"
@@ -177,11 +152,10 @@ def document_by_cc_pair_cleanup_task(
)
mark_document_as_synced(document_id, db_session)
db_session.commit()
completion_status = OnyxCeleryTaskCompletionStatus.SUCCEEDED
else:
completion_status = OnyxCeleryTaskCompletionStatus.SKIPPED
pass
db_session.commit()
elapsed = time.monotonic() - start
task_logger.info(
@@ -193,79 +167,57 @@ def document_by_cc_pair_cleanup_task(
)
except SoftTimeLimitExceeded:
task_logger.info(f"SoftTimeLimitExceeded exception. doc={document_id}")
completion_status = OnyxCeleryTaskCompletionStatus.SOFT_TIME_LIMIT
return False
except Exception as ex:
e: Exception | None = None
while True:
if isinstance(ex, RetryError):
task_logger.warning(
f"Tenacity retry failed: num_attempts={ex.last_attempt.attempt_number}"
)
# only set the inner exception if it is of type Exception
e_temp = ex.last_attempt.exception()
if isinstance(e_temp, Exception):
e = e_temp
else:
e = ex
if isinstance(e, httpx.HTTPStatusError):
if e.response.status_code == HTTPStatus.BAD_REQUEST:
task_logger.exception(
f"Non-retryable HTTPStatusError: "
f"doc={document_id} "
f"status={e.response.status_code}"
)
completion_status = (
OnyxCeleryTaskCompletionStatus.NON_RETRYABLE_EXCEPTION
)
break
task_logger.exception(
f"document_by_cc_pair_cleanup_task exceptioned: doc={document_id}"
if isinstance(ex, RetryError):
task_logger.warning(
f"Tenacity retry failed: num_attempts={ex.last_attempt.attempt_number}"
)
completion_status = OnyxCeleryTaskCompletionStatus.RETRYABLE_EXCEPTION
if (
self.max_retries is not None
and self.request.retries >= self.max_retries
):
# This is the last attempt! mark the document as dirty in the db so that it
# eventually gets fixed out of band via stale document reconciliation
task_logger.warning(
f"Max celery task retries reached. Marking doc as dirty for reconciliation: "
f"doc={document_id}"
)
with get_session_with_current_tenant() as db_session:
# delete the cc pair relationship now and let reconciliation clean it up
# in vespa
delete_document_by_connector_credential_pair__no_commit(
db_session=db_session,
document_id=document_id,
connector_credential_pair_identifier=ConnectorCredentialPairIdentifier(
connector_id=connector_id,
credential_id=credential_id,
),
)
mark_document_as_modified(document_id, db_session)
completion_status = (
OnyxCeleryTaskCompletionStatus.NON_RETRYABLE_EXCEPTION
)
break
# only set the inner exception if it is of type Exception
e_temp = ex.last_attempt.exception()
if isinstance(e_temp, Exception):
e = e_temp
else:
e = ex
# Exponential backoff from 2^4 to 2^6 ... i.e. 16, 32, 64
if isinstance(e, httpx.HTTPStatusError):
if e.response.status_code == HTTPStatus.BAD_REQUEST:
task_logger.exception(
f"Non-retryable HTTPStatusError: "
f"doc={document_id} "
f"status={e.response.status_code}"
)
return False
task_logger.exception(f"Unexpected exception: doc={document_id}")
if self.request.retries < DOCUMENT_BY_CC_PAIR_CLEANUP_MAX_RETRIES:
# Still retrying. Exponential backoff from 2^4 to 2^6 ... i.e. 16, 32, 64
countdown = 2 ** (self.request.retries + 4)
self.retry(exc=e, countdown=countdown) # this will raise a celery exception
break # we won't hit this, but it looks weird not to have it
finally:
task_logger.info(
f"document_by_cc_pair_cleanup_task completed: status={completion_status.value} doc={document_id}"
)
if completion_status != OnyxCeleryTaskCompletionStatus.SUCCEEDED:
self.retry(exc=e, countdown=countdown)
else:
# This is the last attempt! mark the document as dirty in the db so that it
# eventually gets fixed out of band via stale document reconciliation
task_logger.warning(
f"Max celery task retries reached. Marking doc as dirty for reconciliation: "
f"doc={document_id}"
)
with get_session_with_tenant(tenant_id) as db_session:
# delete the cc pair relationship now and let reconciliation clean it up
# in vespa
delete_document_by_connector_credential_pair__no_commit(
db_session=db_session,
document_id=document_id,
connector_credential_pair_identifier=ConnectorCredentialPairIdentifier(
connector_id=connector_id,
credential_id=credential_id,
),
)
mark_document_as_modified(document_id, db_session)
return False
task_logger.info(f"document_by_cc_pair_cleanup_task finished: doc={document_id}")
return True
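The retry handling above backs off exponentially (16s, 32s, 64s) and only falls through to the mark-dirty-and-reconcile path once the bound task has exhausted its retries. A stripped-down sketch of the same shape; the real task also special-cases non-retryable HTTP 400 errors first:

```python
# Stripped-down sketch of the celery retry/backoff shape used above.
from celery import Task, shared_task

MAX_RETRIES = 3  # stand-in for DOCUMENT_BY_CC_PAIR_CLEANUP_MAX_RETRIES


@shared_task(bind=True, max_retries=MAX_RETRIES)
def cleanup_sketch(self: Task, document_id: str) -> bool:
    try:
        # ... attempt the cleanup work ...
        return True
    except Exception as e:
        if self.request.retries < MAX_RETRIES:
            # exponential backoff: 2^4, 2^5, 2^6 seconds -> 16, 32, 64
            countdown = 2 ** (self.request.retries + 4)
            raise self.retry(exc=e, countdown=countdown)
        # out of retries: flag the document so out-of-band reconciliation fixes it
        return False
```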
@@ -297,8 +249,7 @@ def cloud_beat_task_generator(
return None
last_lock_time = time.monotonic()
tenant_ids: list[str] = []
num_processed_tenants = 0
tenant_ids: list[str] | list[None] = []
try:
tenant_ids = get_all_tenant_ids()
@@ -324,10 +275,7 @@ def cloud_beat_task_generator(
queue=queue,
priority=priority,
expires=expires,
ignore_result=True,
)
num_processed_tenants += 1
except SoftTimeLimitExceeded:
task_logger.info(
"Soft time limit exceeded, task is being terminated gracefully."
@@ -347,7 +295,6 @@ def cloud_beat_task_generator(
task_logger.info(
f"cloud_beat_task_generator finished: "
f"task={task_name} "
f"num_processed_tenants={num_processed_tenants} "
f"num_tenants={len(tenant_ids)} "
f"elapsed={time_elapsed:.2f}"
)
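cloud_beat_task_generator fans a single cloud beat trigger out into one task per tenant. A minimal sketch of that fan-out, assuming a bound task (so self.app is the Celery app) and that tenant ids come from get_all_tenant_ids:

```python
# Minimal sketch of the per-tenant fan-out; task_name/queue/priority/expires
# are whatever the caller passes in.
from celery import Task


def fan_out_per_tenant(
    self: Task,
    task_name: str,
    tenant_ids: list[str],
    queue: str,
    priority: int,
    expires: int,
) -> int:
    num_processed = 0
    for tenant_id in tenant_ids:
        self.app.send_task(
            task_name,
            kwargs={"tenant_id": tenant_id},
            queue=queue,
            priority=priority,
            expires=expires,
            ignore_result=True,
        )
        num_processed += 1
    return num_processed
```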

View File

@@ -1,5 +1,9 @@
import random
import time
import traceback
from collections.abc import Callable
from datetime import datetime
from datetime import timezone
from http import HTTPStatus
from typing import Any
from typing import cast
@@ -9,6 +13,8 @@ from celery import Celery
from celery import shared_task
from celery import Task
from celery.exceptions import SoftTimeLimitExceeded
from celery.result import AsyncResult
from celery.states import READY_STATES
from redis import Redis
from redis.lock import Lock as RedisLock
from sqlalchemy.orm import Session
@@ -16,28 +22,47 @@ from tenacity import RetryError
from onyx.access.access import get_access_for_document
from onyx.background.celery.apps.app_base import task_logger
from onyx.background.celery.celery_redis import celery_get_queue_length
from onyx.background.celery.celery_redis import celery_get_unacked_task_ids
from onyx.background.celery.tasks.doc_permission_syncing.tasks import (
monitor_ccpair_permissions_taskset,
)
from onyx.background.celery.tasks.pruning.tasks import monitor_ccpair_pruning_taskset
from onyx.background.celery.tasks.shared.RetryDocumentIndex import RetryDocumentIndex
from onyx.background.celery.tasks.shared.tasks import LIGHT_SOFT_TIME_LIMIT
from onyx.background.celery.tasks.shared.tasks import LIGHT_TIME_LIMIT
from onyx.background.celery.tasks.shared.tasks import OnyxCeleryTaskCompletionStatus
from onyx.configs.app_configs import JOB_TIMEOUT
from onyx.configs.app_configs import VESPA_SYNC_MAX_TASKS
from onyx.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisConstants
from onyx.configs.constants import OnyxRedisLocks
from onyx.configs.constants import OnyxRedisSignals
from onyx.db.connector import fetch_connector_by_id
from onyx.db.connector_credential_pair import add_deletion_failure_message
from onyx.db.connector_credential_pair import (
delete_connector_credential_pair__no_commit,
)
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
from onyx.db.connector_credential_pair import get_connector_credential_pairs
from onyx.db.document import count_documents_by_needs_sync
from onyx.db.document import get_document
from onyx.db.document import get_document_ids_for_connector_credential_pair
from onyx.db.document import mark_document_as_synced
from onyx.db.document_set import delete_document_set
from onyx.db.document_set import delete_document_set_cc_pair_relationship__no_commit
from onyx.db.document_set import fetch_document_sets
from onyx.db.document_set import fetch_document_sets_for_document
from onyx.db.document_set import get_document_set_by_id
from onyx.db.document_set import mark_document_set_as_synced
from onyx.db.engine import get_session_with_current_tenant
from onyx.db.engine import get_session_with_tenant
from onyx.db.enums import IndexingStatus
from onyx.db.enums import SyncStatus
from onyx.db.enums import SyncType
from onyx.db.index_attempt import delete_index_attempts
from onyx.db.index_attempt import get_index_attempt
from onyx.db.index_attempt import mark_attempt_failed
from onyx.db.models import DocumentSet
from onyx.db.models import UserGroup
from onyx.db.search_settings import get_active_search_settings
@@ -47,14 +72,20 @@ from onyx.db.sync_record import update_sync_record_status
from onyx.document_index.factory import get_default_document_index
from onyx.document_index.interfaces import VespaDocumentFields
from onyx.httpx.httpx_pool import HttpxPool
from onyx.redis.redis_connector import RedisConnector
from onyx.redis.redis_connector_credential_pair import RedisConnectorCredentialPair
from onyx.redis.redis_connector_credential_pair import (
RedisGlobalConnectorCredentialPair,
)
from onyx.redis.redis_connector_delete import RedisConnectorDelete
from onyx.redis.redis_connector_doc_perm_sync import RedisConnectorPermissionSync
from onyx.redis.redis_connector_index import RedisConnectorIndex
from onyx.redis.redis_connector_prune import RedisConnectorPrune
from onyx.redis.redis_document_set import RedisDocumentSet
from onyx.redis.redis_pool import get_redis_client
from onyx.redis.redis_pool import get_redis_replica_client
from onyx.redis.redis_pool import redis_lock_dump
from onyx.redis.redis_pool import SCAN_ITER_COUNT_DEFAULT
from onyx.redis.redis_usergroup import RedisUserGroup
from onyx.utils.logger import setup_logger
from onyx.utils.variable_functionality import fetch_versioned_implementation
@@ -63,6 +94,7 @@ from onyx.utils.variable_functionality import (
)
from onyx.utils.variable_functionality import global_version
from onyx.utils.variable_functionality import noop_fallback
from shared_configs.configs import MULTI_TENANT
logger = setup_logger()
@@ -76,17 +108,12 @@ logger = setup_logger()
trail=False,
bind=True,
)
def check_for_vespa_sync_task(self: Task, *, tenant_id: str) -> bool | None:
def check_for_vespa_sync_task(self: Task, *, tenant_id: str | None) -> bool | None:
"""Runs periodically to check if any document needs syncing.
Generates sets of tasks for Celery if syncing is needed."""
# Useful for debugging timing issues with reacquisitions. TODO: remove once more generalized logging is in place
task_logger.info("check_for_vespa_sync_task started")
time_start = time.monotonic()
r = get_redis_client()
r_replica = get_redis_replica_client()
r = get_redis_client(tenant_id=tenant_id)
lock_beat: RedisLock = r.lock(
OnyxRedisLocks.CHECK_VESPA_SYNC_BEAT_LOCK,
@@ -98,8 +125,7 @@ def check_for_vespa_sync_task(self: Task, *, tenant_id: str) -> bool | None:
return None
try:
# 1/3: KICKOFF
with get_session_with_current_tenant() as db_session:
with get_session_with_tenant(tenant_id) as db_session:
try_generate_stale_document_sync_tasks(
self.app, VESPA_SYNC_MAX_TASKS, db_session, r, lock_beat, tenant_id
)
@@ -107,7 +133,7 @@ def check_for_vespa_sync_task(self: Task, *, tenant_id: str) -> bool | None:
# region document set scan
lock_beat.reacquire()
document_set_ids: list[int] = []
with get_session_with_current_tenant() as db_session:
with get_session_with_tenant(tenant_id) as db_session:
# check if any document sets are not synced
document_set_info = fetch_document_sets(
user_id=None, db_session=db_session, include_outdated=True
@@ -118,15 +144,16 @@ def check_for_vespa_sync_task(self: Task, *, tenant_id: str) -> bool | None:
for document_set_id in document_set_ids:
lock_beat.reacquire()
with get_session_with_current_tenant() as db_session:
with get_session_with_tenant(tenant_id) as db_session:
try_generate_document_set_sync_tasks(
self.app, document_set_id, db_session, r, lock_beat, tenant_id
)
# endregion
# check if any user groups are not synced
lock_beat.reacquire()
if global_version.is_ee_version():
lock_beat.reacquire()
try:
fetch_user_groups = fetch_versioned_implementation(
"onyx.db.user_group", "fetch_user_groups"
@@ -137,7 +164,7 @@ def check_for_vespa_sync_task(self: Task, *, tenant_id: str) -> bool | None:
pass
else:
usergroup_ids: list[int] = []
with get_session_with_current_tenant() as db_session:
with get_session_with_tenant(tenant_id) as db_session:
user_groups = fetch_user_groups(
db_session=db_session, only_up_to_date=False
)
@@ -147,40 +174,11 @@ def check_for_vespa_sync_task(self: Task, *, tenant_id: str) -> bool | None:
for usergroup_id in usergroup_ids:
lock_beat.reacquire()
with get_session_with_current_tenant() as db_session:
with get_session_with_tenant(tenant_id) as db_session:
try_generate_user_group_sync_tasks(
self.app, usergroup_id, db_session, r, lock_beat, tenant_id
)
# 2/3: VALIDATE: TODO
# 3/3: FINALIZE
lock_beat.reacquire()
keys = cast(set[Any], r_replica.smembers(OnyxRedisConstants.ACTIVE_FENCES))
for key in keys:
key_bytes = cast(bytes, key)
if not r.exists(key_bytes):
r.srem(OnyxRedisConstants.ACTIVE_FENCES, key_bytes)
continue
key_str = key_bytes.decode("utf-8")
if key_str == RedisGlobalConnectorCredentialPair.FENCE_KEY:
monitor_connector_taskset(r)
elif key_str.startswith(RedisDocumentSet.FENCE_PREFIX):
with get_session_with_current_tenant() as db_session:
monitor_document_set_taskset(tenant_id, key_bytes, r, db_session)
elif key_str.startswith(RedisUserGroup.FENCE_PREFIX):
monitor_usergroup_taskset = (
fetch_versioned_implementation_with_fallback(
"onyx.background.celery.tasks.vespa.tasks",
"monitor_usergroup_taskset",
noop_fallback,
)
)
with get_session_with_current_tenant() as db_session:
monitor_usergroup_taskset(tenant_id, key_bytes, r, db_session)
except SoftTimeLimitExceeded:
task_logger.info(
"Soft time limit exceeded, task is being terminated gracefully."
@@ -208,7 +206,7 @@ def try_generate_stale_document_sync_tasks(
db_session: Session,
r: Redis,
lock_beat: RedisLock,
tenant_id: str,
tenant_id: str | None,
) -> int | None:
# the fence is up, do nothing
@@ -284,7 +282,7 @@ def try_generate_document_set_sync_tasks(
db_session: Session,
r: Redis,
lock_beat: RedisLock,
tenant_id: str,
tenant_id: str | None,
) -> int | None:
lock_beat.reacquire()
@@ -361,7 +359,7 @@ def try_generate_user_group_sync_tasks(
db_session: Session,
r: Redis,
lock_beat: RedisLock,
tenant_id: str,
tenant_id: str | None,
) -> int | None:
lock_beat.reacquire()
@@ -448,7 +446,7 @@ def monitor_connector_taskset(r: Redis) -> None:
def monitor_document_set_taskset(
tenant_id: str, key_bytes: bytes, r: Redis, db_session: Session
tenant_id: str | None, key_bytes: bytes, r: Redis, db_session: Session
) -> None:
fence_key = key_bytes.decode("utf-8")
document_set_id_str = RedisDocumentSet.get_id_from_fence_key(fence_key)
@@ -497,25 +495,486 @@ def monitor_document_set_taskset(
task_logger.info(
f"Successfully synced document set: document_set={document_set_id}"
)
try:
update_sync_record_status(
db_session=db_session,
entity_id=document_set_id,
sync_type=SyncType.DOCUMENT_SET,
sync_status=SyncStatus.SUCCESS,
num_docs_synced=initial_count,
)
except Exception:
task_logger.exception(
"update_sync_record_status exceptioned. "
f"document_set_id={document_set_id} "
"Resetting document set regardless."
)
update_sync_record_status(
db_session=db_session,
entity_id=document_set_id,
sync_type=SyncType.DOCUMENT_SET,
sync_status=SyncStatus.SUCCESS,
num_docs_synced=initial_count,
)
rds.reset()
def monitor_connector_deletion_taskset(
tenant_id: str | None, key_bytes: bytes, r: Redis
) -> None:
fence_key = key_bytes.decode("utf-8")
cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)
if cc_pair_id_str is None:
task_logger.warning(f"could not parse cc_pair_id from {fence_key}")
return
cc_pair_id = int(cc_pair_id_str)
redis_connector = RedisConnector(tenant_id, cc_pair_id)
fence_data = redis_connector.delete.payload
if not fence_data:
task_logger.warning(
f"Connector deletion - fence payload invalid: cc_pair={cc_pair_id}"
)
return
if fence_data.num_tasks is None:
# the fence is setting up but isn't ready yet
return
remaining = redis_connector.delete.get_remaining()
task_logger.info(
f"Connector deletion progress: cc_pair={cc_pair_id} remaining={remaining} initial={fence_data.num_tasks}"
)
if remaining > 0:
with get_session_with_tenant(tenant_id) as db_session:
update_sync_record_status(
db_session=db_session,
entity_id=cc_pair_id,
sync_type=SyncType.CONNECTOR_DELETION,
sync_status=SyncStatus.IN_PROGRESS,
num_docs_synced=remaining,
)
return
with get_session_with_tenant(tenant_id) as db_session:
cc_pair = get_connector_credential_pair_from_id(
db_session=db_session,
cc_pair_id=cc_pair_id,
)
if not cc_pair:
task_logger.warning(
f"Connector deletion - cc_pair not found: cc_pair={cc_pair_id}"
)
return
try:
doc_ids = get_document_ids_for_connector_credential_pair(
db_session, cc_pair.connector_id, cc_pair.credential_id
)
if len(doc_ids) > 0:
# NOTE(rkuo): if this happens, documents somehow got added while
# deletion was in progress. Likely a bug gating off pruning and indexing
# work before deletion starts.
task_logger.warning(
"Connector deletion - documents still found after taskset completion. "
"Clearing the current deletion attempt and allowing deletion to restart: "
f"cc_pair={cc_pair_id} "
f"docs_deleted={fence_data.num_tasks} "
f"docs_remaining={len(doc_ids)}"
)
# We don't want to wave off why we get into this state, but resetting
# our attempt and letting the deletion restart is a good way to recover
redis_connector.delete.reset()
raise RuntimeError(
"Connector deletion - documents still found after taskset completion"
)
# clean up the rest of the related Postgres entities
# index attempts
delete_index_attempts(
db_session=db_session,
cc_pair_id=cc_pair_id,
)
# document sets
delete_document_set_cc_pair_relationship__no_commit(
db_session=db_session,
connector_id=cc_pair.connector_id,
credential_id=cc_pair.credential_id,
)
# user groups
cleanup_user_groups = fetch_versioned_implementation_with_fallback(
"onyx.db.user_group",
"delete_user_group_cc_pair_relationship__no_commit",
noop_fallback,
)
cleanup_user_groups(
cc_pair_id=cc_pair_id,
db_session=db_session,
)
# finally, delete the cc-pair
delete_connector_credential_pair__no_commit(
db_session=db_session,
connector_id=cc_pair.connector_id,
credential_id=cc_pair.credential_id,
)
# if there are no credentials left, delete the connector
connector = fetch_connector_by_id(
db_session=db_session,
connector_id=cc_pair.connector_id,
)
if not connector or not len(connector.credentials):
task_logger.info(
"Connector deletion - Found no credentials left for connector, deleting connector"
)
db_session.delete(connector)
db_session.commit()
update_sync_record_status(
db_session=db_session,
entity_id=cc_pair_id,
sync_type=SyncType.CONNECTOR_DELETION,
sync_status=SyncStatus.SUCCESS,
num_docs_synced=fence_data.num_tasks,
)
except Exception as e:
db_session.rollback()
stack_trace = traceback.format_exc()
error_message = f"Error: {str(e)}\n\nStack Trace:\n{stack_trace}"
add_deletion_failure_message(db_session, cc_pair_id, error_message)
update_sync_record_status(
db_session=db_session,
entity_id=cc_pair_id,
sync_type=SyncType.CONNECTOR_DELETION,
sync_status=SyncStatus.FAILED,
num_docs_synced=fence_data.num_tasks,
)
task_logger.exception(
f"Connector deletion exceptioned: "
f"cc_pair={cc_pair_id} connector={cc_pair.connector_id} credential={cc_pair.credential_id}"
)
raise e
task_logger.info(
f"Connector deletion succeeded: "
f"cc_pair={cc_pair_id} "
f"connector={cc_pair.connector_id} "
f"credential={cc_pair.credential_id} "
f"docs_deleted={fence_data.num_tasks}"
)
redis_connector.delete.reset()
def monitor_ccpair_indexing_taskset(
tenant_id: str | None, key_bytes: bytes, r: Redis, db_session: Session
) -> None:
# if the fence doesn't exist, there's nothing to do
fence_key = key_bytes.decode("utf-8")
composite_id = RedisConnector.get_id_from_fence_key(fence_key)
if composite_id is None:
task_logger.warning(
f"Connector indexing: could not parse composite_id from {fence_key}"
)
return
# parse out metadata and initialize the helper class with it
parts = composite_id.split("/")
if len(parts) != 2:
return
cc_pair_id = int(parts[0])
search_settings_id = int(parts[1])
redis_connector = RedisConnector(tenant_id, cc_pair_id)
redis_connector_index = redis_connector.new_index(search_settings_id)
if not redis_connector_index.fenced:
return
payload = redis_connector_index.payload
if not payload:
return
elapsed_started_str = None
if payload.started:
elapsed_started = datetime.now(timezone.utc) - payload.started
elapsed_started_str = f"{elapsed_started.total_seconds():.2f}"
elapsed_submitted = datetime.now(timezone.utc) - payload.submitted
progress = redis_connector_index.get_progress()
if progress is not None:
task_logger.info(
f"Connector indexing progress: "
f"attempt={payload.index_attempt_id} "
f"cc_pair={cc_pair_id} "
f"search_settings={search_settings_id} "
f"progress={progress} "
f"elapsed_submitted={elapsed_submitted.total_seconds():.2f} "
f"elapsed_started={elapsed_started_str}"
)
if payload.index_attempt_id is None or payload.celery_task_id is None:
# the task is still setting up
return
# never use any blocking methods on the result from inside a task!
result: AsyncResult = AsyncResult(payload.celery_task_id)
# inner/outer/inner double check pattern to avoid race conditions when checking for
# bad state
# Verify: if the generator isn't complete, the task must not be in READY state
# inner = get_completion / generator_complete not signaled
# outer = result.state in READY state
status_int = redis_connector_index.get_completion()
if status_int is None: # inner signal not set ... possible error
task_state = result.state
if (
task_state in READY_STATES
): # outer signal in terminal state ... possible error
# Now double check!
if redis_connector_index.get_completion() is None:
# inner signal still not set (and cannot change when outer result_state is READY)
# Task is finished but generator complete isn't set.
# We have a problem! Worker may have crashed.
task_result = str(result.result)
task_traceback = str(result.traceback)
msg = (
f"Connector indexing aborted or exceptioned: "
f"attempt={payload.index_attempt_id} "
f"celery_task={payload.celery_task_id} "
f"cc_pair={cc_pair_id} "
f"search_settings={search_settings_id} "
f"elapsed_submitted={elapsed_submitted.total_seconds():.2f} "
f"result.state={task_state} "
f"result.result={task_result} "
f"result.traceback={task_traceback}"
)
task_logger.warning(msg)
try:
index_attempt = get_index_attempt(
db_session, payload.index_attempt_id
)
if index_attempt:
if (
index_attempt.status != IndexingStatus.CANCELED
and index_attempt.status != IndexingStatus.FAILED
):
mark_attempt_failed(
index_attempt_id=payload.index_attempt_id,
db_session=db_session,
failure_reason=msg,
)
except Exception:
task_logger.exception(
"Connector indexing - Transient exception marking index attempt as failed: "
f"attempt={payload.index_attempt_id} "
f"tenant={tenant_id} "
f"cc_pair={cc_pair_id} "
f"search_settings={search_settings_id}"
)
redis_connector_index.reset()
return
if redis_connector_index.watchdog_signaled():
# if the generator is complete, don't clean up until the watchdog has exited
task_logger.info(
f"Connector indexing - Delaying finalization until watchdog has exited: "
f"attempt={payload.index_attempt_id} "
f"cc_pair={cc_pair_id} "
f"search_settings={search_settings_id} "
f"progress={progress} "
f"elapsed_submitted={elapsed_submitted.total_seconds():.2f} "
f"elapsed_started={elapsed_started_str}"
)
return
status_enum = HTTPStatus(status_int)
task_logger.info(
f"Connector indexing finished: "
f"attempt={payload.index_attempt_id} "
f"cc_pair={cc_pair_id} "
f"search_settings={search_settings_id} "
f"progress={progress} "
f"status={status_enum.name} "
f"elapsed_submitted={elapsed_submitted.total_seconds():.2f} "
f"elapsed_started={elapsed_started_str}"
)
redis_connector_index.reset()
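The inner/outer/inner check above exists because the worker-side completion signal and Celery's terminal state are written by different processes: the outer state can flip to READY at any moment, so the inner signal is read a second time after the outer check to rule out a harmless race. A minimal sketch of the same pattern with hypothetical names (a plain completion key and a standalone helper) — an illustration, not code from this diff:

from celery.result import AsyncResult
from celery.states import READY_STATES
from redis import Redis


def generator_looks_crashed(r: Redis, completion_key: str, celery_task_id: str) -> bool:
    """True only when the task is terminal but the worker never wrote its completion signal."""
    # inner: worker-side completion signal
    if r.get(completion_key) is not None:
        return False

    # outer: Celery result state (a non-blocking read, as required inside a task)
    result = AsyncResult(celery_task_id)
    if result.state not in READY_STATES:
        return False

    # inner again: a terminal outer state can no longer change, so a still-missing
    # inner signal now means the worker died before signaling completion.
    return r.get(completion_key) is None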
@shared_task(
name=OnyxCeleryTask.MONITOR_VESPA_SYNC,
ignore_result=True,
soft_time_limit=300,
bind=True,
)
def monitor_vespa_sync(self: Task, tenant_id: str | None) -> bool | None:
"""This is a celery beat task that monitors and finalizes various long running tasks.
The name monitor_vespa_sync is a bit of a misnomer since it checks many different tasks
now. Should change that at some point.
It scans for fence values and then gets the counts of any associated tasksets.
For many tasks, a count of 0 means all tasks finished and we should clean up.
This task's lock timeout is CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT seconds, so don't
do anything too expensive in this function!
Returns True if the task actually did work, False if it errored or hit the soft
time limit, and None if it exited early because another run still held the lock.
"""
task_logger.info(f"monitor_vespa_sync starting: tenant={tenant_id}")
time_start = time.monotonic()
r = get_redis_client(tenant_id=tenant_id)
# Replica usage notes
#
# False negatives are OK (i.e. failing to see a key that exists on the master).
# We simply skip the monitoring work and it will be caught on the next pass.
#
# False positives are not OK, and are possible if we clear a fence on the master and
# then read from the replica. In this case, monitoring work could be done on a fence
# that no longer exists. To avoid this, we scan from the replica, but double check
# the result on the master.
r_replica = get_redis_replica_client(tenant_id=tenant_id)
lock_beat: RedisLock = r.lock(
OnyxRedisLocks.MONITOR_VESPA_SYNC_BEAT_LOCK,
timeout=CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT,
)
# prevent overlapping tasks
if not lock_beat.acquire(blocking=False):
return None
try:
# print current queue lengths
# we don't need every tenant polling redis for this info.
if not MULTI_TENANT or random.randint(1, 10) == 10:
r_celery = self.app.broker_connection().channel().client # type: ignore
n_celery = celery_get_queue_length("celery", r_celery)
n_indexing = celery_get_queue_length(
OnyxCeleryQueues.CONNECTOR_INDEXING, r_celery
)
n_sync = celery_get_queue_length(
OnyxCeleryQueues.VESPA_METADATA_SYNC, r_celery
)
n_deletion = celery_get_queue_length(
OnyxCeleryQueues.CONNECTOR_DELETION, r_celery
)
n_pruning = celery_get_queue_length(
OnyxCeleryQueues.CONNECTOR_PRUNING, r_celery
)
n_permissions_sync = celery_get_queue_length(
OnyxCeleryQueues.CONNECTOR_DOC_PERMISSIONS_SYNC, r_celery
)
n_external_group_sync = celery_get_queue_length(
OnyxCeleryQueues.CONNECTOR_EXTERNAL_GROUP_SYNC, r_celery
)
n_permissions_upsert = celery_get_queue_length(
OnyxCeleryQueues.DOC_PERMISSIONS_UPSERT, r_celery
)
prefetched = celery_get_unacked_task_ids(
OnyxCeleryQueues.CONNECTOR_INDEXING, r_celery
)
task_logger.info(
f"Queue lengths: celery={n_celery} "
f"indexing={n_indexing} "
f"indexing_prefetched={len(prefetched)} "
f"sync={n_sync} "
f"deletion={n_deletion} "
f"pruning={n_pruning} "
f"permissions_sync={n_permissions_sync} "
f"external_group_sync={n_external_group_sync} "
f"permissions_upsert={n_permissions_upsert} "
)
# we want to run this less frequently than the overall task
if not r.exists(OnyxRedisSignals.BLOCK_BUILD_FENCE_LOOKUP_TABLE):
# build a lookup table of existing fences
# this is just a migration concern and should be unnecessary once
# lookup tables are rolled out
for key_bytes in r_replica.scan_iter(count=SCAN_ITER_COUNT_DEFAULT):
if is_fence(key_bytes) and not r.sismember(
OnyxRedisConstants.ACTIVE_FENCES, key_bytes
):
logger.warning(f"Adding {key_bytes} to the lookup table.")
r.sadd(OnyxRedisConstants.ACTIVE_FENCES, key_bytes)
r.set(OnyxRedisSignals.BLOCK_BUILD_FENCE_LOOKUP_TABLE, 1, ex=300)
# use a lookup table to find active fences. We still have to verify the fence
# exists since it is an optimization and not the source of truth.
keys = cast(set[Any], r_replica.smembers(OnyxRedisConstants.ACTIVE_FENCES))
for key in keys:
key_bytes = cast(bytes, key)
if not r.exists(key_bytes):
r.srem(OnyxRedisConstants.ACTIVE_FENCES, key_bytes)
continue
key_str = key_bytes.decode("utf-8")
if key_str == RedisGlobalConnectorCredentialPair.FENCE_KEY:
monitor_connector_taskset(r)
elif key_str.startswith(RedisDocumentSet.FENCE_PREFIX):
with get_session_with_tenant(tenant_id) as db_session:
monitor_document_set_taskset(tenant_id, key_bytes, r, db_session)
elif key_str.startswith(RedisUserGroup.FENCE_PREFIX):
monitor_usergroup_taskset = (
fetch_versioned_implementation_with_fallback(
"onyx.background.celery.tasks.vespa.tasks",
"monitor_usergroup_taskset",
noop_fallback,
)
)
with get_session_with_tenant(tenant_id) as db_session:
monitor_usergroup_taskset(tenant_id, key_bytes, r, db_session)
elif key_str.startswith(RedisConnectorDelete.FENCE_PREFIX):
monitor_connector_deletion_taskset(tenant_id, key_bytes, r)
elif key_str.startswith(RedisConnectorPrune.FENCE_PREFIX):
with get_session_with_tenant(tenant_id) as db_session:
monitor_ccpair_pruning_taskset(tenant_id, key_bytes, r, db_session)
elif key_str.startswith(RedisConnectorIndex.FENCE_PREFIX):
with get_session_with_tenant(tenant_id) as db_session:
monitor_ccpair_indexing_taskset(tenant_id, key_bytes, r, db_session)
elif key_str.startswith(RedisConnectorPermissionSync.FENCE_PREFIX):
with get_session_with_tenant(tenant_id) as db_session:
monitor_ccpair_permissions_taskset(
tenant_id, key_bytes, r, db_session
)
else:
pass
except SoftTimeLimitExceeded:
task_logger.info(
"Soft time limit exceeded, task is being terminated gracefully."
)
return False
except Exception:
task_logger.exception("monitor_vespa_sync exceptioned.")
return False
finally:
if lock_beat.owned():
lock_beat.release()
else:
task_logger.error(
"monitor_vespa_sync - Lock not owned on completion: "
f"tenant={tenant_id}"
# f"timings={timings}"
)
redis_lock_dump(lock_beat, r)
time_elapsed = time.monotonic() - time_start
task_logger.info(f"monitor_vespa_sync finished: elapsed={time_elapsed:.2f}")
return True
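The replica usage notes near the top of monitor_vespa_sync reduce to one rule: candidates may be enumerated on the replica, but nothing is acted on until the master confirms the key still exists. A rough, self-contained sketch of that verify-on-master step (hypothetical lookup-set name; not code from this diff):

from typing import Iterator

from redis import Redis


def verified_fences(r_master: Redis, r_replica: Redis, lookup_set: str) -> Iterator[bytes]:
    # Reading from the replica is cheap but may be stale. A key we fail to see here
    # is only a false negative; the next monitoring pass will catch it.
    for key in r_replica.smembers(lookup_set):
        # A fence cleared on the master but still visible on the replica would be a
        # false positive, so re-check on the master and prune stale entries there.
        if r_master.exists(key):
            yield key
        else:
            r_master.srem(lookup_set, key)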
@shared_task(
name=OnyxCeleryTask.VESPA_METADATA_SYNC_TASK,
bind=True,
@@ -523,13 +982,13 @@ def monitor_document_set_taskset(
time_limit=LIGHT_TIME_LIMIT,
max_retries=3,
)
def vespa_metadata_sync_task(self: Task, document_id: str, *, tenant_id: str) -> bool:
def vespa_metadata_sync_task(
self: Task, document_id: str, tenant_id: str | None
) -> bool:
start = time.monotonic()
completion_status = OnyxCeleryTaskCompletionStatus.UNDEFINED
try:
with get_session_with_current_tenant() as db_session:
with get_session_with_tenant(tenant_id) as db_session:
active_search_settings = get_active_search_settings(db_session)
doc_index = get_default_document_index(
search_settings=active_search_settings.primary,
@@ -541,103 +1000,95 @@ def vespa_metadata_sync_task(self: Task, document_id: str, *, tenant_id: str) ->
doc = get_document(document_id, db_session)
if not doc:
elapsed = time.monotonic() - start
task_logger.info(
f"doc={document_id} "
f"action=no_operation "
f"elapsed={elapsed:.2f}"
)
completion_status = OnyxCeleryTaskCompletionStatus.SKIPPED
else:
# document set sync
doc_sets = fetch_document_sets_for_document(document_id, db_session)
update_doc_sets: set[str] = set(doc_sets)
return False
# User group sync
doc_access = get_access_for_document(
document_id=document_id, db_session=db_session
)
# document set sync
doc_sets = fetch_document_sets_for_document(document_id, db_session)
update_doc_sets: set[str] = set(doc_sets)
fields = VespaDocumentFields(
document_sets=update_doc_sets,
access=doc_access,
boost=doc.boost,
hidden=doc.hidden,
)
# update Vespa. OK if doc doesn't exist. Raises exception otherwise.
chunks_affected = retry_index.update_single(
document_id,
tenant_id=tenant_id,
chunk_count=doc.chunk_count,
fields=fields,
)
# update db last. Worst case = we crash right before this and
# the sync might repeat again later
mark_document_as_synced(document_id, db_session)
elapsed = time.monotonic() - start
task_logger.info(
f"doc={document_id} "
f"action=sync "
f"chunks={chunks_affected} "
f"elapsed={elapsed:.2f}"
)
completion_status = OnyxCeleryTaskCompletionStatus.SUCCEEDED
except SoftTimeLimitExceeded:
task_logger.info(f"SoftTimeLimitExceeded exception. doc={document_id}")
completion_status = OnyxCeleryTaskCompletionStatus.SOFT_TIME_LIMIT
except Exception as ex:
e: Exception | None = None
while True:
if isinstance(ex, RetryError):
task_logger.warning(
f"Tenacity retry failed: num_attempts={ex.last_attempt.attempt_number}"
)
# only set the inner exception if it is of type Exception
e_temp = ex.last_attempt.exception()
if isinstance(e_temp, Exception):
e = e_temp
else:
e = ex
if isinstance(e, httpx.HTTPStatusError):
if e.response.status_code == HTTPStatus.BAD_REQUEST:
task_logger.exception(
f"Non-retryable HTTPStatusError: "
f"doc={document_id} "
f"status={e.response.status_code}"
)
completion_status = (
OnyxCeleryTaskCompletionStatus.NON_RETRYABLE_EXCEPTION
)
break
task_logger.exception(
f"vespa_metadata_sync_task exceptioned: doc={document_id}"
# User group sync
doc_access = get_access_for_document(
document_id=document_id, db_session=db_session
)
completion_status = OnyxCeleryTaskCompletionStatus.RETRYABLE_EXCEPTION
if (
self.max_retries is not None
and self.request.retries >= self.max_retries
):
completion_status = (
OnyxCeleryTaskCompletionStatus.NON_RETRYABLE_EXCEPTION
)
fields = VespaDocumentFields(
document_sets=update_doc_sets,
access=doc_access,
boost=doc.boost,
hidden=doc.hidden,
)
# Exponential backoff from 2^4 to 2^6 ... i.e. 16, 32, 64
countdown = 2 ** (self.request.retries + 4)
self.retry(exc=e, countdown=countdown) # this will raise a celery exception
break # we won't hit this, but it looks weird not to have it
finally:
task_logger.info(
f"vespa_metadata_sync_task completed: status={completion_status.value} doc={document_id}"
# update Vespa. OK if doc doesn't exist. Raises exception otherwise.
chunks_affected = retry_index.update_single(
document_id,
tenant_id=tenant_id,
chunk_count=doc.chunk_count,
fields=fields,
)
# update db last. Worst case = we crash right before this and
# the sync might repeat again later
mark_document_as_synced(document_id, db_session)
elapsed = time.monotonic() - start
task_logger.info(
f"doc={document_id} "
f"action=sync "
f"chunks={chunks_affected} "
f"elapsed={elapsed:.2f}"
)
except SoftTimeLimitExceeded:
task_logger.info(f"SoftTimeLimitExceeded exception. doc={document_id}")
return False
except Exception as ex:
e: Exception | None = None
if isinstance(ex, RetryError):
task_logger.warning(
f"Tenacity retry failed: num_attempts={ex.last_attempt.attempt_number}"
)
# only set the inner exception if it is of type Exception
e_temp = ex.last_attempt.exception()
if isinstance(e_temp, Exception):
e = e_temp
else:
e = ex
if isinstance(e, httpx.HTTPStatusError):
if e.response.status_code == HTTPStatus.BAD_REQUEST:
task_logger.exception(
f"Non-retryable HTTPStatusError: "
f"doc={document_id} "
f"status={e.response.status_code}"
)
return False
task_logger.exception(
f"Unexpected exception during vespa metadata sync: doc={document_id}"
)
if completion_status != OnyxCeleryTaskCompletionStatus.SUCCEEDED:
return False
# Exponential backoff from 2^4 to 2^6 ... i.e. 16, 32, 64
countdown = 2 ** (self.request.retries + 4)
self.retry(exc=e, countdown=countdown)
return True
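The countdown above follows 2 ** (self.request.retries + 4); with max_retries=3 the task waits roughly 16, 32, and 64 seconds between attempts. A one-liner to check the arithmetic:

# Delays implied by countdown = 2 ** (retries + 4) with max_retries = 3
delays = [2 ** (attempt + 4) for attempt in range(3)]
assert delays == [16, 32, 64]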
def is_fence(key_bytes: bytes) -> bool:
key_str = key_bytes.decode("utf-8")
if key_str == RedisGlobalConnectorCredentialPair.FENCE_KEY:
return True
if key_str.startswith(RedisDocumentSet.FENCE_PREFIX):
return True
if key_str.startswith(RedisUserGroup.FENCE_PREFIX):
return True
if key_str.startswith(RedisConnectorDelete.FENCE_PREFIX):
return True
if key_str.startswith(RedisConnectorPrune.FENCE_PREFIX):
return True
if key_str.startswith(RedisConnectorIndex.FENCE_PREFIX):
return True
if key_str.startswith(RedisConnectorPermissionSync.FENCE_PREFIX):
return True
return False
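is_fence and the elif chain in monitor_vespa_sync test the same set of prefixes. One way to keep the two in sync (a sketch, assuming the same Redis* helpers are imported; not how the code above is written) is a shared prefix table:

# Assumes the same RedisGlobalConnectorCredentialPair / Redis* imports as this module.
FENCE_PREFIXES: tuple[str, ...] = (
    RedisDocumentSet.FENCE_PREFIX,
    RedisUserGroup.FENCE_PREFIX,
    RedisConnectorDelete.FENCE_PREFIX,
    RedisConnectorPrune.FENCE_PREFIX,
    RedisConnectorIndex.FENCE_PREFIX,
    RedisConnectorPermissionSync.FENCE_PREFIX,
)


def is_fence_alt(key_bytes: bytes) -> bool:
    key_str = key_bytes.decode("utf-8")
    if key_str == RedisGlobalConnectorCredentialPair.FENCE_KEY:
        return True
    # str.startswith accepts a tuple, so one call covers every prefix.
    return key_str.startswith(FENCE_PREFIXES)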

View File

@@ -1,20 +0,0 @@
from sqlalchemy.exc import IntegrityError
from onyx.db.background_error import create_background_error
from onyx.db.engine import get_session_with_current_tenant
def emit_background_error(
message: str,
cc_pair_id: int | None = None,
) -> None:
"""Currently just saves a row in the background_errors table.
In the future, could create notifications based on the severity."""
with get_session_with_current_tenant() as db_session:
try:
create_background_error(db_session, message, cc_pair_id)
except IntegrityError as e:
# If the insert violates a constraint (e.g. the referenced cc_pair was deleted),
# record the error again without the cc_pair reference
error_message = f"Failed to create background error: {str(e)}. Original message: {message}"
create_background_error(db_session, error_message, None)

Some files were not shown because too many files have changed in this diff