fix

ensure checks pass
remove unnecessary logs
2026-02-17 07:45:47 +00:00 · 2025-03-06 14:46:20 -08:00 · 2025-03-06 14:46:20 -08:00 · 2025-03-06 14:46:20 -08:00 · 2025-03-06 14:46:19 -08:00 · 2025-03-06 14:46:19 -08:00
733 changed files with 31843 additions and 11618 deletions
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -0,0 +1 @@
+* @onyx-dot-app/onyx-core-team
--- a/.github/workflows/docker-build-push-cloud-web-container-on-tag.yml
+++ b/.github/workflows/docker-build-push-cloud-web-container-on-tag.yml
@@ -65,6 +65,7 @@ jobs:
            NEXT_PUBLIC_POSTHOG_KEY=${{ secrets.POSTHOG_KEY }}
            NEXT_PUBLIC_POSTHOG_HOST=${{ secrets.POSTHOG_HOST }}
            NEXT_PUBLIC_SENTRY_DSN=${{ secrets.SENTRY_DSN }}
+            NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY=${{ secrets.STRIPE_PUBLISHABLE_KEY }}
            NEXT_PUBLIC_GTM_ENABLED=true
            NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=true
            NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=true
--- a/.github/workflows/docker-build-push-model-server-container-on-tag.yml
+++ b/.github/workflows/docker-build-push-model-server-container-on-tag.yml
@@ -12,7 +12,32 @@ env:
  BUILDKIT_PROGRESS: plain

 jobs:
+  # 1) Preliminary job to check if the changed files are relevant
+  check_model_server_changes:
+    runs-on: ubuntu-latest
+    outputs:
+      changed: ${{ steps.check.outputs.changed }}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Check if relevant files changed
+        id: check
+        run: |
+          # Default to "false"
+          echo "changed=false" >> $GITHUB_OUTPUT
+
+          # Compare the previous commit (github.event.before) to the current one (github.sha)
+          # If any file in backend/model_server/** or backend/Dockerfile.model_server is changed,
+          # set changed=true
+          if git diff --name-only ${{ github.event.before }} ${{ github.sha }} \
+             | grep -E '^backend/model_server/|^backend/Dockerfile.model_server'; then
+            echo "changed=true" >> $GITHUB_OUTPUT
+          fi
+
  build-amd64:
+    needs: [check_model_server_changes]
+    if: needs.check_model_server_changes.outputs.changed == 'true'
    runs-on:
      [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}-amd64"]
    steps:
@@ -52,6 +77,8 @@ jobs:
          provenance: false

  build-arm64:
+    needs: [check_model_server_changes]
+    if: needs.check_model_server_changes.outputs.changed == 'true'
    runs-on:
      [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}-arm64"]
    steps:
@@ -91,7 +118,8 @@ jobs:
          provenance: false

  merge-and-scan:
-    needs: [build-amd64, build-arm64]
+    needs: [build-amd64, build-arm64, check_model_server_changes]
+    if: needs.check_model_server_changes.outputs.changed == 'true'
    runs-on: ubuntu-latest
    steps:
      - name: Login to Docker Hub
--- a/.github/workflows/nightly-scan-licenses.yml
+++ b/.github/workflows/nightly-scan-licenses.yml
@@ -53,24 +53,90 @@ jobs:
          exclude: '(?i)^(pylint|aio[-_]*).*'
          
      - name: Print report
-        if: ${{ always() }}
+        if: always()
        run: echo "${{ steps.license_check_report.outputs.report }}"
      
      - name: Install npm dependencies
        working-directory: ./web
        run: npm ci
-        
-      - name: Run Trivy vulnerability scanner in repo mode
-        uses: aquasecurity/trivy-action@0.28.0
-        with:
-          scan-type: fs
-          scanners: license
-          format: table
-#           format: sarif
-#           output: trivy-results.sarif
-          severity: HIGH,CRITICAL

-#       - name: Upload Trivy scan results to GitHub Security tab
-#         uses: github/codeql-action/upload-sarif@v3
+        # be careful enabling the sarif and upload as it may spam the security tab
+        # with a huge amount of items. Work out the issues before enabling upload.       
+#       - name: Run Trivy vulnerability scanner in repo mode
+#         if: always()
+#         uses: aquasecurity/trivy-action@0.29.0
 #         with:
-#           sarif_file: trivy-results.sarif
+#           scan-type: fs
+#           scan-ref: .
+#           scanners: license
+#           format: table
+#           severity: HIGH,CRITICAL
+# #           format: sarif
+# #           output: trivy-results.sarif
+# 
+# #       - name: Upload Trivy scan results to GitHub Security tab
+# #         uses: github/codeql-action/upload-sarif@v3
+# #         with:
+# #           sarif_file: trivy-results.sarif
+
+  scan-trivy:
+    # See https://runs-on.com/runners/linux/
+    runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}"]
+      
+    steps:
+    - name: Set up Docker Buildx
+      uses: docker/setup-buildx-action@v3
+
+    - name: Login to Docker Hub
+      uses: docker/login-action@v3
+      with:
+        username: ${{ secrets.DOCKER_USERNAME }}
+        password: ${{ secrets.DOCKER_TOKEN }}
+
+    # Backend
+    - name: Pull backend docker image
+      run: docker pull onyxdotapp/onyx-backend:latest
+
+    - name: Run Trivy vulnerability scanner on backend
+      uses: aquasecurity/trivy-action@0.29.0
+      env:
+        TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
+        TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
+      with:
+        image-ref: onyxdotapp/onyx-backend:latest
+        scanners: license
+        severity: HIGH,CRITICAL
+        vuln-type: library
+        exit-code: 0  # Set to 1 if we want a failed scan to fail the workflow
+
+    # Web server
+    - name: Pull web server docker image
+      run: docker pull onyxdotapp/onyx-web-server:latest
+          
+    - name: Run Trivy vulnerability scanner on web server
+      uses: aquasecurity/trivy-action@0.29.0
+      env:
+        TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
+        TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
+      with:
+        image-ref: onyxdotapp/onyx-web-server:latest
+        scanners: license
+        severity: HIGH,CRITICAL
+        vuln-type: library
+        exit-code: 0
+
+    # Model server
+    - name: Pull model server docker image
+      run: docker pull onyxdotapp/onyx-model-server:latest
+
+    - name: Run Trivy vulnerability scanner
+      uses: aquasecurity/trivy-action@0.29.0
+      env:
+        TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2'
+        TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1'
+      with:
+        image-ref: onyxdotapp/onyx-model-server:latest
+        scanners: license
+        severity: HIGH,CRITICAL
+        vuln-type: library
+        exit-code: 0
--- a/.github/workflows/pr-integration-tests.yml
+++ b/.github/workflows/pr-integration-tests.yml
@@ -94,13 +94,12 @@ jobs:
          cd deployment/docker_compose
          ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
          MULTI_TENANT=true \
-          LOG_LEVEL=DEBUG \
          AUTH_TYPE=cloud \
          REQUIRE_EMAIL_VERIFICATION=false \
          DISABLE_TELEMETRY=true \
          IMAGE_TAG=test \
          DEV_MODE=true \
-          docker compose -f docker-compose.multitenant-dev.yml -p danswer-stack up -d
+          docker compose -f docker-compose.multitenant-dev.yml -p onyx-stack up -d
        id: start_docker_multi_tenant

      # In practice, `cloud` Auth type would require OAUTH credentials to be set.
@@ -109,14 +108,14 @@ jobs:
          echo "Waiting for 3 minutes to ensure API server is ready..."
          sleep 180
          echo "Running integration tests..."
-          docker run --rm --network danswer-stack_default \
+          docker run --rm --network onyx-stack_default \
            --name test-runner \
            -e POSTGRES_HOST=relational_db \
            -e POSTGRES_USER=postgres \
            -e POSTGRES_PASSWORD=password \
            -e POSTGRES_DB=postgres \
+            -e POSTGRES_USE_NULL_POOL=true \
            -e VESPA_HOST=index \
-            -e LOG_LEVEL=DEBUG \
            -e REDIS_HOST=cache \
            -e API_SERVER_HOST=api_server \
            -e OPENAI_API_KEY=${OPENAI_API_KEY} \
@@ -145,25 +144,28 @@ jobs:
      - name: Stop multi-tenant Docker containers
        run: |
          cd deployment/docker_compose
-          docker compose -f docker-compose.multitenant-dev.yml -p danswer-stack down -v
+          docker compose -f docker-compose.multitenant-dev.yml -p onyx-stack down -v

+      # NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections
      - name: Start Docker containers
        run: |
          cd deployment/docker_compose
          ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
          AUTH_TYPE=basic \
+          POSTGRES_POOL_PRE_PING=true \
+          POSTGRES_USE_NULL_POOL=true \
          REQUIRE_EMAIL_VERIFICATION=false \
          DISABLE_TELEMETRY=true \
          IMAGE_TAG=test \
-          LOG_LEVEL=DEBUG \
-          docker compose -f docker-compose.dev.yml -p danswer-stack up -d
+          INTEGRATION_TESTS_MODE=true \
+          docker compose -f docker-compose.dev.yml -p onyx-stack up -d
        id: start_docker

      - name: Wait for service to be ready
        run: |
          echo "Starting wait-for-service script..."

-          docker logs -f danswer-stack-api_server-1 &
+          docker logs -f onyx-stack-api_server-1 &

          start_time=$(date +%s)
          timeout=300  # 5 minutes in seconds
@@ -193,18 +195,26 @@ jobs:
          done
          echo "Finished waiting for service."

+      - name: Start Mock Services
+        run: |
+          cd backend/tests/integration/mock_services
+          docker compose -f docker-compose.mock-it-services.yml \
+            -p mock-it-services-stack up -d
+
+      # NOTE: Use pre-ping/null to reduce flakiness due to dropped connections
      - name: Run Standard Integration Tests
        run: |
          echo "Running integration tests..."
-          docker run --rm --network danswer-stack_default \
+          docker run --rm --network onyx-stack_default \
            --name test-runner \
            -e POSTGRES_HOST=relational_db \
            -e POSTGRES_USER=postgres \
            -e POSTGRES_PASSWORD=password \
            -e POSTGRES_DB=postgres \
+            -e POSTGRES_POOL_PRE_PING=true \
+            -e POSTGRES_USE_NULL_POOL=true \
            -e VESPA_HOST=index \
            -e REDIS_HOST=cache \
-            -e LOG_LEVEL=DEBUG \
            -e API_SERVER_HOST=api_server \
            -e OPENAI_API_KEY=${OPENAI_API_KEY} \
            -e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
@@ -212,6 +222,8 @@ jobs:
            -e CONFLUENCE_USER_NAME=${CONFLUENCE_USER_NAME} \
            -e CONFLUENCE_ACCESS_TOKEN=${CONFLUENCE_ACCESS_TOKEN} \
            -e TEST_WEB_HOSTNAME=test-runner \
+            -e MOCK_CONNECTOR_SERVER_HOST=mock_connector_server \
+            -e MOCK_CONNECTOR_SERVER_PORT=8001 \
            onyxdotapp/onyx-integration:test \
            /app/tests/integration/tests \
            /app/tests/integration/connector_job_tests
@@ -233,13 +245,13 @@ jobs:
        if: always()
        run: |
          cd deployment/docker_compose
-          docker compose -f docker-compose.dev.yml -p danswer-stack logs --no-color api_server > $GITHUB_WORKSPACE/api_server.log || true
+          docker compose -f docker-compose.dev.yml -p onyx-stack logs --no-color api_server > $GITHUB_WORKSPACE/api_server.log || true

      - name: Dump all-container logs (optional)
        if: always()
        run: |
          cd deployment/docker_compose
-          docker compose -f docker-compose.dev.yml -p danswer-stack logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
+          docker compose -f docker-compose.dev.yml -p onyx-stack logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true

      - name: Upload logs
        if: always()
@@ -253,4 +265,4 @@ jobs:
        if: always()
        run: |
          cd deployment/docker_compose
-          docker compose -f docker-compose.dev.yml -p danswer-stack down -v
+          docker compose -f docker-compose.dev.yml -p onyx-stack down -v
--- a/.github/workflows/pr-playwright-tests.yml
+++ b/.github/workflows/pr-playwright-tests.yml
@@ -1,6 +1,6 @@
-name: Run Chromatic Tests
+name: Run Playwright Tests
 concurrency:
-  group: Run-Chromatic-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
+  group: Run-Playwright-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
  cancel-in-progress: true

 on: push
@@ -198,43 +198,47 @@ jobs:
          cd deployment/docker_compose
          docker compose -f docker-compose.dev.yml -p danswer-stack down -v

-  chromatic-tests:
-    name: Chromatic Tests
+# NOTE: Chromatic UI diff testing is currently disabled.
+# We are using Playwright for local and CI testing without visual regression checks.
+# Chromatic may be reintroduced in the future for UI diff testing if needed.

-    needs: playwright-tests
-    runs-on:
-      [
-        runs-on,
-        runner=32cpu-linux-x64,
-        disk=large,
-        "run-id=${{ github.run_id }}",
-      ]
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
+# chromatic-tests:
+#   name: Chromatic Tests

-      - name: Setup node
-        uses: actions/setup-node@v4
-        with:
-          node-version: 22
+#   needs: playwright-tests
+#   runs-on:
+#     [
+#       runs-on,
+#       runner=32cpu-linux-x64,
+#       disk=large,
+#       "run-id=${{ github.run_id }}",
+#     ]
+#   steps:
+#     - name: Checkout code
+#       uses: actions/checkout@v4
+#       with:
+#         fetch-depth: 0

-      - name: Install node dependencies
-        working-directory: ./web
-        run: npm ci
+#     - name: Setup node
+#       uses: actions/setup-node@v4
+#       with:
+#         node-version: 22

-      - name: Download Playwright test results
-        uses: actions/download-artifact@v4
-        with:
-          name: test-results
-          path: ./web/test-results
+#     - name: Install node dependencies
+#       working-directory: ./web
+#       run: npm ci

-      - name: Run Chromatic
-        uses: chromaui/action@latest
-        with:
-          playwright: true
-          projectToken: ${{ secrets.CHROMATIC_PROJECT_TOKEN }}
-          workingDir: ./web
-        env:
-          CHROMATIC_ARCHIVE_LOCATION: ./test-results
+#     - name: Download Playwright test results
+#       uses: actions/download-artifact@v4
+#       with:
+#         name: test-results
+#         path: ./web/test-results
+
+#     - name: Run Chromatic
+#       uses: chromaui/action@latest
+#       with:
+#         playwright: true
+#         projectToken: ${{ secrets.CHROMATIC_PROJECT_TOKEN }}
+#         workingDir: ./web
+#       env:
+#         CHROMATIC_ARCHIVE_LOCATION: ./test-results
--- a/.github/workflows/pr-python-connector-tests.yml
+++ b/.github/workflows/pr-python-connector-tests.yml
@@ -44,6 +44,9 @@ env:
  SHAREPOINT_CLIENT_SECRET: ${{ secrets.SHAREPOINT_CLIENT_SECRET }}
  SHAREPOINT_CLIENT_DIRECTORY_ID: ${{ secrets.SHAREPOINT_CLIENT_DIRECTORY_ID }}
  SHAREPOINT_SITE: ${{ secrets.SHAREPOINT_SITE }}
+  # Gitbook
+  GITBOOK_SPACE_ID: ${{ secrets.GITBOOK_SPACE_ID }}
+  GITBOOK_API_KEY: ${{ secrets.GITBOOK_API_KEY }}

 jobs:
  connectors-check:
@@ -71,7 +74,9 @@ jobs:
          python -m pip install --upgrade pip
          pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
          pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
-
+          playwright install chromium
+          playwright install-deps chromium
+          
      - name: Run Tests
        shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
        run: py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/connectors
--- a/.github/workflows/pr-python-model-tests.yml
+++ b/.github/workflows/pr-python-model-tests.yml
@@ -1,18 +1,29 @@
-name: Connector Tests
+name: Model Server Tests

 on:
  schedule:
    # This cron expression runs the job daily at 16:00 UTC (9am PT)
    - cron: "0 16 * * *"
-
+  workflow_dispatch:
+    inputs:
+      branch:
+        description: 'Branch to run the workflow on'
+        required: false
+        default: 'main'
+        
 env:
  # Bedrock
  AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
  AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
  AWS_REGION_NAME: ${{ secrets.AWS_REGION_NAME }}

-  # OpenAI
+  # API keys for testing
+  COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
+  LITELLM_API_KEY: ${{ secrets.LITELLM_API_KEY }}
+  LITELLM_API_URL: ${{ secrets.LITELLM_API_URL }}
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+  AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
+  AZURE_API_URL: ${{ secrets.AZURE_API_URL }}

 jobs:
  model-check:
@@ -26,6 +37,23 @@ jobs:
      - name: Checkout code
        uses: actions/checkout@v4

+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      # tag every docker image with "test" so that we can spin up the correct set
+      # of images during testing
+
+      # We don't need to build the Web Docker image since it's not yet used
+      # in the integration tests. We have a separate action to verify that it builds
+      # successfully.
+      - name: Pull Model Server Docker image
+        run: |
+          docker pull onyxdotapp/onyx-model-server:latest
+          docker tag onyxdotapp/onyx-model-server:latest onyxdotapp/onyx-model-server:test
+          
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
@@ -41,6 +69,49 @@ jobs:
          pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
          pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt

+      - name: Start Docker containers
+        run: |
+          cd deployment/docker_compose
+          ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
+          AUTH_TYPE=basic \
+          REQUIRE_EMAIL_VERIFICATION=false \
+          DISABLE_TELEMETRY=true \
+          IMAGE_TAG=test \
+          docker compose -f docker-compose.model-server-test.yml -p onyx-stack up -d indexing_model_server
+        id: start_docker
+
+      - name: Wait for service to be ready
+        run: |
+          echo "Starting wait-for-service script..."
+
+          start_time=$(date +%s)
+          timeout=300  # 5 minutes in seconds
+
+          while true; do
+            current_time=$(date +%s)
+            elapsed_time=$((current_time - start_time))
+            
+            if [ $elapsed_time -ge $timeout ]; then
+              echo "Timeout reached. Service did not become ready in 5 minutes."
+              exit 1
+            fi
+            
+            # Use curl with error handling to ignore specific exit code 56
+            response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:9000/api/health || echo "curl_error")
+            
+            if [ "$response" = "200" ]; then
+              echo "Service is ready!"
+              break
+            elif [ "$response" = "curl_error" ]; then
+              echo "Curl encountered an error, possibly exit code 56. Continuing to retry..."
+            else
+              echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
+            fi
+            
+            sleep 5
+          done
+          echo "Finished waiting for service."
+          
      - name: Run Tests
        shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
        run: |
@@ -56,3 +127,23 @@ jobs:
            -H 'Content-type: application/json' \
            --data '{"text":"Scheduled Model Tests failed! Check the run at: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' \
            $SLACK_WEBHOOK
+            
+      - name: Dump all-container logs (optional)
+        if: always()
+        run: |
+          cd deployment/docker_compose
+          docker compose -f docker-compose.model-server-test.yml -p onyx-stack logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
+
+      - name: Upload logs
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: docker-all-logs
+          path: ${{ github.workspace }}/docker-compose.log
+          
+      - name: Stop Docker containers
+        if: always()
+        run: |
+          cd deployment/docker_compose
+          docker compose -f docker-compose.model-server-test.yml -p onyx-stack down -v
+          
--- a/.vscode/launch.template.jsonc
+++ b/.vscode/launch.template.jsonc
@@ -205,7 +205,7 @@
                "--loglevel=INFO",
                "--hostname=light@%n",
                "-Q",
-                "vespa_metadata_sync,connector_deletion,doc_permissions_upsert",
+                "vespa_metadata_sync,connector_deletion,doc_permissions_upsert,checkpoint_cleanup",
            ],
            "presentation": {
 				 "group": "2",
--- a/README.md
+++ b/README.md
@@ -24,112 +24,93 @@
 </a>
 </p>

-<strong>[Onyx](https://www.onyx.app/)</strong> (formerly Danswer) is the AI Assistant connected to your company's docs, apps, and people.
-Onyx provides a Chat interface and plugs into any LLM of your choice. Onyx can be deployed anywhere and for any
-scale - on a laptop, on-premise, or to cloud. Since you own the deployment, your user data and chats are fully in your
-own control. Onyx is dual Licensed with most of it under MIT license and designed to be modular and easily extensible. The system also comes fully ready
-for production usage with user authentication, role management (admin/basic users), chat persistence, and a UI for
-configuring AI Assistants.
+<strong>[Onyx](https://www.onyx.app/)</strong> (formerly Danswer) is the AI platform connected to your company's docs, apps, and people.
+Onyx provides a feature rich Chat interface and plugs into any LLM of your choice.
+Keep knowledge and access controls sync-ed across over 40 connectors like Google Drive, Slack, Confluence, Salesforce, etc.
+Create custom AI agents with unique prompts, knowledge, and actions that the agents can take.
+Onyx can be deployed securely anywhere and for any scale - on a laptop, on-premise, or to cloud.

-Onyx also serves as a Enterprise Search across all common workplace tools such as Slack, Google Drive, Confluence, etc.
-By combining LLMs and team specific knowledge, Onyx becomes a subject matter expert for the team. Imagine ChatGPT if
-it had access to your team's unique knowledge! It enables questions such as "A customer wants feature X, is this already
-supported?" or "Where's the pull request for feature Y?"

-<h3>Usage</h3>
+<h3>Feature Highlights</h3>

-Onyx Web App:
+**Deep research over your team's knowledge:**

-https://github.com/onyx-dot-app/onyx/assets/32520769/563be14c-9304-47b5-bf0a-9049c2b6f410
+https://private-user-images.githubusercontent.com/32520769/414509312-48392e83-95d0-4fb5-8650-a396e05e0a32.mp4?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3Mzk5Mjg2MzYsIm5iZiI6MTczOTkyODMzNiwicGF0aCI6Ii8zMjUyMDc2OS80MTQ1MDkzMTItNDgzOTJlODMtOTVkMC00ZmI1LTg2NTAtYTM5NmUwNWUwYTMyLm1wND9YLUFtei1BbGdvcml0aG09QVdTNC1ITUFDLVNIQTI1NiZYLUFtei1DcmVkZW50aWFsPUFLSUFWQ09EWUxTQTUzUFFLNFpBJTJGMjAyNTAyMTklMkZ1cy1lYXN0LTElMkZzMyUyRmF3czRfcmVxdWVzdCZYLUFtei1EYXRlPTIwMjUwMjE5VDAxMjUzNlomWC1BbXotRXhwaXJlcz0zMDAmWC1BbXotU2lnbmF0dXJlPWFhMzk5Njg2Y2Y5YjFmNDNiYTQ2YzM5ZTg5YWJiYTU2NWMyY2YwNmUyODE2NWUxMDRiMWQxZWJmODI4YTA0MTUmWC1BbXotU2lnbmVkSGVhZGVycz1ob3N0In0.a9D8A0sgKE9AoaoE-mfFbJ6_OKYeqaf7TZ4Han2JfW8

-Or, plug Onyx into your existing Slack workflows (more integrations to come 😁):

-https://github.com/onyx-dot-app/onyx/assets/25087905/3e19739b-d178-4371-9a38-011430bdec1b
+**Use Onyx as a secure AI Chat with any LLM:**
+
+![Onyx Chat Silent Demo](https://github.com/onyx-dot-app/onyx/releases/download/v0.21.1/OnyxChatSilentDemo.gif)
+
+
+**Easily set up connectors to your apps:**
+
+![Onyx Connector Silent Demo](https://github.com/onyx-dot-app/onyx/releases/download/v0.21.1/OnyxConnectorSilentDemo.gif)
+
+
+**Access Onyx where your team already works:**
+
+![Onyx Bot Demo](https://github.com/onyx-dot-app/onyx/releases/download/v0.21.1/OnyxBot.png)

-For more details on the Admin UI to manage connectors and users, check out our
-<strong><a href="https://www.youtube.com/watch?v=geNzY1nbCnU">Full Video Demo</a></strong>!

 ## Deployment
+**To try it out for free and get started in seconds, check out [Onyx Cloud](https://cloud.onyx.app/signup)**.

-Onyx can easily be run locally (even on a laptop) or deployed on a virtual machine with a single
+Onyx can also be run locally (even on a laptop) or deployed on a virtual machine with a single
 `docker compose` command. Checkout our [docs](https://docs.onyx.app/quickstart) to learn more.

-We also have built-in support for deployment on Kubernetes. Files for that can be found [here](https://github.com/onyx-dot-app/onyx/tree/main/deployment/kubernetes).
+We also have built-in support for high-availability/scalable deployment on Kubernetes.
+References [here](https://github.com/onyx-dot-app/onyx/tree/main/deployment).

-## 💃 Main Features

- Chat UI with the ability to select documents to chat with.
- Create custom AI Assistants with different prompts and backing knowledge sets.
- Connect Onyx with LLM of your choice (self-host for a fully airgapped solution).
- Document Search + AI Answers for natural language queries.
- Connectors to all common workplace tools like Google Drive, Confluence, Slack, etc.
- Slack integration to get answers and search results directly in Slack.
+## 🔍 Other Notable Benefits of Onyx
+- Custom deep learning models for indexing and inference time, only through Onyx + learning from user feedback.
+- Flexible security features like SSO (OIDC/SAML/OAuth2), RBAC, encryption of credentials, etc.
+- Knowledge curation features like document-sets, query history, usage analytics, etc.
+- Scalable deployment options tested up to many tens of thousands users and hundreds of millions of documents.
+

 ## 🚧 Roadmap
-
- Chat/Prompt sharing with specific teammates and user groups.
- Multimodal model support, chat with images, video etc.
- Choosing between LLMs and parameters during chat session.
- Tool calling and agent configurations options.
+- New methods in information retrieval (StructRAG, LightGraphRAG, etc.)
+- Personalized Search
 - Organizational understanding and ability to locate and suggest experts from your team.
+- Code Search
+- SQL and Structured Query Language

-## Other Notable Benefits of Onyx
-
- User Authentication with document level access management.
- Best in class Hybrid Search across all sources (BM-25 + prefix aware embedding models).
- Admin Dashboard to configure connectors, document-sets, access, etc.
- Custom deep learning models + learn from user feedback.
- Easy deployment and ability to host Onyx anywhere of your choosing.

 ## 🔌 Connectors
+Keep knowledge and access up to sync across 40+ connectors:

-Efficiently pulls the latest changes from:
-
- Slack
- GitHub
 - Google Drive
 - Confluence
+- Slack
+- Gmail
+- Salesforce
+- Microsoft Sharepoint
+- Github
 - Jira
 - Zendesk
- Gmail
- Notion
 - Gong
- Slab
- Linear
- Productboard
- Guru
- Bookstack
- Document360
- Sharepoint
- Hubspot
+- Microsoft Teams
+- Dropbox
 - Local Files
 - Websites
 - And more ...

-## 📚 Editions
+See the full list [here](https://docs.onyx.app/connectors).

+
+## 📚 Licensing
 There are two editions of Onyx:

- Onyx Community Edition (CE) is available freely under the MIT Expat license. This version has ALL the core features discussed above. This is the version of Onyx you will get if you follow the Deployment guide above.
- Onyx Enterprise Edition (EE) includes extra features that are primarily useful for larger organizations. Specifically, this includes:
-  - Single Sign-On (SSO), with support for both SAML and OIDC
-  - Role-based access control
-  - Document permission inheritance from connected sources
-  - Usage analytics and query history accessible to admins
-  - Whitelabeling
-  - API key authentication
-  - Encryption of secrets
-  - And many more! Checkout [our website](https://www.onyx.app/) for the latest.
+- Onyx Community Edition (CE) is available freely under the MIT Expat license. Simply follow the Deployment guide above.
+- Onyx Enterprise Edition (EE) includes extra features that are primarily useful for larger organizations.
+For feature details, check out [our website](https://www.onyx.app/pricing).

 To try the Onyx Enterprise Edition:
+1. Checkout [Onyx Cloud](https://cloud.onyx.app/signup).
+2. For self-hosting the Enterprise Edition, contact us at [founders@onyx.app](mailto:founders@onyx.app) or book a call with us on our [Cal](https://cal.com/team/onyx/founders).

-1. Checkout our [Cloud product](https://cloud.onyx.app/signup).
-2. For self-hosting, contact us at [founders@onyx.app](mailto:founders@onyx.app) or book a call with us on our [Cal](https://cal.com/team/onyx/founders).

 ## 💡 Contributing
-
 Looking to contribute? Please check out the [Contribution Guide](CONTRIBUTING.md) for more details.
-
-## ⭐Star History
-
-[![Star History Chart](https://api.star-history.com/svg?repos=onyx-dot-app/onyx&type=Date)](https://star-history.com/#onyx-dot-app/onyx&Date)
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -28,14 +28,16 @@ RUN apt-get update && \
        curl \
        zip \
        ca-certificates \
-        libgnutls30=3.7.9-2+deb12u3 \
-        libblkid1=2.38.1-5+deb12u1 \
-        libmount1=2.38.1-5+deb12u1 \
-        libsmartcols1=2.38.1-5+deb12u1 \
-        libuuid1=2.38.1-5+deb12u1 \
+        libgnutls30 \
+        libblkid1 \
+        libmount1 \
+        libsmartcols1 \
+        libuuid1 \
        libxmlsec1-dev \
        pkg-config \
-        gcc && \
+        gcc \
+        nano \
+        vim && \
    rm -rf /var/lib/apt/lists/* && \
    apt-get clean

--- a/backend/alembic/versions/1a03d2c2856b_add_indexes_to_document__tag.py
+++ b/backend/alembic/versions/1a03d2c2856b_add_indexes_to_document__tag.py
@@ -0,0 +1,27 @@
+"""Add indexes to document__tag
+
+Revision ID: 1a03d2c2856b
+Revises: 9c00a2bccb83
+Create Date: 2025-02-18 10:45:13.957807
+
+"""
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "1a03d2c2856b"
+down_revision = "9c00a2bccb83"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+    op.create_index(
+        op.f("ix_document__tag_tag_id"),
+        "document__tag",
+        ["tag_id"],
+        unique=False,
+    )
+
+
+def downgrade() -> None:
+    op.drop_index(op.f("ix_document__tag_tag_id"), table_name="document__tag")
--- a/backend/alembic/versions/2cdeff6d8c93_set_built_in_to_default.py
+++ b/backend/alembic/versions/2cdeff6d8c93_set_built_in_to_default.py
@@ -0,0 +1,32 @@
+"""set built in to default
+
+Revision ID: 2cdeff6d8c93
+Revises: f5437cc136c5
+Create Date: 2025-02-11 14:57:51.308775
+
+"""
+from alembic import op
+
+
+# revision identifiers, used by Alembic.
+revision = "2cdeff6d8c93"
+down_revision = "f5437cc136c5"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # Prior to this migration / point in the codebase history,
+    # built in personas were implicitly treated as default personas (with no option to change this)
+    # This migration makes that explicit
+    op.execute(
+        """
+        UPDATE persona
+        SET is_default_persona = TRUE
+        WHERE builtin_persona = TRUE
+    """
+    )
+
+
+def downgrade() -> None:
+    pass
--- a/backend/alembic/versions/3934b1bc7b62_update_github_connector_repo_name_to_.py
+++ b/backend/alembic/versions/3934b1bc7b62_update_github_connector_repo_name_to_.py
@@ -0,0 +1,125 @@
+"""Update GitHub connector repo_name to repositories
+
+Revision ID: 3934b1bc7b62
+Revises: b7c2b63c4a03
+Create Date: 2025-03-05 10:50:30.516962
+
+"""
+from alembic import op
+import sqlalchemy as sa
+import json
+import logging
+
+# revision identifiers, used by Alembic.
+revision = "3934b1bc7b62"
+down_revision = "b7c2b63c4a03"
+branch_labels = None
+depends_on = None
+
+logger = logging.getLogger("alembic.runtime.migration")
+
+
+def upgrade() -> None:
+    # Get all GitHub connectors
+    conn = op.get_bind()
+
+    # First get all GitHub connectors
+    github_connectors = conn.execute(
+        sa.text(
+            """
+            SELECT id, connector_specific_config
+            FROM connector
+            WHERE source = 'GITHUB'
+            """
+        )
+    ).fetchall()
+
+    # Update each connector's config
+    updated_count = 0
+    for connector_id, config in github_connectors:
+        try:
+            if not config:
+                logger.warning(f"Connector {connector_id} has no config, skipping")
+                continue
+
+            # Parse the config if it's a string
+            if isinstance(config, str):
+                config = json.loads(config)
+
+            if "repo_name" not in config:
+                continue
+
+            # Create new config with repositories instead of repo_name
+            new_config = dict(config)
+            repo_name_value = new_config.pop("repo_name")
+            new_config["repositories"] = repo_name_value
+
+            # Update the connector with the new config
+            conn.execute(
+                sa.text(
+                    """
+                    UPDATE connector
+                    SET connector_specific_config = :new_config
+                    WHERE id = :connector_id
+                    """
+                ),
+                {"connector_id": connector_id, "new_config": json.dumps(new_config)},
+            )
+            updated_count += 1
+        except Exception as e:
+            logger.error(f"Error updating connector {connector_id}: {str(e)}")
+
+
+def downgrade() -> None:
+    # Get all GitHub connectors
+    conn = op.get_bind()
+
+    logger.debug(
+        "Starting rollback of GitHub connectors from repositories to repo_name"
+    )
+
+    github_connectors = conn.execute(
+        sa.text(
+            """
+            SELECT id, connector_specific_config
+            FROM connector
+            WHERE source = 'GITHUB'
+            """
+        )
+    ).fetchall()
+
+    logger.debug(f"Found {len(github_connectors)} GitHub connectors to rollback")
+
+    # Revert each GitHub connector to use repo_name instead of repositories
+    reverted_count = 0
+    for connector_id, config in github_connectors:
+        try:
+            if not config:
+                continue
+
+            # Parse the config if it's a string
+            if isinstance(config, str):
+                config = json.loads(config)
+
+            if "repositories" not in config:
+                continue
+
+            # Create new config with repo_name instead of repositories
+            new_config = dict(config)
+            repositories_value = new_config.pop("repositories")
+            new_config["repo_name"] = repositories_value
+
+            # Update the connector with the new config
+            conn.execute(
+                sa.text(
+                    """
+                    UPDATE connector
+                    SET connector_specific_config = :new_config
+                    WHERE id = :connector_id
+                    """
+                ),
+                {"new_config": json.dumps(new_config), "connector_id": connector_id},
+            )
+            reverted_count += 1
+        except Exception as e:
+            logger.error(f"Error reverting connector {connector_id}: {str(e)}")
--- a/backend/alembic/versions/3bd4c84fe72f_improved_index.py
+++ b/backend/alembic/versions/3bd4c84fe72f_improved_index.py
@@ -0,0 +1,84 @@
+"""improved index
+
+Revision ID: 3bd4c84fe72f
+Revises: 8f43500ee275
+Create Date: 2025-02-26 13:07:56.217791
+
+"""
+from alembic import op
+
+
+# revision identifiers, used by Alembic.
+revision = "3bd4c84fe72f"
+down_revision = "8f43500ee275"
+branch_labels = None
+depends_on = None
+
+
+# NOTE:
+# This migration addresses issues with the previous migration (8f43500ee275) which caused
+# an outage by creating an index without using CONCURRENTLY. This migration:
+#
+# 1. Creates more efficient full-text search capabilities using tsvector columns and GIN indexes
+# 2. Uses CONCURRENTLY for all index creation to prevent table locking
+# 3. Explicitly manages transactions with COMMIT statements to allow CONCURRENTLY to work
+# (see: https://www.postgresql.org/docs/9.4/sql-createindex.html#SQL-CREATEINDEX-CONCURRENTLY)
+# (see: https://github.com/sqlalchemy/alembic/issues/277)
+# 4. Adds indexes to both chat_message and chat_session tables for comprehensive search
+
+
+def upgrade() -> None:
+    # Create a GIN index for full-text search on chat_message.message
+    op.execute(
+        """
+        ALTER TABLE chat_message
+        ADD COLUMN message_tsv tsvector
+        GENERATED ALWAYS AS (to_tsvector('english', message)) STORED;
+        """
+    )
+
+    # Commit the current transaction before creating concurrent indexes
+    op.execute("COMMIT")
+
+    op.execute(
+        """
+        CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_chat_message_tsv
+        ON chat_message
+        USING GIN (message_tsv)
+        """
+    )
+
+    # Also add a stored tsvector column for chat_session.description
+    op.execute(
+        """
+        ALTER TABLE chat_session
+        ADD COLUMN description_tsv tsvector
+        GENERATED ALWAYS AS (to_tsvector('english', coalesce(description, ''))) STORED;
+        """
+    )
+
+    # Commit again before creating the second concurrent index
+    op.execute("COMMIT")
+
+    op.execute(
+        """
+        CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_chat_session_desc_tsv
+        ON chat_session
+        USING GIN (description_tsv)
+        """
+    )
+
+
+def downgrade() -> None:
+    # Drop the indexes first (use CONCURRENTLY for dropping too)
+    op.execute("COMMIT")
+    op.execute("DROP INDEX CONCURRENTLY IF EXISTS idx_chat_message_tsv;")
+
+    op.execute("COMMIT")
+    op.execute("DROP INDEX CONCURRENTLY IF EXISTS idx_chat_session_desc_tsv;")
+
+    # Then drop the columns
+    op.execute("ALTER TABLE chat_message DROP COLUMN IF EXISTS message_tsv;")
+    op.execute("ALTER TABLE chat_session DROP COLUMN IF EXISTS description_tsv;")
+
+    op.execute("DROP INDEX IF EXISTS idx_chat_message_message_lower;")
--- a/backend/alembic/versions/8f43500ee275_add_index.py
+++ b/backend/alembic/versions/8f43500ee275_add_index.py
@@ -0,0 +1,32 @@
+"""add index
+
+Revision ID: 8f43500ee275
+Revises: da42808081e3
+Create Date: 2025-02-24 17:35:33.072714
+
+"""
+from alembic import op
+
+
+# revision identifiers, used by Alembic.
+revision = "8f43500ee275"
+down_revision = "da42808081e3"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # Create a basic index on the lowercase message column for direct text matching
+    # Limit to 1500 characters to stay well under the 2856 byte limit of btree version 4
+    # op.execute(
+    #     """
+    #     CREATE INDEX idx_chat_message_message_lower
+    #     ON chat_message (LOWER(substring(message, 1, 1500)))
+    #     """
+    # )
+    pass
+
+
+def downgrade() -> None:
+    # Drop the index
+    op.execute("DROP INDEX IF EXISTS idx_chat_message_message_lower;")
--- a/backend/alembic/versions/9c00a2bccb83_chat_message_agentic.py
+++ b/backend/alembic/versions/9c00a2bccb83_chat_message_agentic.py
@@ -0,0 +1,43 @@
+"""chat_message_agentic
+
+Revision ID: 9c00a2bccb83
+Revises: b7a7eee5aa15
+Create Date: 2025-02-17 11:15:43.081150
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "9c00a2bccb83"
+down_revision = "b7a7eee5aa15"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # First add the column as nullable
+    op.add_column("chat_message", sa.Column("is_agentic", sa.Boolean(), nullable=True))
+
+    # Update existing rows based on presence of SubQuestions
+    op.execute(
+        """
+        UPDATE chat_message
+        SET is_agentic = EXISTS (
+            SELECT 1
+            FROM agent__sub_question
+            WHERE agent__sub_question.primary_question_id = chat_message.id
+        )
+        WHERE is_agentic IS NULL
+    """
+    )
+
+    # Make the column non-nullable with a default value of False
+    op.alter_column(
+        "chat_message", "is_agentic", nullable=False, server_default=sa.text("false")
+    )
+
+
+def downgrade() -> None:
+    op.drop_column("chat_message", "is_agentic")
--- a/backend/alembic/versions/acaab4ef4507_remove_inactive_ccpair_status_on_.py
+++ b/backend/alembic/versions/acaab4ef4507_remove_inactive_ccpair_status_on_.py
@@ -0,0 +1,29 @@
+"""remove inactive ccpair status on downgrade
+
+Revision ID: acaab4ef4507
+Revises: b388730a2899
+Create Date: 2025-02-16 18:21:41.330212
+
+"""
+from alembic import op
+from onyx.db.models import ConnectorCredentialPair
+from onyx.db.enums import ConnectorCredentialPairStatus
+from sqlalchemy import update
+
+# revision identifiers, used by Alembic.
+revision = "acaab4ef4507"
+down_revision = "b388730a2899"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    pass
+
+
+def downgrade() -> None:
+    op.execute(
+        update(ConnectorCredentialPair)
+        .where(ConnectorCredentialPair.status == ConnectorCredentialPairStatus.INVALID)
+        .values(status=ConnectorCredentialPairStatus.ACTIVE)
+    )
--- a/backend/alembic/versions/b388730a2899_nullable_preferences.py
+++ b/backend/alembic/versions/b388730a2899_nullable_preferences.py
@@ -0,0 +1,31 @@
+"""nullable preferences
+
+Revision ID: b388730a2899
+Revises: 1a03d2c2856b
+Create Date: 2025-02-17 18:49:22.643902
+
+"""
+from alembic import op
+
+
+# revision identifiers, used by Alembic.
+revision = "b388730a2899"
+down_revision = "1a03d2c2856b"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    op.alter_column("user", "temperature_override_enabled", nullable=True)
+    op.alter_column("user", "auto_scroll", nullable=True)
+
+
+def downgrade() -> None:
+    # Ensure no null values before making columns non-nullable
+    op.execute(
+        'UPDATE "user" SET temperature_override_enabled = false WHERE temperature_override_enabled IS NULL'
+    )
+    op.execute('UPDATE "user" SET auto_scroll = false WHERE auto_scroll IS NULL')
+
+    op.alter_column("user", "temperature_override_enabled", nullable=False)
+    op.alter_column("user", "auto_scroll", nullable=False)
--- a/backend/alembic/versions/b7a7eee5aa15_add_checkpointing_failure_handling.py
+++ b/backend/alembic/versions/b7a7eee5aa15_add_checkpointing_failure_handling.py
@@ -0,0 +1,124 @@
+"""Add checkpointing/failure handling
+
+Revision ID: b7a7eee5aa15
+Revises: f39c5794c10a
+Create Date: 2025-01-24 15:17:36.763172
+
+"""
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision = "b7a7eee5aa15"
+down_revision = "f39c5794c10a"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    op.add_column(
+        "index_attempt",
+        sa.Column("checkpoint_pointer", sa.String(), nullable=True),
+    )
+    op.add_column(
+        "index_attempt",
+        sa.Column("poll_range_start", sa.DateTime(timezone=True), nullable=True),
+    )
+    op.add_column(
+        "index_attempt",
+        sa.Column("poll_range_end", sa.DateTime(timezone=True), nullable=True),
+    )
+
+    op.create_index(
+        "ix_index_attempt_cc_pair_settings_poll",
+        "index_attempt",
+        [
+            "connector_credential_pair_id",
+            "search_settings_id",
+            "status",
+            sa.text("time_updated DESC"),
+        ],
+    )
+
+    # Drop the old IndexAttemptError table
+    op.drop_index("index_attempt_id", table_name="index_attempt_errors")
+    op.drop_table("index_attempt_errors")
+
+    # Create the new version of the table
+    op.create_table(
+        "index_attempt_errors",
+        sa.Column("id", sa.Integer(), primary_key=True),
+        sa.Column("index_attempt_id", sa.Integer(), nullable=False),
+        sa.Column("connector_credential_pair_id", sa.Integer(), nullable=False),
+        sa.Column("document_id", sa.String(), nullable=True),
+        sa.Column("document_link", sa.String(), nullable=True),
+        sa.Column("entity_id", sa.String(), nullable=True),
+        sa.Column("failed_time_range_start", sa.DateTime(timezone=True), nullable=True),
+        sa.Column("failed_time_range_end", sa.DateTime(timezone=True), nullable=True),
+        sa.Column("failure_message", sa.Text(), nullable=False),
+        sa.Column("is_resolved", sa.Boolean(), nullable=False, default=False),
+        sa.Column(
+            "time_created",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.ForeignKeyConstraint(
+            ["index_attempt_id"],
+            ["index_attempt.id"],
+        ),
+        sa.ForeignKeyConstraint(
+            ["connector_credential_pair_id"],
+            ["connector_credential_pair.id"],
+        ),
+    )
+
+
+def downgrade() -> None:
+    op.execute("SET lock_timeout = '5s'")
+
+    # try a few times to drop the table, this has been observed to fail due to other locks
+    # blocking the drop
+    NUM_TRIES = 10
+    for i in range(NUM_TRIES):
+        try:
+            op.drop_table("index_attempt_errors")
+            break
+        except Exception as e:
+            if i == NUM_TRIES - 1:
+                raise e
+            print(f"Error dropping table: {e}. Retrying...")
+
+    op.execute("SET lock_timeout = DEFAULT")
+
+    # Recreate the old IndexAttemptError table
+    op.create_table(
+        "index_attempt_errors",
+        sa.Column("id", sa.Integer(), primary_key=True),
+        sa.Column("index_attempt_id", sa.Integer(), nullable=True),
+        sa.Column("batch", sa.Integer(), nullable=True),
+        sa.Column("doc_summaries", postgresql.JSONB(), nullable=False),
+        sa.Column("error_msg", sa.Text(), nullable=True),
+        sa.Column("traceback", sa.Text(), nullable=True),
+        sa.Column(
+            "time_created",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+        ),
+        sa.ForeignKeyConstraint(
+            ["index_attempt_id"],
+            ["index_attempt.id"],
+        ),
+    )
+
+    op.create_index(
+        "index_attempt_id",
+        "index_attempt_errors",
+        ["time_created"],
+    )
+
+    op.drop_index("ix_index_attempt_cc_pair_settings_poll")
+    op.drop_column("index_attempt", "checkpoint_pointer")
+    op.drop_column("index_attempt", "poll_range_start")
+    op.drop_column("index_attempt", "poll_range_end")
--- a/backend/alembic/versions/b7c2b63c4a03_add_background_reindex_enabled_field.py
+++ b/backend/alembic/versions/b7c2b63c4a03_add_background_reindex_enabled_field.py
@@ -0,0 +1,55 @@
+"""add background_reindex_enabled field
+
+Revision ID: b7c2b63c4a03
+Revises: f11b408e39d3
+Create Date: 2024-03-26 12:34:56.789012
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+from onyx.db.enums import EmbeddingPrecision
+
+
+# revision identifiers, used by Alembic.
+revision = "b7c2b63c4a03"
+down_revision = "f11b408e39d3"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # Add background_reindex_enabled column with default value of True
+    op.add_column(
+        "search_settings",
+        sa.Column(
+            "background_reindex_enabled",
+            sa.Boolean(),
+            nullable=False,
+            server_default="true",
+        ),
+    )
+
+    # Add embedding_precision column with default value of FLOAT
+    op.add_column(
+        "search_settings",
+        sa.Column(
+            "embedding_precision",
+            sa.Enum(EmbeddingPrecision, native_enum=False),
+            nullable=False,
+            server_default=EmbeddingPrecision.FLOAT.name,
+        ),
+    )
+
+    # Add reduced_dimension column with default value of None
+    op.add_column(
+        "search_settings",
+        sa.Column("reduced_dimension", sa.Integer(), nullable=True),
+    )
+
+
+def downgrade() -> None:
+    # Remove the background_reindex_enabled column
+    op.drop_column("search_settings", "background_reindex_enabled")
+    op.drop_column("search_settings", "embedding_precision")
+    op.drop_column("search_settings", "reduced_dimension")
--- a/backend/alembic/versions/da42808081e3_migrate_jira_connectors_to_new_format.py
+++ b/backend/alembic/versions/da42808081e3_migrate_jira_connectors_to_new_format.py
@@ -0,0 +1,120 @@
+"""migrate jira connectors to new format
+
+Revision ID: da42808081e3
+Revises: f13db29f3101
+Create Date: 2025-02-24 11:24:54.396040
+
+"""
+from alembic import op
+import sqlalchemy as sa
+import json
+
+from onyx.configs.constants import DocumentSource
+from onyx.connectors.onyx_jira.utils import extract_jira_project
+
+
+# revision identifiers, used by Alembic.
+revision = "da42808081e3"
+down_revision = "f13db29f3101"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # Get all Jira connectors
+    conn = op.get_bind()
+
+    # First get all Jira connectors
+    jira_connectors = conn.execute(
+        sa.text(
+            """
+            SELECT id, connector_specific_config
+            FROM connector
+            WHERE source = :source
+            """
+        ),
+        {"source": DocumentSource.JIRA.value.upper()},
+    ).fetchall()
+
+    # Update each connector's config
+    for connector_id, old_config in jira_connectors:
+        if not old_config:
+            continue
+
+        # Extract project key from URL if it exists
+        new_config: dict[str, str | None] = {}
+        if project_url := old_config.get("jira_project_url"):
+            # Parse the URL to get base and project
+            try:
+                jira_base, project_key = extract_jira_project(project_url)
+                new_config = {"jira_base_url": jira_base, "project_key": project_key}
+            except ValueError:
+                # If URL parsing fails, just use the URL as the base
+                new_config = {
+                    "jira_base_url": project_url.split("/projects/")[0],
+                    "project_key": None,
+                }
+        else:
+            # For connectors without a project URL, we need admin intervention
+            # Mark these for review
+            print(
+                f"WARNING: Jira connector {connector_id} has no project URL configured"
+            )
+            continue
+
+        # Update the connector config
+        conn.execute(
+            sa.text(
+                """
+                UPDATE connector
+                SET connector_specific_config = :new_config
+                WHERE id = :id
+                """
+            ),
+            {"id": connector_id, "new_config": json.dumps(new_config)},
+        )
+
+
+def downgrade() -> None:
+    # Get all Jira connectors
+    conn = op.get_bind()
+
+    # First get all Jira connectors
+    jira_connectors = conn.execute(
+        sa.text(
+            """
+            SELECT id, connector_specific_config
+            FROM connector
+            WHERE source = :source
+            """
+        ),
+        {"source": DocumentSource.JIRA.value.upper()},
+    ).fetchall()
+
+    # Update each connector's config back to the old format
+    for connector_id, new_config in jira_connectors:
+        if not new_config:
+            continue
+
+        old_config = {}
+        base_url = new_config.get("jira_base_url")
+        project_key = new_config.get("project_key")
+
+        if base_url and project_key:
+            old_config = {"jira_project_url": f"{base_url}/projects/{project_key}"}
+        elif base_url:
+            old_config = {"jira_project_url": base_url}
+        else:
+            continue
+
+        # Update the connector config
+        conn.execute(
+            sa.text(
+                """
+                UPDATE connector
+                SET connector_specific_config = :old_config
+                WHERE id = :id
+                """
+            ),
+            {"id": connector_id, "old_config": old_config},
+        )
--- a/backend/alembic/versions/f11b408e39d3_force_lowercase_all_users.py
+++ b/backend/alembic/versions/f11b408e39d3_force_lowercase_all_users.py
@@ -0,0 +1,36 @@
+"""force lowercase all users
+
+Revision ID: f11b408e39d3
+Revises: 3bd4c84fe72f
+Create Date: 2025-02-26 17:04:55.683500
+
+"""
+
+
+# revision identifiers, used by Alembic.
+revision = "f11b408e39d3"
+down_revision = "3bd4c84fe72f"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # 1) Convert all existing user emails to lowercase
+    from alembic import op
+
+    op.execute(
+        """
+        UPDATE "user"
+        SET email = LOWER(email)
+        """
+    )
+
+    # 2) Add a check constraint to ensure emails are always lowercase
+    op.create_check_constraint("ensure_lowercase_email", "user", "email = LOWER(email)")
+
+
+def downgrade() -> None:
+    # Drop the check constraint
+    from alembic import op
+
+    op.drop_constraint("ensure_lowercase_email", "user", type_="check")
--- a/backend/alembic/versions/f13db29f3101_add_composite_index_for_last_modified_.py
+++ b/backend/alembic/versions/f13db29f3101_add_composite_index_for_last_modified_.py
@@ -0,0 +1,27 @@
+"""Add composite index for last_modified and last_synced to document
+
+Revision ID: f13db29f3101
+Revises: b388730a2899
+Create Date: 2025-02-18 22:48:11.511389
+
+"""
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "f13db29f3101"
+down_revision = "acaab4ef4507"
+branch_labels: str | None = None
+depends_on: str | None = None
+
+
+def upgrade() -> None:
+    op.create_index(
+        "ix_document_sync_status",
+        "document",
+        ["last_modified", "last_synced"],
+        unique=False,
+    )
+
+
+def downgrade() -> None:
+    op.drop_index("ix_document_sync_status", table_name="document")
--- a/backend/alembic/versions/f39c5794c10a_add_background_errors_table.py
+++ b/backend/alembic/versions/f39c5794c10a_add_background_errors_table.py
@@ -0,0 +1,40 @@
+"""Add background errors table
+
+Revision ID: f39c5794c10a
+Revises: 2cdeff6d8c93
+Create Date: 2025-02-12 17:11:14.527876
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+# revision identifiers, used by Alembic.
+revision = "f39c5794c10a"
+down_revision = "2cdeff6d8c93"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    op.create_table(
+        "background_error",
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column("message", sa.String(), nullable=False),
+        sa.Column(
+            "time_created",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column("cc_pair_id", sa.Integer(), nullable=True),
+        sa.PrimaryKeyConstraint("id"),
+        sa.ForeignKeyConstraint(
+            ["cc_pair_id"],
+            ["connector_credential_pair.id"],
+            ondelete="CASCADE",
+        ),
+    )
+
+
+def downgrade() -> None:
+    op.drop_table("background_error")
--- a/backend/alembic_tenants/versions/34e3630c7f32_lowercase_multi_tenant_user_auth.py
+++ b/backend/alembic_tenants/versions/34e3630c7f32_lowercase_multi_tenant_user_auth.py
@@ -0,0 +1,42 @@
+"""lowercase multi-tenant user auth
+
+Revision ID: 34e3630c7f32
+Revises: a4f6ee863c47
+Create Date: 2025-02-26 15:03:01.211894
+
+"""
+from alembic import op
+
+
+# revision identifiers, used by Alembic.
+revision = "34e3630c7f32"
+down_revision = "a4f6ee863c47"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # 1) Convert all existing rows to lowercase
+    op.execute(
+        """
+        UPDATE user_tenant_mapping
+        SET email = LOWER(email)
+        """
+    )
+    # 2) Add a check constraint so that emails cannot be written in uppercase
+    op.create_check_constraint(
+        "ensure_lowercase_email",
+        "user_tenant_mapping",
+        "email = LOWER(email)",
+        schema="public",
+    )
+
+
+def downgrade() -> None:
+    # Drop the check constraint
+    op.drop_constraint(
+        "ensure_lowercase_email",
+        "user_tenant_mapping",
+        schema="public",
+        type_="check",
+    )
--- a/backend/ee/onyx/background/celery/apps/primary.py
+++ b/backend/ee/onyx/background/celery/apps/primary.py
@@ -4,12 +4,11 @@ from ee.onyx.server.reporting.usage_export_generation import create_new_usage_re
 from onyx.background.celery.apps.primary import celery_app
 from onyx.background.task_utils import build_celery_task_wrapper
 from onyx.configs.app_configs import JOB_TIMEOUT
-from onyx.db.chat import delete_chat_sessions_older_than
-from onyx.db.engine import get_session_with_tenant
+from onyx.db.chat import delete_chat_session
+from onyx.db.chat import get_chat_sessions_older_than
+from onyx.db.engine import get_session_with_current_tenant
 from onyx.server.settings.store import load_settings
 from onyx.utils.logger import setup_logger
-from shared_configs.configs import MULTI_TENANT
-from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR

 logger = setup_logger()

@@ -18,11 +17,28 @@ logger = setup_logger()

@build_celery_task_wrapper(name_chat_ttl_task)
@celery_app.task(soft_time_limit=JOB_TIMEOUT)
-def perform_ttl_management_task(
-    retention_limit_days: int, *, tenant_id: str | None
-) -> None:
-    with get_session_with_tenant(tenant_id) as db_session:
-        delete_chat_sessions_older_than(retention_limit_days, db_session)
+def perform_ttl_management_task(retention_limit_days: int, *, tenant_id: str) -> None:
+    with get_session_with_current_tenant() as db_session:
+        old_chat_sessions = get_chat_sessions_older_than(
+            retention_limit_days, db_session
+        )
+
+    for user_id, session_id in old_chat_sessions:
+        # one session per delete so that we don't blow up if a deletion fails.
+        with get_session_with_current_tenant() as db_session:
+            try:
+                delete_chat_session(
+                    user_id,
+                    session_id,
+                    db_session,
+                    include_deleted=True,
+                    hard_delete=True,
+                )
+            except Exception:
+                logger.exception(
+                    "delete_chat_session exceptioned. "
+                    f"user_id={user_id} session_id={session_id}"
+                )


 #####
@@ -35,24 +51,19 @@ def perform_ttl_management_task(
    ignore_result=True,
    soft_time_limit=JOB_TIMEOUT,
 )
-def check_ttl_management_task(*, tenant_id: str | None) -> None:
+def check_ttl_management_task(*, tenant_id: str) -> None:
    """Runs periodically to check if any ttl tasks should be run and adds them
    to the queue"""
-    token = None
-    if MULTI_TENANT and tenant_id is not None:
-        token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)

    settings = load_settings()
    retention_limit_days = settings.maximum_chat_retention_days
-    with get_session_with_tenant(tenant_id) as db_session:
+    with get_session_with_current_tenant() as db_session:
        if should_perform_chat_ttl_check(retention_limit_days, db_session):
            perform_ttl_management_task.apply_async(
                kwargs=dict(
                    retention_limit_days=retention_limit_days, tenant_id=tenant_id
                ),
            )
-    if token is not None:
-        CURRENT_TENANT_ID_CONTEXTVAR.reset(token)


@celery_app.task(
@@ -60,9 +71,9 @@ def check_ttl_management_task(*, tenant_id: str | None) -> None:
    ignore_result=True,
    soft_time_limit=JOB_TIMEOUT,
 )
-def autogenerate_usage_report_task(*, tenant_id: str | None) -> None:
+def autogenerate_usage_report_task(*, tenant_id: str) -> None:
    """This generates usage report under the /admin/generate-usage/report endpoint"""
-    with get_session_with_tenant(tenant_id) as db_session:
+    with get_session_with_current_tenant() as db_session:
        create_new_usage_report(
            db_session=db_session,
            user_id=None,
--- a/backend/ee/onyx/background/celery/tasks/beat_schedule.py
+++ b/backend/ee/onyx/background/celery/tasks/beat_schedule.py
@@ -1,44 +1,46 @@
 from datetime import timedelta
 from typing import Any

+from onyx.background.celery.tasks.beat_schedule import (
+    beat_cloud_tasks as base_beat_system_tasks,
+)
 from onyx.background.celery.tasks.beat_schedule import BEAT_EXPIRES_DEFAULT
 from onyx.background.celery.tasks.beat_schedule import (
-    cloud_tasks_to_schedule as base_cloud_tasks_to_schedule,
+    beat_task_templates as base_beat_task_templates,
 )
+from onyx.background.celery.tasks.beat_schedule import generate_cloud_tasks
 from onyx.background.celery.tasks.beat_schedule import (
-    tasks_to_schedule as base_tasks_to_schedule,
+    get_tasks_to_schedule as base_get_tasks_to_schedule,
 )
-from onyx.configs.constants import ONYX_CLOUD_CELERY_TASK_PREFIX
 from onyx.configs.constants import OnyxCeleryPriority
 from onyx.configs.constants import OnyxCeleryTask
 from shared_configs.configs import MULTI_TENANT

-ee_cloud_tasks_to_schedule = [
-    {
-        "name": f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_autogenerate-usage-report",
-        "task": OnyxCeleryTask.CLOUD_BEAT_TASK_GENERATOR,
-        "schedule": timedelta(days=30),
-        "options": {
-            "priority": OnyxCeleryPriority.HIGHEST,
-            "expires": BEAT_EXPIRES_DEFAULT,
+ee_beat_system_tasks: list[dict] = []
+
+ee_beat_task_templates: list[dict] = []
+ee_beat_task_templates.extend(
+    [
+        {
+            "name": "autogenerate-usage-report",
+            "task": OnyxCeleryTask.AUTOGENERATE_USAGE_REPORT_TASK,
+            "schedule": timedelta(days=30),
+            "options": {
+                "priority": OnyxCeleryPriority.MEDIUM,
+                "expires": BEAT_EXPIRES_DEFAULT,
+            },
        },
-        "kwargs": {
-            "task_name": OnyxCeleryTask.AUTOGENERATE_USAGE_REPORT_TASK,
+        {
+            "name": "check-ttl-management",
+            "task": OnyxCeleryTask.CHECK_TTL_MANAGEMENT_TASK,
+            "schedule": timedelta(hours=1),
+            "options": {
+                "priority": OnyxCeleryPriority.MEDIUM,
+                "expires": BEAT_EXPIRES_DEFAULT,
+            },
        },
-    },
-    {
-        "name": f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_check-ttl-management",
-        "task": OnyxCeleryTask.CLOUD_BEAT_TASK_GENERATOR,
-        "schedule": timedelta(hours=1),
-        "options": {
-            "priority": OnyxCeleryPriority.HIGHEST,
-            "expires": BEAT_EXPIRES_DEFAULT,
-        },
-        "kwargs": {
-            "task_name": OnyxCeleryTask.CHECK_TTL_MANAGEMENT_TASK,
-        },
-    },
-]
+    ]
+)

 ee_tasks_to_schedule: list[dict] = []

@@ -65,9 +67,14 @@ if not MULTI_TENANT:
    ]


-def get_cloud_tasks_to_schedule() -> list[dict[str, Any]]:
-    return ee_cloud_tasks_to_schedule + base_cloud_tasks_to_schedule
+def get_cloud_tasks_to_schedule(beat_multiplier: float) -> list[dict[str, Any]]:
+    beat_system_tasks = ee_beat_system_tasks + base_beat_system_tasks
+    beat_task_templates = ee_beat_task_templates + base_beat_task_templates
+    cloud_tasks = generate_cloud_tasks(
+        beat_system_tasks, beat_task_templates, beat_multiplier
+    )
+    return cloud_tasks


 def get_tasks_to_schedule() -> list[dict[str, Any]]:
-    return ee_tasks_to_schedule + base_tasks_to_schedule
+    return ee_tasks_to_schedule + base_get_tasks_to_schedule()
--- a/backend/ee/onyx/background/celery/tasks/vespa/tasks.py
+++ b/backend/ee/onyx/background/celery/tasks/vespa/tasks.py
@@ -18,7 +18,7 @@ logger = setup_logger()


 def monitor_usergroup_taskset(
-    tenant_id: str | None, key_bytes: bytes, r: Redis, db_session: Session
+    tenant_id: str, key_bytes: bytes, r: Redis, db_session: Session
 ) -> None:
    """This function is likely to move in the worker refactor happening next."""
    fence_key = key_bytes.decode("utf-8")
--- a/backend/ee/onyx/configs/app_configs.py
+++ b/backend/ee/onyx/configs/app_configs.py
@@ -59,10 +59,14 @@ SUPER_CLOUD_API_KEY = os.environ.get("SUPER_CLOUD_API_KEY", "api_key")

 OAUTH_SLACK_CLIENT_ID = os.environ.get("OAUTH_SLACK_CLIENT_ID", "")
 OAUTH_SLACK_CLIENT_SECRET = os.environ.get("OAUTH_SLACK_CLIENT_SECRET", "")
-OAUTH_CONFLUENCE_CLIENT_ID = os.environ.get("OAUTH_CONFLUENCE_CLIENT_ID", "")
-OAUTH_CONFLUENCE_CLIENT_SECRET = os.environ.get("OAUTH_CONFLUENCE_CLIENT_SECRET", "")
-OAUTH_JIRA_CLIENT_ID = os.environ.get("OAUTH_JIRA_CLIENT_ID", "")
-OAUTH_JIRA_CLIENT_SECRET = os.environ.get("OAUTH_JIRA_CLIENT_SECRET", "")
+OAUTH_CONFLUENCE_CLOUD_CLIENT_ID = os.environ.get(
+    "OAUTH_CONFLUENCE_CLOUD_CLIENT_ID", ""
+)
+OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET = os.environ.get(
+    "OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET", ""
+)
+OAUTH_JIRA_CLOUD_CLIENT_ID = os.environ.get("OAUTH_JIRA_CLOUD_CLIENT_ID", "")
+OAUTH_JIRA_CLOUD_CLIENT_SECRET = os.environ.get("OAUTH_JIRA_CLOUD_CLIENT_SECRET", "")
 OAUTH_GOOGLE_DRIVE_CLIENT_ID = os.environ.get("OAUTH_GOOGLE_DRIVE_CLIENT_ID", "")
 OAUTH_GOOGLE_DRIVE_CLIENT_SECRET = os.environ.get(
    "OAUTH_GOOGLE_DRIVE_CLIENT_SECRET", ""
@@ -77,3 +81,5 @@ POSTHOG_HOST = os.environ.get("POSTHOG_HOST") or "https://us.i.posthog.com"
 HUBSPOT_TRACKING_URL = os.environ.get("HUBSPOT_TRACKING_URL")

 ANONYMOUS_USER_COOKIE_NAME = "onyx_anonymous_user"
+
+GATED_TENANTS_KEY = "gated_tenants"
--- a/backend/ee/onyx/db/connector_credential_pair.py
+++ b/backend/ee/onyx/db/connector_credential_pair.py
@@ -4,6 +4,7 @@ from sqlalchemy.orm import Session
 from onyx.configs.constants import DocumentSource
 from onyx.db.connector_credential_pair import get_connector_credential_pair
 from onyx.db.enums import AccessType
+from onyx.db.enums import ConnectorCredentialPairStatus
 from onyx.db.models import Connector
 from onyx.db.models import ConnectorCredentialPair
 from onyx.db.models import UserGroup__ConnectorCredentialPair
@@ -35,10 +36,11 @@ def _delete_connector_credential_pair_user_groups_relationship__no_commit(
 def get_cc_pairs_by_source(
    db_session: Session,
    source_type: DocumentSource,
-    only_sync: bool,
+    access_type: AccessType | None = None,
+    status: ConnectorCredentialPairStatus | None = None,
 ) -> list[ConnectorCredentialPair]:
    """
-    Get all cc_pairs for a given source type (and optionally only sync)
+    Get all cc_pairs for a given source type with optional filtering by access_type and status
    result is sorted by cc_pair id
    """
    query = (
@@ -48,8 +50,11 @@ def get_cc_pairs_by_source(
        .order_by(ConnectorCredentialPair.id)
    )

-    if only_sync:
-        query = query.filter(ConnectorCredentialPair.access_type == AccessType.SYNC)
+    if access_type is not None:
+        query = query.filter(ConnectorCredentialPair.access_type == access_type)
+
+    if status is not None:
+        query = query.filter(ConnectorCredentialPair.status == status)

    cc_pairs = query.all()
    return cc_pairs
--- a/backend/ee/onyx/db/persona.py
+++ b/backend/ee/onyx/db/persona.py
@@ -15,6 +15,9 @@ def make_persona_private(
    group_ids: list[int] | None,
    db_session: Session,
 ) -> None:
+    """NOTE(rkuo): This function batches all updates into a single commit. If we don't
+    dedupe the inputs, the commit will exception."""
+
    db_session.query(Persona__User).filter(
        Persona__User.persona_id == persona_id
    ).delete(synchronize_session="fetch")
@@ -23,19 +26,22 @@ def make_persona_private(
    ).delete(synchronize_session="fetch")

    if user_ids:
-        for user_uuid in user_ids:
-            db_session.add(Persona__User(persona_id=persona_id, user_id=user_uuid))
+        user_ids_set = set(user_ids)
+        for user_id in user_ids_set:
+            db_session.add(Persona__User(persona_id=persona_id, user_id=user_id))

            create_notification(
-                user_id=user_uuid,
+                user_id=user_id,
                notif_type=NotificationType.PERSONA_SHARED,
                db_session=db_session,
                additional_data=PersonaSharedNotificationData(
                    persona_id=persona_id,
                ).model_dump(),
            )
+
    if group_ids:
-        for group_id in group_ids:
+        group_ids_set = set(group_ids)
+        for group_id in group_ids_set:
            db_session.add(
                Persona__UserGroup(persona_id=persona_id, user_group_id=group_id)
            )
--- a/backend/ee/onyx/db/query_history.py
+++ b/backend/ee/onyx/db/query_history.py
@@ -134,7 +134,9 @@ def fetch_chat_sessions_eagerly_by_time(
    limit: int | None = 500,
    initial_time: datetime | None = None,
 ) -> list[ChatSession]:
-    time_order: UnaryExpression = desc(ChatSession.time_created)
+    """Sorted by oldest to newest, then by message id"""
+
+    asc_time_order: UnaryExpression = asc(ChatSession.time_created)
    message_order: UnaryExpression = asc(ChatMessage.id)

    filters: list[ColumnElement | BinaryExpression] = [
@@ -147,8 +149,7 @@ def fetch_chat_sessions_eagerly_by_time(
    subquery = (
        db_session.query(ChatSession.id, ChatSession.time_created)
        .filter(*filters)
-        .order_by(ChatSession.id, time_order)
-        .distinct(ChatSession.id)
+        .order_by(asc_time_order)
        .limit(limit)
        .subquery()
    )
@@ -164,7 +165,7 @@ def fetch_chat_sessions_eagerly_by_time(
                ChatMessage.chat_message_feedbacks
            ),
        )
-        .order_by(time_order, message_order)
+        .order_by(asc_time_order, message_order)
    )

    chat_sessions = query.all()
--- a/backend/ee/onyx/db/usage_export.py
+++ b/backend/ee/onyx/db/usage_export.py
@@ -16,13 +16,18 @@ from onyx.db.models import UsageReport
 from onyx.file_store.file_store import get_default_file_store


-# Gets skeletons of all message
+# Gets skeletons of all messages in the given range
 def get_empty_chat_messages_entries__paginated(
    db_session: Session,
    period: tuple[datetime, datetime],
    limit: int | None = 500,
    initial_time: datetime | None = None,
 ) -> tuple[Optional[datetime], list[ChatMessageSkeleton]]:
+    """Returns a tuple where:
+    first element is the most recent timestamp out of the sessions iterated
+    - this timestamp can be used to paginate forward in time
+    second element is a list of messages belonging to all the sessions iterated
+    """
    chat_sessions = fetch_chat_sessions_eagerly_by_time(
        start=period[0],
        end=period[1],
@@ -52,18 +57,17 @@ def get_empty_chat_messages_entries__paginated(
    if len(chat_sessions) == 0:
        return None, []

-    return chat_sessions[0].time_created, message_skeletons
+    return chat_sessions[-1].time_created, message_skeletons


 def get_all_empty_chat_message_entries(
    db_session: Session,
    period: tuple[datetime, datetime],
 ) -> Generator[list[ChatMessageSkeleton], None, None]:
+    """period is the range of time over which to fetch messages."""
    initial_time: Optional[datetime] = period[0]
-    ind = 0
    while True:
-        ind += 1
-
+        # iterate from oldest to newest
        time_created, message_skeletons = get_empty_chat_messages_entries__paginated(
            db_session,
            period,
--- a/backend/ee/onyx/db/user_group.py
+++ b/backend/ee/onyx/db/user_group.py
@@ -424,7 +424,7 @@ def _validate_curator_status__no_commit(
        )

        # if the user is a curator in any of their groups, set their role to CURATOR
-        # otherwise, set their role to BASIC
+        # otherwise, set their role to BASIC only if they were previously a CURATOR
        if curator_relationships:
            user.role = UserRole.CURATOR
        elif user.role == UserRole.CURATOR:
@@ -631,7 +631,16 @@ def update_user_group(
    removed_users = db_session.scalars(
        select(User).where(User.id.in_(removed_user_ids))  # type: ignore
    ).unique()
-    _validate_curator_status__no_commit(db_session, list(removed_users))
+
+    # Filter out admin and global curator users before validating curator status
+    users_to_validate = [
+        user
+        for user in removed_users
+        if user.role not in [UserRole.ADMIN, UserRole.GLOBAL_CURATOR]
+    ]
+
+    if users_to_validate:
+        _validate_curator_status__no_commit(db_session, users_to_validate)

    # update "time_updated" to now
    db_user_group.time_last_modified_by_user = func.now()
--- a/backend/ee/onyx/external_permissions/confluence/doc_sync.py
+++ b/backend/ee/onyx/external_permissions/confluence/doc_sync.py
@@ -9,12 +9,16 @@ from ee.onyx.external_permissions.confluence.constants import ALL_CONF_EMAILS_GR
 from onyx.access.models import DocExternalAccess
 from onyx.access.models import ExternalAccess
 from onyx.connectors.confluence.connector import ConfluenceConnector
+from onyx.connectors.confluence.onyx_confluence import (
+    get_user_email_from_username__server,
+)
 from onyx.connectors.confluence.onyx_confluence import OnyxConfluence
-from onyx.connectors.confluence.utils import get_user_email_from_username__server
+from onyx.connectors.credentials_provider import OnyxDBCredentialsProvider
 from onyx.connectors.models import SlimDocument
 from onyx.db.models import ConnectorCredentialPair
 from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
 from onyx.utils.logger import setup_logger
+from shared_configs.contextvars import get_current_tenant_id

 logger = setup_logger()

@@ -342,7 +346,8 @@ def _fetch_all_page_restrictions(


 def confluence_doc_sync(
-    cc_pair: ConnectorCredentialPair, callback: IndexingHeartbeatInterface | None
+    cc_pair: ConnectorCredentialPair,
+    callback: IndexingHeartbeatInterface | None,
 ) -> list[DocExternalAccess]:
    """
    Adds the external permissions to the documents in postgres
@@ -354,7 +359,11 @@ def confluence_doc_sync(
    confluence_connector = ConfluenceConnector(
        **cc_pair.connector.connector_specific_config
    )
-    confluence_connector.load_credentials(cc_pair.credential.credential_json)
+
+    provider = OnyxDBCredentialsProvider(
+        get_current_tenant_id(), "confluence", cc_pair.credential_id
+    )
+    confluence_connector.set_credentials_provider(provider)

    is_cloud = cc_pair.connector.connector_specific_config.get("is_cloud", False)

@@ -365,7 +374,9 @@ def confluence_doc_sync(

    slim_docs = []
    logger.debug("Fetching all slim documents from confluence")
-    for doc_batch in confluence_connector.retrieve_all_slim_documents():
+    for doc_batch in confluence_connector.retrieve_all_slim_documents(
+        callback=callback
+    ):
        logger.debug(f"Got {len(doc_batch)} slim documents from confluence")
        if callback:
            if callback.should_stop():
--- a/backend/ee/onyx/external_permissions/confluence/group_sync.py
+++ b/backend/ee/onyx/external_permissions/confluence/group_sync.py
@@ -1,8 +1,11 @@
 from ee.onyx.db.external_perm import ExternalUserGroup
 from ee.onyx.external_permissions.confluence.constants import ALL_CONF_EMAILS_GROUP_NAME
-from onyx.connectors.confluence.onyx_confluence import build_confluence_client
+from onyx.background.error_logging import emit_background_error
+from onyx.connectors.confluence.onyx_confluence import (
+    get_user_email_from_username__server,
+)
 from onyx.connectors.confluence.onyx_confluence import OnyxConfluence
-from onyx.connectors.confluence.utils import get_user_email_from_username__server
+from onyx.connectors.credentials_provider import OnyxDBCredentialsProvider
 from onyx.db.models import ConnectorCredentialPair
 from onyx.utils.logger import setup_logger

@@ -10,57 +13,81 @@ logger = setup_logger()


 def _build_group_member_email_map(
-    confluence_client: OnyxConfluence,
+    confluence_client: OnyxConfluence, cc_pair_id: int
 ) -> dict[str, set[str]]:
    group_member_emails: dict[str, set[str]] = {}
-    for user_result in confluence_client.paginated_cql_user_retrieval():
-        logger.debug(f"Processing groups for user: {user_result}")
+    for user in confluence_client.paginated_cql_user_retrieval():
+        logger.debug(f"Processing groups for user: {user}")

-        user = user_result.get("user", {})
-        if not user:
-            logger.warning(f"user result missing user field: {user_result}")
-            continue
-        email = user.get("email")
+        email = user.email
        if not email:
            # This field is only present in Confluence Server
-            user_name = user.get("username")
+            user_name = user.username
            # If it is present, try to get the email using a Server-specific method
            if user_name:
                email = get_user_email_from_username__server(
                    confluence_client=confluence_client,
                    user_name=user_name,
                )
+
        if not email:
            # If we still don't have an email, skip this user
-            logger.warning(f"user result missing email field: {user_result}")
+            msg = f"user result missing email field: {user}"
+            if user.type == "app":
+                logger.warning(msg)
+            else:
+                emit_background_error(msg, cc_pair_id=cc_pair_id)
+                logger.error(msg)
            continue

        all_users_groups: set[str] = set()
-        for group in confluence_client.paginated_groups_by_user_retrieval(user):
+        for group in confluence_client.paginated_groups_by_user_retrieval(user.user_id):
            # group name uniqueness is enforced by Confluence, so we can use it as a group ID
            group_id = group["name"]
            group_member_emails.setdefault(group_id, set()).add(email)
            all_users_groups.add(group_id)

-        if not group_member_emails:
-            logger.warning(f"No groups found for user with email: {email}")
+        if not all_users_groups:
+            msg = f"No groups found for user with email: {email}"
+            emit_background_error(msg, cc_pair_id=cc_pair_id)
+            logger.error(msg)
        else:
            logger.debug(f"Found groups {all_users_groups} for user with email {email}")

+    if not group_member_emails:
+        msg = "No groups found for any users."
+        emit_background_error(msg, cc_pair_id=cc_pair_id)
+        logger.error(msg)
+
    return group_member_emails


 def confluence_group_sync(
+    tenant_id: str,
    cc_pair: ConnectorCredentialPair,
 ) -> list[ExternalUserGroup]:
-    confluence_client = build_confluence_client(
-        credentials=cc_pair.credential.credential_json,
-        is_cloud=cc_pair.connector.connector_specific_config.get("is_cloud", False),
-        wiki_base=cc_pair.connector.connector_specific_config["wiki_base"],
-    )
+    provider = OnyxDBCredentialsProvider(tenant_id, "confluence", cc_pair.credential_id)
+    is_cloud = cc_pair.connector.connector_specific_config.get("is_cloud", False)
+    wiki_base: str = cc_pair.connector.connector_specific_config["wiki_base"]
+    url = wiki_base.rstrip("/")
+
+    probe_kwargs = {
+        "max_backoff_retries": 6,
+        "max_backoff_seconds": 10,
+    }
+
+    final_kwargs = {
+        "max_backoff_retries": 10,
+        "max_backoff_seconds": 60,
+    }
+
+    confluence_client = OnyxConfluence(is_cloud, url, provider)
+    confluence_client._probe_connection(**probe_kwargs)
+    confluence_client._initialize_connection(**final_kwargs)

    group_member_email_map = _build_group_member_email_map(
        confluence_client=confluence_client,
+        cc_pair_id=cc_pair.id,
    )
    onyx_groups: list[ExternalUserGroup] = []
    all_found_emails = set()
--- a/backend/ee/onyx/external_permissions/gmail/doc_sync.py
+++ b/backend/ee/onyx/external_permissions/gmail/doc_sync.py
@@ -15,6 +15,7 @@ logger = setup_logger()
 def _get_slim_doc_generator(
    cc_pair: ConnectorCredentialPair,
    gmail_connector: GmailConnector,
+    callback: IndexingHeartbeatInterface | None = None,
 ) -> GenerateSlimDocumentOutput:
    current_time = datetime.now(timezone.utc)
    start_time = (
@@ -24,12 +25,15 @@ def _get_slim_doc_generator(
    )

    return gmail_connector.retrieve_all_slim_documents(
-        start=start_time, end=current_time.timestamp()
+        start=start_time,
+        end=current_time.timestamp(),
+        callback=callback,
    )


 def gmail_doc_sync(
-    cc_pair: ConnectorCredentialPair, callback: IndexingHeartbeatInterface | None
+    cc_pair: ConnectorCredentialPair,
+    callback: IndexingHeartbeatInterface | None,
 ) -> list[DocExternalAccess]:
    """
    Adds the external permissions to the documents in postgres
@@ -40,7 +44,9 @@ def gmail_doc_sync(
    gmail_connector = GmailConnector(**cc_pair.connector.connector_specific_config)
    gmail_connector.load_credentials(cc_pair.credential.credential_json)

-    slim_doc_generator = _get_slim_doc_generator(cc_pair, gmail_connector)
+    slim_doc_generator = _get_slim_doc_generator(
+        cc_pair, gmail_connector, callback=callback
+    )

    document_external_access: list[DocExternalAccess] = []
    for slim_doc_batch in slim_doc_generator:
--- a/backend/ee/onyx/external_permissions/google_drive/doc_sync.py
+++ b/backend/ee/onyx/external_permissions/google_drive/doc_sync.py
@@ -21,6 +21,7 @@ _PERMISSION_ID_PERMISSION_MAP: dict[str, dict[str, Any]] = {}
 def _get_slim_doc_generator(
    cc_pair: ConnectorCredentialPair,
    google_drive_connector: GoogleDriveConnector,
+    callback: IndexingHeartbeatInterface | None = None,
 ) -> GenerateSlimDocumentOutput:
    current_time = datetime.now(timezone.utc)
    start_time = (
@@ -30,7 +31,9 @@ def _get_slim_doc_generator(
    )

    return google_drive_connector.retrieve_all_slim_documents(
-        start=start_time, end=current_time.timestamp()
+        start=start_time,
+        end=current_time.timestamp(),
+        callback=callback,
    )


@@ -59,12 +62,14 @@ def _fetch_permissions_for_permission_ids(
        user_email=(owner_email or google_drive_connector.primary_admin_email),
    )

+    # We continue on 404 or 403 because the document may not exist or the user may not have access to it
    fetched_permissions = execute_paginated_retrieval(
        retrieval_function=drive_service.permissions().list,
        list_key="permissions",
        fileId=doc_id,
        fields="permissions(id, emailAddress, type, domain)",
        supportsAllDrives=True,
+        continue_on_404_or_403=True,
    )

    permissions_for_doc_id = []
@@ -101,7 +106,13 @@ def _get_permissions_from_slim_doc(
    user_emails: set[str] = set()
    group_emails: set[str] = set()
    public = False
+    skipped_permissions = 0
+
    for permission in permissions_list:
+        if not permission:
+            skipped_permissions += 1
+            continue
+
        permission_type = permission["type"]
        if permission_type == "user":
            user_emails.add(permission["emailAddress"])
@@ -118,6 +129,11 @@ def _get_permissions_from_slim_doc(
        elif permission_type == "anyone":
            public = True

+    if skipped_permissions > 0:
+        logger.warning(
+            f"Skipped {skipped_permissions} permissions of {len(permissions_list)} for document {slim_doc.id}"
+        )
+
    drive_id = permission_info.get("drive_id")
    group_ids = group_emails | ({drive_id} if drive_id is not None else set())

@@ -129,7 +145,8 @@ def _get_permissions_from_slim_doc(


 def gdrive_doc_sync(
-    cc_pair: ConnectorCredentialPair, callback: IndexingHeartbeatInterface | None
+    cc_pair: ConnectorCredentialPair,
+    callback: IndexingHeartbeatInterface | None,
 ) -> list[DocExternalAccess]:
    """
    Adds the external permissions to the documents in postgres
--- a/backend/ee/onyx/external_permissions/google_drive/group_sync.py
+++ b/backend/ee/onyx/external_permissions/google_drive/group_sync.py
@@ -119,6 +119,7 @@ def _build_onyx_groups(


 def gdrive_group_sync(
+    tenant_id: str,
    cc_pair: ConnectorCredentialPair,
 ) -> list[ExternalUserGroup]:
    # Initialize connector and build credential/service objects
--- a/backend/ee/onyx/external_permissions/slack/doc_sync.py
+++ b/backend/ee/onyx/external_permissions/slack/doc_sync.py
@@ -5,7 +5,7 @@ from onyx.access.models import DocExternalAccess
 from onyx.access.models import ExternalAccess
 from onyx.connectors.slack.connector import get_channels
 from onyx.connectors.slack.connector import make_paginated_slack_api_call_w_retries
-from onyx.connectors.slack.connector import SlackPollConnector
+from onyx.connectors.slack.connector import SlackConnector
 from onyx.db.models import ConnectorCredentialPair
 from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
 from onyx.utils.logger import setup_logger
@@ -17,22 +17,14 @@ logger = setup_logger()
 def _get_slack_document_ids_and_channels(
    cc_pair: ConnectorCredentialPair, callback: IndexingHeartbeatInterface | None
 ) -> dict[str, list[str]]:
-    slack_connector = SlackPollConnector(**cc_pair.connector.connector_specific_config)
+    slack_connector = SlackConnector(**cc_pair.connector.connector_specific_config)
    slack_connector.load_credentials(cc_pair.credential.credential_json)

-    slim_doc_generator = slack_connector.retrieve_all_slim_documents()
+    slim_doc_generator = slack_connector.retrieve_all_slim_documents(callback=callback)

    channel_doc_map: dict[str, list[str]] = {}
    for doc_metadata_batch in slim_doc_generator:
        for doc_metadata in doc_metadata_batch:
-            if callback:
-                if callback.should_stop():
-                    raise RuntimeError(
-                        "_get_slack_document_ids_and_channels: Stop signal detected"
-                    )
-
-                callback.progress("_get_slack_document_ids_and_channels", 1)
-
            if doc_metadata.perm_sync_data is None:
                continue
            channel_id = doc_metadata.perm_sync_data["channel_id"]
@@ -40,6 +32,14 @@ def _get_slack_document_ids_and_channels(
                channel_doc_map[channel_id] = []
            channel_doc_map[channel_id].append(doc_metadata.id)

+        if callback:
+            if callback.should_stop():
+                raise RuntimeError(
+                    "_get_slack_document_ids_and_channels: Stop signal detected"
+                )
+
+            callback.progress("_get_slack_document_ids_and_channels", 1)
+
    return channel_doc_map


@@ -123,7 +123,8 @@ def _fetch_channel_permissions(


 def slack_doc_sync(
-    cc_pair: ConnectorCredentialPair, callback: IndexingHeartbeatInterface | None
+    cc_pair: ConnectorCredentialPair,
+    callback: IndexingHeartbeatInterface | None,
 ) -> list[DocExternalAccess]:
    """
    Adds the external permissions to the documents in postgres
--- a/backend/ee/onyx/external_permissions/sync_params.py
+++ b/backend/ee/onyx/external_permissions/sync_params.py
@@ -28,6 +28,7 @@ DocSyncFuncType = Callable[

 GroupSyncFuncType = Callable[
    [
+        str,
        ConnectorCredentialPair,
    ],
    list[ExternalUserGroup],
--- a/backend/ee/onyx/main.py
+++ b/backend/ee/onyx/main.py
@@ -15,7 +15,7 @@ from ee.onyx.server.enterprise_settings.api import (
 )
 from ee.onyx.server.manage.standard_answer import router as standard_answer_router
 from ee.onyx.server.middleware.tenant_tracking import add_tenant_id_middleware
-from ee.onyx.server.oauth import router as oauth_router
+from ee.onyx.server.oauth.api import router as ee_oauth_router
 from ee.onyx.server.query_and_chat.chat_backend import (
    router as chat_router,
 )
@@ -128,7 +128,7 @@ def get_application() -> FastAPI:
    include_router_with_global_prefix_prepended(application, query_router)
    include_router_with_global_prefix_prepended(application, chat_router)
    include_router_with_global_prefix_prepended(application, standard_answer_router)
-    include_router_with_global_prefix_prepended(application, oauth_router)
+    include_router_with_global_prefix_prepended(application, ee_oauth_router)

    # Enterprise-only global settings
    include_router_with_global_prefix_prepended(
@@ -152,4 +152,8 @@ def get_application() -> FastAPI:
    # environment variable. Used to automate deployment for multiple environments.
    seed_db()

+    # for debugging discovered routes
+    # for route in application.router.routes:
+    #     print(f"Path: {route.path}, Methods: {route.methods}")
+
    return application
--- a/backend/ee/onyx/onyxbot/slack/handlers/handle_standard_answers.py
+++ b/backend/ee/onyx/onyxbot/slack/handlers/handle_standard_answers.py
@@ -22,7 +22,7 @@ from onyx.onyxbot.slack.blocks import get_restate_blocks
 from onyx.onyxbot.slack.constants import GENERATE_ANSWER_BUTTON_ACTION_ID
 from onyx.onyxbot.slack.handlers.utils import send_team_member_message
 from onyx.onyxbot.slack.models import SlackMessageInfo
-from onyx.onyxbot.slack.utils import respond_in_thread
+from onyx.onyxbot.slack.utils import respond_in_thread_or_channel
 from onyx.onyxbot.slack.utils import update_emote_react
 from onyx.utils.logger import OnyxLoggingAdapter
 from onyx.utils.logger import setup_logger
@@ -216,7 +216,7 @@ def _handle_standard_answers(
        all_blocks = restate_question_blocks + answer_blocks

        try:
-            respond_in_thread(
+            respond_in_thread_or_channel(
                client=client,
                channel=message_info.channel_to_respond,
                receiver_ids=receiver_ids,
@@ -231,6 +231,7 @@ def _handle_standard_answers(
                    client=client,
                    channel=message_info.channel_to_respond,
                    thread_ts=slack_thread_id,
+                    receiver_ids=receiver_ids,
                )

            return True
--- a/backend/ee/onyx/server/middleware/tenant_tracking.py
+++ b/backend/ee/onyx/server/middleware/tenant_tracking.py
@@ -33,7 +33,7 @@ def add_tenant_id_middleware(app: FastAPI, logger: logging.LoggerAdapter) -> Non
            return await call_next(request)

        except Exception as e:
-            logger.error(f"Error in tenant ID middleware: {str(e)}")
+            logger.exception(f"Error in tenant ID middleware: {str(e)}")
            raise


@@ -49,7 +49,7 @@ async def _get_tenant_id_from_request(
    """
    # Check for API key
    tenant_id = extract_tenant_from_api_key_header(request)
-    if tenant_id:
+    if tenant_id is not None:
        return tenant_id

    # Check for anonymous user cookie
--- a/backend/ee/onyx/server/oauth.py
+++ b/backend/ee/onyx/server/oauth.py
@@ -1,631 +0,0 @@
-import base64
-import json
-import uuid
-from typing import Any
-from typing import cast
-
-import requests
-from fastapi import APIRouter
-from fastapi import Depends
-from fastapi import HTTPException
-from fastapi.responses import JSONResponse
-from pydantic import BaseModel
-from sqlalchemy.orm import Session
-
-from ee.onyx.configs.app_configs import OAUTH_CONFLUENCE_CLIENT_ID
-from ee.onyx.configs.app_configs import OAUTH_CONFLUENCE_CLIENT_SECRET
-from ee.onyx.configs.app_configs import OAUTH_GOOGLE_DRIVE_CLIENT_ID
-from ee.onyx.configs.app_configs import OAUTH_GOOGLE_DRIVE_CLIENT_SECRET
-from ee.onyx.configs.app_configs import OAUTH_SLACK_CLIENT_ID
-from ee.onyx.configs.app_configs import OAUTH_SLACK_CLIENT_SECRET
-from onyx.auth.users import current_user
-from onyx.configs.app_configs import WEB_DOMAIN
-from onyx.configs.constants import DocumentSource
-from onyx.connectors.google_utils.google_auth import get_google_oauth_creds
-from onyx.connectors.google_utils.google_auth import sanitize_oauth_credentials
-from onyx.connectors.google_utils.shared_constants import (
-    DB_CREDENTIALS_AUTHENTICATION_METHOD,
-)
-from onyx.connectors.google_utils.shared_constants import (
-    DB_CREDENTIALS_DICT_TOKEN_KEY,
-)
-from onyx.connectors.google_utils.shared_constants import (
-    DB_CREDENTIALS_PRIMARY_ADMIN_KEY,
-)
-from onyx.connectors.google_utils.shared_constants import (
-    GoogleOAuthAuthenticationMethod,
-)
-from onyx.db.credentials import create_credential
-from onyx.db.engine import get_current_tenant_id
-from onyx.db.engine import get_session
-from onyx.db.models import User
-from onyx.redis.redis_pool import get_redis_client
-from onyx.server.documents.models import CredentialBase
-from onyx.utils.logger import setup_logger
-
-
-logger = setup_logger()
-
-router = APIRouter(prefix="/oauth")
-
-
-class SlackOAuth:
-    # https://knock.app/blog/how-to-authenticate-users-in-slack-using-oauth
-    # Example: https://api.slack.com/authentication/oauth-v2#exchanging
-
-    class OAuthSession(BaseModel):
-        """Stored in redis to be looked up on callback"""
-
-        email: str
-        redirect_on_success: str | None  # Where to send the user if OAuth flow succeeds
-
-    CLIENT_ID = OAUTH_SLACK_CLIENT_ID
-    CLIENT_SECRET = OAUTH_SLACK_CLIENT_SECRET
-
-    TOKEN_URL = "https://slack.com/api/oauth.v2.access"
-
-    # SCOPE is per https://docs.onyx.app/connectors/slack
-    BOT_SCOPE = (
-        "channels:history,"
-        "channels:read,"
-        "groups:history,"
-        "groups:read,"
-        "channels:join,"
-        "im:history,"
-        "users:read,"
-        "users:read.email,"
-        "usergroups:read"
-    )
-
-    REDIRECT_URI = f"{WEB_DOMAIN}/admin/connectors/slack/oauth/callback"
-    DEV_REDIRECT_URI = f"https://redirectmeto.com/{REDIRECT_URI}"
-
-    @classmethod
-    def generate_oauth_url(cls, state: str) -> str:
-        return cls._generate_oauth_url_helper(cls.REDIRECT_URI, state)
-
-    @classmethod
-    def generate_dev_oauth_url(cls, state: str) -> str:
-        """dev mode workaround for localhost testing
-        - https://www.nango.dev/blog/oauth-redirects-on-localhost-with-https
-        """
-
-        return cls._generate_oauth_url_helper(cls.DEV_REDIRECT_URI, state)
-
-    @classmethod
-    def _generate_oauth_url_helper(cls, redirect_uri: str, state: str) -> str:
-        url = (
-            f"https://slack.com/oauth/v2/authorize"
-            f"?client_id={cls.CLIENT_ID}"
-            f"&redirect_uri={redirect_uri}"
-            f"&scope={cls.BOT_SCOPE}"
-            f"&state={state}"
-        )
-        return url
-
-    @classmethod
-    def session_dump_json(cls, email: str, redirect_on_success: str | None) -> str:
-        """Temporary state to store in redis. to be looked up on auth response.
-        Returns a json string.
-        """
-        session = SlackOAuth.OAuthSession(
-            email=email, redirect_on_success=redirect_on_success
-        )
-        return session.model_dump_json()
-
-    @classmethod
-    def parse_session(cls, session_json: str) -> OAuthSession:
-        session = SlackOAuth.OAuthSession.model_validate_json(session_json)
-        return session
-
-
-class ConfluenceCloudOAuth:
-    """work in progress"""
-
-    # https://developer.atlassian.com/cloud/confluence/oauth-2-3lo-apps/
-
-    class OAuthSession(BaseModel):
-        """Stored in redis to be looked up on callback"""
-
-        email: str
-        redirect_on_success: str | None  # Where to send the user if OAuth flow succeeds
-
-    CLIENT_ID = OAUTH_CONFLUENCE_CLIENT_ID
-    CLIENT_SECRET = OAUTH_CONFLUENCE_CLIENT_SECRET
-    TOKEN_URL = "https://auth.atlassian.com/oauth/token"
-
-    # All read scopes per https://developer.atlassian.com/cloud/confluence/scopes-for-oauth-2-3LO-and-forge-apps/
-    CONFLUENCE_OAUTH_SCOPE = (
-        "read:confluence-props%20"
-        "read:confluence-content.all%20"
-        "read:confluence-content.summary%20"
-        "read:confluence-content.permission%20"
-        "read:confluence-user%20"
-        "read:confluence-groups%20"
-        "readonly:content.attachment:confluence"
-    )
-
-    REDIRECT_URI = f"{WEB_DOMAIN}/admin/connectors/confluence/oauth/callback"
-    DEV_REDIRECT_URI = f"https://redirectmeto.com/{REDIRECT_URI}"
-
-    # eventually for Confluence Data Center
-    # oauth_url = (
-    #     f"http://localhost:8090/rest/oauth/v2/authorize?client_id={CONFLUENCE_OAUTH_CLIENT_ID}"
-    #     f"&scope={CONFLUENCE_OAUTH_SCOPE_2}"
-    #     f"&redirect_uri={redirectme_uri}"
-    # )
-
-    @classmethod
-    def generate_oauth_url(cls, state: str) -> str:
-        return cls._generate_oauth_url_helper(cls.REDIRECT_URI, state)
-
-    @classmethod
-    def generate_dev_oauth_url(cls, state: str) -> str:
-        """dev mode workaround for localhost testing
-        - https://www.nango.dev/blog/oauth-redirects-on-localhost-with-https
-        """
-        return cls._generate_oauth_url_helper(cls.DEV_REDIRECT_URI, state)
-
-    @classmethod
-    def _generate_oauth_url_helper(cls, redirect_uri: str, state: str) -> str:
-        url = (
-            "https://auth.atlassian.com/authorize"
-            f"?audience=api.atlassian.com"
-            f"&client_id={cls.CLIENT_ID}"
-            f"&redirect_uri={redirect_uri}"
-            f"&scope={cls.CONFLUENCE_OAUTH_SCOPE}"
-            f"&state={state}"
-            "&response_type=code"
-            "&prompt=consent"
-        )
-        return url
-
-    @classmethod
-    def session_dump_json(cls, email: str, redirect_on_success: str | None) -> str:
-        """Temporary state to store in redis. to be looked up on auth response.
-        Returns a json string.
-        """
-        session = ConfluenceCloudOAuth.OAuthSession(
-            email=email, redirect_on_success=redirect_on_success
-        )
-        return session.model_dump_json()
-
-    @classmethod
-    def parse_session(cls, session_json: str) -> SlackOAuth.OAuthSession:
-        session = SlackOAuth.OAuthSession.model_validate_json(session_json)
-        return session
-
-
-class GoogleDriveOAuth:
-    # https://developers.google.com/identity/protocols/oauth2
-    # https://developers.google.com/identity/protocols/oauth2/web-server
-
-    class OAuthSession(BaseModel):
-        """Stored in redis to be looked up on callback"""
-
-        email: str
-        redirect_on_success: str | None  # Where to send the user if OAuth flow succeeds
-
-    CLIENT_ID = OAUTH_GOOGLE_DRIVE_CLIENT_ID
-    CLIENT_SECRET = OAUTH_GOOGLE_DRIVE_CLIENT_SECRET
-
-    TOKEN_URL = "https://oauth2.googleapis.com/token"
-
-    # SCOPE is per https://docs.onyx.app/connectors/google-drive
-    # TODO: Merge with or use google_utils.GOOGLE_SCOPES
-    SCOPE = (
-        "https://www.googleapis.com/auth/drive.readonly%20"
-        "https://www.googleapis.com/auth/drive.metadata.readonly%20"
-        "https://www.googleapis.com/auth/admin.directory.user.readonly%20"
-        "https://www.googleapis.com/auth/admin.directory.group.readonly"
-    )
-
-    REDIRECT_URI = f"{WEB_DOMAIN}/admin/connectors/google-drive/oauth/callback"
-    DEV_REDIRECT_URI = f"https://redirectmeto.com/{REDIRECT_URI}"
-
-    @classmethod
-    def generate_oauth_url(cls, state: str) -> str:
-        return cls._generate_oauth_url_helper(cls.REDIRECT_URI, state)
-
-    @classmethod
-    def generate_dev_oauth_url(cls, state: str) -> str:
-        """dev mode workaround for localhost testing
-        - https://www.nango.dev/blog/oauth-redirects-on-localhost-with-https
-        """
-
-        return cls._generate_oauth_url_helper(cls.DEV_REDIRECT_URI, state)
-
-    @classmethod
-    def _generate_oauth_url_helper(cls, redirect_uri: str, state: str) -> str:
-        # without prompt=consent, a refresh token is only issued the first time the user approves
-        url = (
-            f"https://accounts.google.com/o/oauth2/v2/auth"
-            f"?client_id={cls.CLIENT_ID}"
-            f"&redirect_uri={redirect_uri}"
-            "&response_type=code"
-            f"&scope={cls.SCOPE}"
-            "&access_type=offline"
-            f"&state={state}"
-            "&prompt=consent"
-        )
-        return url
-
-    @classmethod
-    def session_dump_json(cls, email: str, redirect_on_success: str | None) -> str:
-        """Temporary state to store in redis. to be looked up on auth response.
-        Returns a json string.
-        """
-        session = GoogleDriveOAuth.OAuthSession(
-            email=email, redirect_on_success=redirect_on_success
-        )
-        return session.model_dump_json()
-
-    @classmethod
-    def parse_session(cls, session_json: str) -> OAuthSession:
-        session = GoogleDriveOAuth.OAuthSession.model_validate_json(session_json)
-        return session
-
-
-@router.post("/prepare-authorization-request")
-def prepare_authorization_request(
-    connector: DocumentSource,
-    redirect_on_success: str | None,
-    user: User = Depends(current_user),
-    tenant_id: str | None = Depends(get_current_tenant_id),
-) -> JSONResponse:
-    """Used by the frontend to generate the url for the user's browser during auth request.
-
-    Example: https://www.oauth.com/oauth2-servers/authorization/the-authorization-request/
-    """
-
-    # create random oauth state param for security and to retrieve user data later
-    oauth_uuid = uuid.uuid4()
-    oauth_uuid_str = str(oauth_uuid)
-
-    # urlsafe b64 encode the uuid for the oauth url
-    oauth_state = (
-        base64.urlsafe_b64encode(oauth_uuid.bytes).rstrip(b"=").decode("utf-8")
-    )
-    session: str
-
-    if connector == DocumentSource.SLACK:
-        oauth_url = SlackOAuth.generate_oauth_url(oauth_state)
-        session = SlackOAuth.session_dump_json(
-            email=user.email, redirect_on_success=redirect_on_success
-        )
-    elif connector == DocumentSource.GOOGLE_DRIVE:
-        oauth_url = GoogleDriveOAuth.generate_oauth_url(oauth_state)
-        session = GoogleDriveOAuth.session_dump_json(
-            email=user.email, redirect_on_success=redirect_on_success
-        )
-    # elif connector == DocumentSource.CONFLUENCE:
-    #     oauth_url = ConfluenceCloudOAuth.generate_oauth_url(oauth_state)
-    #     session = ConfluenceCloudOAuth.session_dump_json(
-    #         email=user.email, redirect_on_success=redirect_on_success
-    #     )
-    # elif connector == DocumentSource.JIRA:
-    #     oauth_url = JiraCloudOAuth.generate_dev_oauth_url(oauth_state)
-    else:
-        oauth_url = None
-
-    if not oauth_url:
-        raise HTTPException(
-            status_code=404,
-            detail=f"The document source type {connector} does not have OAuth implemented",
-        )
-
-    r = get_redis_client(tenant_id=tenant_id)
-
-    # store important session state to retrieve when the user is redirected back
-    # 10 min is the max we want an oauth flow to be valid
-    r.set(f"da_oauth:{oauth_uuid_str}", session, ex=600)
-
-    return JSONResponse(content={"url": oauth_url})
-
-
-@router.post("/connector/slack/callback")
-def handle_slack_oauth_callback(
-    code: str,
-    state: str,
-    user: User = Depends(current_user),
-    db_session: Session = Depends(get_session),
-    tenant_id: str | None = Depends(get_current_tenant_id),
-) -> JSONResponse:
-    if not SlackOAuth.CLIENT_ID or not SlackOAuth.CLIENT_SECRET:
-        raise HTTPException(
-            status_code=500,
-            detail="Slack client ID or client secret is not configured.",
-        )
-
-    r = get_redis_client(tenant_id=tenant_id)
-
-    # recover the state
-    padded_state = state + "=" * (
-        -len(state) % 4
-    )  # Add padding back (Base64 decoding requires padding)
-    uuid_bytes = base64.urlsafe_b64decode(
-        padded_state
-    )  # Decode the Base64 string back to bytes
-
-    # Convert bytes back to a UUID
-    oauth_uuid = uuid.UUID(bytes=uuid_bytes)
-    oauth_uuid_str = str(oauth_uuid)
-
-    r_key = f"da_oauth:{oauth_uuid_str}"
-
-    session_json_bytes = cast(bytes, r.get(r_key))
-    if not session_json_bytes:
-        raise HTTPException(
-            status_code=400,
-            detail=f"Slack OAuth failed - OAuth state key not found: key={r_key}",
-        )
-
-    session_json = session_json_bytes.decode("utf-8")
-    try:
-        session = SlackOAuth.parse_session(session_json)
-
-        # Exchange the authorization code for an access token
-        response = requests.post(
-            SlackOAuth.TOKEN_URL,
-            headers={"Content-Type": "application/x-www-form-urlencoded"},
-            data={
-                "client_id": SlackOAuth.CLIENT_ID,
-                "client_secret": SlackOAuth.CLIENT_SECRET,
-                "code": code,
-                "redirect_uri": SlackOAuth.REDIRECT_URI,
-            },
-        )
-
-        response_data = response.json()
-
-        if not response_data.get("ok"):
-            raise HTTPException(
-                status_code=400,
-                detail=f"Slack OAuth failed: {response_data.get('error')}",
-            )
-
-        # Extract token and team information
-        access_token: str = response_data.get("access_token")
-        team_id: str = response_data.get("team", {}).get("id")
-        authed_user_id: str = response_data.get("authed_user", {}).get("id")
-
-        credential_info = CredentialBase(
-            credential_json={"slack_bot_token": access_token},
-            admin_public=True,
-            source=DocumentSource.SLACK,
-            name="Slack OAuth",
-        )
-
-        create_credential(credential_info, user, db_session)
-    except Exception as e:
-        return JSONResponse(
-            status_code=500,
-            content={
-                "success": False,
-                "message": f"An error occurred during Slack OAuth: {str(e)}",
-            },
-        )
-    finally:
-        r.delete(r_key)
-
-    # return the result
-    return JSONResponse(
-        content={
-            "success": True,
-            "message": "Slack OAuth completed successfully.",
-            "team_id": team_id,
-            "authed_user_id": authed_user_id,
-            "redirect_on_success": session.redirect_on_success,
-        }
-    )
-
-
-# Work in progress
-# @router.post("/connector/confluence/callback")
-# def handle_confluence_oauth_callback(
-#     code: str,
-#     state: str,
-#     user: User = Depends(current_user),
-#     db_session: Session = Depends(get_session),
-#     tenant_id: str | None = Depends(get_current_tenant_id),
-# ) -> JSONResponse:
-#     if not ConfluenceCloudOAuth.CLIENT_ID or not ConfluenceCloudOAuth.CLIENT_SECRET:
-#         raise HTTPException(
-#             status_code=500,
-#             detail="Confluence client ID or client secret is not configured."
-#         )
-
-#     r = get_redis_client(tenant_id=tenant_id)
-
-#     # recover the state
-#     padded_state = state + '=' * (-len(state) % 4)  # Add padding back (Base64 decoding requires padding)
-#     uuid_bytes = base64.urlsafe_b64decode(padded_state)  # Decode the Base64 string back to bytes
-
-#     # Convert bytes back to a UUID
-#     oauth_uuid = uuid.UUID(bytes=uuid_bytes)
-#     oauth_uuid_str = str(oauth_uuid)
-
-#     r_key = f"da_oauth:{oauth_uuid_str}"
-
-#     result = r.get(r_key)
-#     if not result:
-#         raise HTTPException(
-#             status_code=400,
-#             detail=f"Confluence OAuth failed - OAuth state key not found: key={r_key}"
-#         )
-
-#     try:
-#         session = ConfluenceCloudOAuth.parse_session(result)
-
-#         # Exchange the authorization code for an access token
-#         response = requests.post(
-#             ConfluenceCloudOAuth.TOKEN_URL,
-#             headers={"Content-Type": "application/x-www-form-urlencoded"},
-#             data={
-#                 "client_id": ConfluenceCloudOAuth.CLIENT_ID,
-#                 "client_secret": ConfluenceCloudOAuth.CLIENT_SECRET,
-#                 "code": code,
-#                 "redirect_uri": ConfluenceCloudOAuth.DEV_REDIRECT_URI,
-#             },
-#         )
-
-#         response_data = response.json()
-
-#         if not response_data.get("ok"):
-#             raise HTTPException(
-#                 status_code=400,
-#                 detail=f"ConfluenceCloudOAuth OAuth failed: {response_data.get('error')}"
-#             )
-
-#         # Extract token and team information
-#         access_token: str = response_data.get("access_token")
-#         team_id: str = response_data.get("team", {}).get("id")
-#         authed_user_id: str = response_data.get("authed_user", {}).get("id")
-
-#         credential_info = CredentialBase(
-#             credential_json={"slack_bot_token": access_token},
-#             admin_public=True,
-#             source=DocumentSource.CONFLUENCE,
-#             name="Confluence OAuth",
-#         )
-
-#         logger.info(f"Slack access token: {access_token}")
-
-#         credential = create_credential(credential_info, user, db_session)
-
-#         logger.info(f"new_credential_id={credential.id}")
-#     except Exception as e:
-#         return JSONResponse(
-#             status_code=500,
-#             content={
-#                 "success": False,
-#                 "message": f"An error occurred during Slack OAuth: {str(e)}",
-#             },
-#         )
-#     finally:
-#         r.delete(r_key)
-
-#     # return the result
-#     return JSONResponse(
-#         content={
-#             "success": True,
-#             "message": "Slack OAuth completed successfully.",
-#             "team_id": team_id,
-#             "authed_user_id": authed_user_id,
-#             "redirect_on_success": session.redirect_on_success,
-#         }
-#     )
-
-
-@router.post("/connector/google-drive/callback")
-def handle_google_drive_oauth_callback(
-    code: str,
-    state: str,
-    user: User = Depends(current_user),
-    db_session: Session = Depends(get_session),
-    tenant_id: str | None = Depends(get_current_tenant_id),
-) -> JSONResponse:
-    if not GoogleDriveOAuth.CLIENT_ID or not GoogleDriveOAuth.CLIENT_SECRET:
-        raise HTTPException(
-            status_code=500,
-            detail="Google Drive client ID or client secret is not configured.",
-        )
-
-    r = get_redis_client(tenant_id=tenant_id)
-
-    # recover the state
-    padded_state = state + "=" * (
-        -len(state) % 4
-    )  # Add padding back (Base64 decoding requires padding)
-    uuid_bytes = base64.urlsafe_b64decode(
-        padded_state
-    )  # Decode the Base64 string back to bytes
-
-    # Convert bytes back to a UUID
-    oauth_uuid = uuid.UUID(bytes=uuid_bytes)
-    oauth_uuid_str = str(oauth_uuid)
-
-    r_key = f"da_oauth:{oauth_uuid_str}"
-
-    session_json_bytes = cast(bytes, r.get(r_key))
-    if not session_json_bytes:
-        raise HTTPException(
-            status_code=400,
-            detail=f"Google Drive OAuth failed - OAuth state key not found: key={r_key}",
-        )
-
-    session_json = session_json_bytes.decode("utf-8")
-    session: GoogleDriveOAuth.OAuthSession
-    try:
-        session = GoogleDriveOAuth.parse_session(session_json)
-
-        # Exchange the authorization code for an access token
-        response = requests.post(
-            GoogleDriveOAuth.TOKEN_URL,
-            headers={"Content-Type": "application/x-www-form-urlencoded"},
-            data={
-                "client_id": GoogleDriveOAuth.CLIENT_ID,
-                "client_secret": GoogleDriveOAuth.CLIENT_SECRET,
-                "code": code,
-                "redirect_uri": GoogleDriveOAuth.REDIRECT_URI,
-                "grant_type": "authorization_code",
-            },
-        )
-
-        response.raise_for_status()
-
-        authorization_response: dict[str, Any] = response.json()
-
-        # the connector wants us to store the json in its authorized_user_info format
-        # returned from OAuthCredentials.get_authorized_user_info().
-        # So refresh immediately via get_google_oauth_creds with the params filled in
-        # from fields in authorization_response to get the json we need
-        authorized_user_info = {}
-        authorized_user_info["client_id"] = OAUTH_GOOGLE_DRIVE_CLIENT_ID
-        authorized_user_info["client_secret"] = OAUTH_GOOGLE_DRIVE_CLIENT_SECRET
-        authorized_user_info["refresh_token"] = authorization_response["refresh_token"]
-
-        token_json_str = json.dumps(authorized_user_info)
-        oauth_creds = get_google_oauth_creds(
-            token_json_str=token_json_str, source=DocumentSource.GOOGLE_DRIVE
-        )
-        if not oauth_creds:
-            raise RuntimeError("get_google_oauth_creds returned None.")
-
-        # save off the credentials
-        oauth_creds_sanitized_json_str = sanitize_oauth_credentials(oauth_creds)
-
-        credential_dict: dict[str, str] = {}
-        credential_dict[DB_CREDENTIALS_DICT_TOKEN_KEY] = oauth_creds_sanitized_json_str
-        credential_dict[DB_CREDENTIALS_PRIMARY_ADMIN_KEY] = session.email
-        credential_dict[
-            DB_CREDENTIALS_AUTHENTICATION_METHOD
-        ] = GoogleOAuthAuthenticationMethod.OAUTH_INTERACTIVE.value
-
-        credential_info = CredentialBase(
-            credential_json=credential_dict,
-            admin_public=True,
-            source=DocumentSource.GOOGLE_DRIVE,
-            name="OAuth (interactive)",
-        )
-
-        create_credential(credential_info, user, db_session)
-    except Exception as e:
-        return JSONResponse(
-            status_code=500,
-            content={
-                "success": False,
-                "message": f"An error occurred during Google Drive OAuth: {str(e)}",
-            },
-        )
-    finally:
-        r.delete(r_key)
-
-    # return the result
-    return JSONResponse(
-        content={
-            "success": True,
-            "message": "Google Drive OAuth completed successfully.",
-            "redirect_on_success": session.redirect_on_success,
-        }
-    )
--- a/backend/ee/onyx/server/oauth/api.py
+++ b/backend/ee/onyx/server/oauth/api.py
@@ -0,0 +1,91 @@
+import base64
+import uuid
+
+from fastapi import Depends
+from fastapi import HTTPException
+from fastapi.responses import JSONResponse
+
+from ee.onyx.server.oauth.api_router import router
+from ee.onyx.server.oauth.confluence_cloud import ConfluenceCloudOAuth
+from ee.onyx.server.oauth.google_drive import GoogleDriveOAuth
+from ee.onyx.server.oauth.slack import SlackOAuth
+from onyx.auth.users import current_admin_user
+from onyx.configs.app_configs import DEV_MODE
+from onyx.configs.constants import DocumentSource
+from onyx.db.engine import get_current_tenant_id
+from onyx.db.models import User
+from onyx.redis.redis_pool import get_redis_client
+from onyx.utils.logger import setup_logger
+
+logger = setup_logger()
+
+
+@router.post("/prepare-authorization-request")
+def prepare_authorization_request(
+    connector: DocumentSource,
+    redirect_on_success: str | None,
+    user: User = Depends(current_admin_user),
+    tenant_id: str | None = Depends(get_current_tenant_id),
+) -> JSONResponse:
+    """Used by the frontend to generate the url for the user's browser during auth request.
+
+    Example: https://www.oauth.com/oauth2-servers/authorization/the-authorization-request/
+    """
+
+    # create random oauth state param for security and to retrieve user data later
+    oauth_uuid = uuid.uuid4()
+    oauth_uuid_str = str(oauth_uuid)
+
+    # urlsafe b64 encode the uuid for the oauth url
+    oauth_state = (
+        base64.urlsafe_b64encode(oauth_uuid.bytes).rstrip(b"=").decode("utf-8")
+    )
+
+    session: str | None = None
+    if connector == DocumentSource.SLACK:
+        if not DEV_MODE:
+            oauth_url = SlackOAuth.generate_oauth_url(oauth_state)
+        else:
+            oauth_url = SlackOAuth.generate_dev_oauth_url(oauth_state)
+
+        session = SlackOAuth.session_dump_json(
+            email=user.email, redirect_on_success=redirect_on_success
+        )
+    elif connector == DocumentSource.CONFLUENCE:
+        if not DEV_MODE:
+            oauth_url = ConfluenceCloudOAuth.generate_oauth_url(oauth_state)
+        else:
+            oauth_url = ConfluenceCloudOAuth.generate_dev_oauth_url(oauth_state)
+        session = ConfluenceCloudOAuth.session_dump_json(
+            email=user.email, redirect_on_success=redirect_on_success
+        )
+    elif connector == DocumentSource.GOOGLE_DRIVE:
+        if not DEV_MODE:
+            oauth_url = GoogleDriveOAuth.generate_oauth_url(oauth_state)
+        else:
+            oauth_url = GoogleDriveOAuth.generate_dev_oauth_url(oauth_state)
+        session = GoogleDriveOAuth.session_dump_json(
+            email=user.email, redirect_on_success=redirect_on_success
+        )
+    else:
+        oauth_url = None
+
+    if not oauth_url:
+        raise HTTPException(
+            status_code=404,
+            detail=f"The document source type {connector} does not have OAuth implemented",
+        )
+
+    if not session:
+        raise HTTPException(
+            status_code=500,
+            detail=f"The document source type {connector} failed to generate an OAuth session.",
+        )
+
+    r = get_redis_client(tenant_id=tenant_id)
+
+    # store important session state to retrieve when the user is redirected back
+    # 10 min is the max we want an oauth flow to be valid
+    r.set(f"da_oauth:{oauth_uuid_str}", session, ex=600)
+
+    return JSONResponse(content={"url": oauth_url})
--- a/backend/ee/onyx/server/oauth/api_router.py
+++ b/backend/ee/onyx/server/oauth/api_router.py
@@ -0,0 +1,3 @@
+from fastapi import APIRouter
+
+router: APIRouter = APIRouter(prefix="/oauth")
--- a/backend/ee/onyx/server/oauth/confluence_cloud.py
+++ b/backend/ee/onyx/server/oauth/confluence_cloud.py
@@ -0,0 +1,362 @@
+import base64
+import uuid
+from datetime import datetime
+from datetime import timedelta
+from datetime import timezone
+from typing import Any
+from typing import cast
+
+import requests
+from fastapi import Depends
+from fastapi import HTTPException
+from fastapi.responses import JSONResponse
+from pydantic import BaseModel
+from pydantic import ValidationError
+from sqlalchemy.orm import Session
+
+from ee.onyx.configs.app_configs import OAUTH_CONFLUENCE_CLOUD_CLIENT_ID
+from ee.onyx.configs.app_configs import OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET
+from ee.onyx.server.oauth.api_router import router
+from onyx.auth.users import current_admin_user
+from onyx.configs.app_configs import DEV_MODE
+from onyx.configs.app_configs import WEB_DOMAIN
+from onyx.configs.constants import DocumentSource
+from onyx.connectors.confluence.utils import CONFLUENCE_OAUTH_TOKEN_URL
+from onyx.db.credentials import create_credential
+from onyx.db.credentials import fetch_credential_by_id_for_user
+from onyx.db.credentials import update_credential_json
+from onyx.db.engine import get_current_tenant_id
+from onyx.db.engine import get_session
+from onyx.db.models import User
+from onyx.redis.redis_pool import get_redis_client
+from onyx.server.documents.models import CredentialBase
+from onyx.utils.logger import setup_logger
+
+logger = setup_logger()
+
+
+class ConfluenceCloudOAuth:
+    # https://developer.atlassian.com/cloud/confluence/oauth-2-3lo-apps/
+
+    class OAuthSession(BaseModel):
+        """Stored in redis to be looked up on callback"""
+
+        email: str
+        redirect_on_success: str | None  # Where to send the user if OAuth flow succeeds
+
+    class TokenResponse(BaseModel):
+        access_token: str
+        expires_in: int
+        token_type: str
+        refresh_token: str
+        scope: str
+
+    class AccessibleResources(BaseModel):
+        id: str
+        name: str
+        url: str
+        scopes: list[str]
+        avatarUrl: str
+
+    CLIENT_ID = OAUTH_CONFLUENCE_CLOUD_CLIENT_ID
+    CLIENT_SECRET = OAUTH_CONFLUENCE_CLOUD_CLIENT_SECRET
+    TOKEN_URL = CONFLUENCE_OAUTH_TOKEN_URL
+
+    ACCESSIBLE_RESOURCE_URL = (
+        "https://api.atlassian.com/oauth/token/accessible-resources"
+    )
+
+    # All read scopes per https://developer.atlassian.com/cloud/confluence/scopes-for-oauth-2-3LO-and-forge-apps/
+    CONFLUENCE_OAUTH_SCOPE = (
+        # classic scope
+        "read:confluence-space.summary%20"
+        "read:confluence-props%20"
+        "read:confluence-content.all%20"
+        "read:confluence-content.summary%20"
+        "read:confluence-content.permission%20"
+        "read:confluence-user%20"
+        "read:confluence-groups%20"
+        "readonly:content.attachment:confluence%20"
+        "search:confluence%20"
+        # granular scope
+        "read:attachment:confluence%20"  # possibly unneeded unless calling v2 attachments api
+        "read:content-details:confluence%20"  # for permission sync
+        "offline_access"
+    )
+
+    REDIRECT_URI = f"{WEB_DOMAIN}/admin/connectors/confluence/oauth/callback"
+    DEV_REDIRECT_URI = f"https://redirectmeto.com/{REDIRECT_URI}"
+
+    # eventually for Confluence Data Center
+    # oauth_url = (
+    #     f"http://localhost:8090/rest/oauth/v2/authorize?client_id={CONFLUENCE_OAUTH_CLIENT_ID}"
+    #     f"&scope={CONFLUENCE_OAUTH_SCOPE_2}"
+    #     f"&redirect_uri={redirectme_uri}"
+    # )
+
+    @classmethod
+    def generate_oauth_url(cls, state: str) -> str:
+        return cls._generate_oauth_url_helper(cls.REDIRECT_URI, state)
+
+    @classmethod
+    def generate_dev_oauth_url(cls, state: str) -> str:
+        """dev mode workaround for localhost testing
+        - https://www.nango.dev/blog/oauth-redirects-on-localhost-with-https
+        """
+        return cls._generate_oauth_url_helper(cls.DEV_REDIRECT_URI, state)
+
+    @classmethod
+    def _generate_oauth_url_helper(cls, redirect_uri: str, state: str) -> str:
+        # https://developer.atlassian.com/cloud/jira/platform/oauth-2-3lo-apps/#1--direct-the-user-to-the-authorization-url-to-get-an-authorization-code
+
+        url = (
+            "https://auth.atlassian.com/authorize"
+            f"?audience=api.atlassian.com"
+            f"&client_id={cls.CLIENT_ID}"
+            f"&scope={cls.CONFLUENCE_OAUTH_SCOPE}"
+            f"&redirect_uri={redirect_uri}"
+            f"&state={state}"
+            "&response_type=code"
+            "&prompt=consent"
+        )
+        return url
+
+    @classmethod
+    def session_dump_json(cls, email: str, redirect_on_success: str | None) -> str:
+        """Temporary state to store in redis. to be looked up on auth response.
+        Returns a json string.
+        """
+        session = ConfluenceCloudOAuth.OAuthSession(
+            email=email, redirect_on_success=redirect_on_success
+        )
+        return session.model_dump_json()
+
+    @classmethod
+    def parse_session(cls, session_json: str) -> OAuthSession:
+        session = ConfluenceCloudOAuth.OAuthSession.model_validate_json(session_json)
+        return session
+
+    @classmethod
+    def generate_finalize_url(cls, credential_id: int) -> str:
+        return f"{WEB_DOMAIN}/admin/connectors/confluence/oauth/finalize?credential={credential_id}"
+
+
+@router.post("/connector/confluence/callback")
+def confluence_oauth_callback(
+    code: str,
+    state: str,
+    user: User = Depends(current_admin_user),
+    db_session: Session = Depends(get_session),
+    tenant_id: str | None = Depends(get_current_tenant_id),
+) -> JSONResponse:
+    """Handles the backend logic for the frontend page that the user is redirected to
+    after visiting the oauth authorization url."""
+
+    if not ConfluenceCloudOAuth.CLIENT_ID or not ConfluenceCloudOAuth.CLIENT_SECRET:
+        raise HTTPException(
+            status_code=500,
+            detail="Confluence Cloud client ID or client secret is not configured.",
+        )
+
+    r = get_redis_client(tenant_id=tenant_id)
+
+    # recover the state
+    padded_state = state + "=" * (
+        -len(state) % 4
+    )  # Add padding back (Base64 decoding requires padding)
+    uuid_bytes = base64.urlsafe_b64decode(
+        padded_state
+    )  # Decode the Base64 string back to bytes
+
+    # Convert bytes back to a UUID
+    oauth_uuid = uuid.UUID(bytes=uuid_bytes)
+    oauth_uuid_str = str(oauth_uuid)
+
+    r_key = f"da_oauth:{oauth_uuid_str}"
+
+    session_json_bytes = cast(bytes, r.get(r_key))
+    if not session_json_bytes:
+        raise HTTPException(
+            status_code=400,
+            detail=f"Confluence Cloud OAuth failed - OAuth state key not found: key={r_key}",
+        )
+
+    session_json = session_json_bytes.decode("utf-8")
+    try:
+        session = ConfluenceCloudOAuth.parse_session(session_json)
+
+        if not DEV_MODE:
+            redirect_uri = ConfluenceCloudOAuth.REDIRECT_URI
+        else:
+            redirect_uri = ConfluenceCloudOAuth.DEV_REDIRECT_URI
+
+        # Exchange the authorization code for an access token
+        response = requests.post(
+            ConfluenceCloudOAuth.TOKEN_URL,
+            headers={"Content-Type": "application/x-www-form-urlencoded"},
+            data={
+                "client_id": ConfluenceCloudOAuth.CLIENT_ID,
+                "client_secret": ConfluenceCloudOAuth.CLIENT_SECRET,
+                "code": code,
+                "redirect_uri": redirect_uri,
+                "grant_type": "authorization_code",
+            },
+        )
+
+        token_response: ConfluenceCloudOAuth.TokenResponse | None = None
+
+        try:
+            token_response = ConfluenceCloudOAuth.TokenResponse.model_validate_json(
+                response.text
+            )
+        except Exception:
+            raise RuntimeError(
+                "Confluence Cloud OAuth failed during code/token exchange."
+            )
+
+        now = datetime.now(timezone.utc)
+        expires_at = now + timedelta(seconds=token_response.expires_in)
+
+        credential_info = CredentialBase(
+            credential_json={
+                "confluence_access_token": token_response.access_token,
+                "confluence_refresh_token": token_response.refresh_token,
+                "created_at": now.isoformat(),
+                "expires_at": expires_at.isoformat(),
+                "expires_in": token_response.expires_in,
+                "scope": token_response.scope,
+            },
+            admin_public=True,
+            source=DocumentSource.CONFLUENCE,
+            name="Confluence Cloud OAuth",
+        )
+
+        credential = create_credential(credential_info, user, db_session)
+    except Exception as e:
+        return JSONResponse(
+            status_code=500,
+            content={
+                "success": False,
+                "message": f"An error occurred during Confluence Cloud OAuth: {str(e)}",
+            },
+        )
+    finally:
+        r.delete(r_key)
+
+    # return the result
+    return JSONResponse(
+        content={
+            "success": True,
+            "message": "Confluence Cloud OAuth completed successfully.",
+            "finalize_url": ConfluenceCloudOAuth.generate_finalize_url(credential.id),
+            "redirect_on_success": session.redirect_on_success,
+        }
+    )
+
+
+@router.get("/connector/confluence/accessible-resources")
+def confluence_oauth_accessible_resources(
+    credential_id: int,
+    user: User = Depends(current_admin_user),
+    db_session: Session = Depends(get_session),
+    tenant_id: str | None = Depends(get_current_tenant_id),
+) -> JSONResponse:
+    """Atlassian's API is weird and does not supply us with enough info to be in a
+    usable state after authorizing.  All API's require a cloud id. We have to list
+    the accessible resources/sites and let the user choose which site to use."""
+
+    credential = fetch_credential_by_id_for_user(credential_id, user, db_session)
+    if not credential:
+        raise HTTPException(400, f"Credential {credential_id} not found.")
+
+    credential_dict = credential.credential_json
+    access_token = credential_dict["confluence_access_token"]
+
+    try:
+        # Exchange the authorization code for an access token
+        response = requests.get(
+            ConfluenceCloudOAuth.ACCESSIBLE_RESOURCE_URL,
+            headers={
+                "Authorization": f"Bearer {access_token}",
+                "Accept": "application/json",
+            },
+        )
+
+        response.raise_for_status()
+        accessible_resources_data = response.json()
+
+        # Validate the list of AccessibleResources
+        try:
+            accessible_resources = [
+                ConfluenceCloudOAuth.AccessibleResources(**resource)
+                for resource in accessible_resources_data
+            ]
+        except ValidationError as e:
+            raise RuntimeError(f"Failed to parse accessible resources: {e}")
+    except Exception as e:
+        return JSONResponse(
+            status_code=500,
+            content={
+                "success": False,
+                "message": f"An error occurred retrieving Confluence Cloud accessible resources: {str(e)}",
+            },
+        )
+
+    # return the result
+    return JSONResponse(
+        content={
+            "success": True,
+            "message": "Confluence Cloud get accessible resources completed successfully.",
+            "accessible_resources": [
+                resource.model_dump() for resource in accessible_resources
+            ],
+        }
+    )
+
+
+@router.post("/connector/confluence/finalize")
+def confluence_oauth_finalize(
+    credential_id: int,
+    cloud_id: str,
+    cloud_name: str,
+    cloud_url: str,
+    user: User = Depends(current_admin_user),
+    db_session: Session = Depends(get_session),
+    tenant_id: str | None = Depends(get_current_tenant_id),
+) -> JSONResponse:
+    """Saves the info for the selected cloud site to the credential.
+    This is the final step in the confluence oauth flow where after the traditional
+    OAuth process, the user has to select a site to associate with the credentials.
+    After this, the credential is usable."""
+
+    credential = fetch_credential_by_id_for_user(credential_id, user, db_session)
+    if not credential:
+        raise HTTPException(
+            status_code=400,
+            detail=f"Confluence Cloud OAuth failed - credential {credential_id} not found.",
+        )
+
+    new_credential_json: dict[str, Any] = dict(credential.credential_json)
+    new_credential_json["cloud_id"] = cloud_id
+    new_credential_json["cloud_name"] = cloud_name
+    new_credential_json["wiki_base"] = cloud_url
+
+    try:
+        update_credential_json(credential_id, new_credential_json, user, db_session)
+    except Exception as e:
+        return JSONResponse(
+            status_code=500,
+            content={
+                "success": False,
+                "message": f"An error occurred during Confluence Cloud OAuth: {str(e)}",
+            },
+        )
+
+    # return the result
+    return JSONResponse(
+        content={
+            "success": True,
+            "message": "Confluence Cloud OAuth finalized successfully.",
+            "redirect_url": f"{WEB_DOMAIN}/admin/connectors/confluence",
+        }
+    )
--- a/backend/ee/onyx/server/oauth/google_drive.py
+++ b/backend/ee/onyx/server/oauth/google_drive.py
@@ -0,0 +1,229 @@
+import base64
+import json
+import uuid
+from typing import Any
+from typing import cast
+
+import requests
+from fastapi import Depends
+from fastapi import HTTPException
+from fastapi.responses import JSONResponse
+from pydantic import BaseModel
+from sqlalchemy.orm import Session
+
+from ee.onyx.configs.app_configs import OAUTH_GOOGLE_DRIVE_CLIENT_ID
+from ee.onyx.configs.app_configs import OAUTH_GOOGLE_DRIVE_CLIENT_SECRET
+from ee.onyx.server.oauth.api_router import router
+from onyx.auth.users import current_admin_user
+from onyx.configs.app_configs import DEV_MODE
+from onyx.configs.app_configs import WEB_DOMAIN
+from onyx.configs.constants import DocumentSource
+from onyx.connectors.google_utils.google_auth import get_google_oauth_creds
+from onyx.connectors.google_utils.google_auth import sanitize_oauth_credentials
+from onyx.connectors.google_utils.shared_constants import (
+    DB_CREDENTIALS_AUTHENTICATION_METHOD,
+)
+from onyx.connectors.google_utils.shared_constants import (
+    DB_CREDENTIALS_DICT_TOKEN_KEY,
+)
+from onyx.connectors.google_utils.shared_constants import (
+    DB_CREDENTIALS_PRIMARY_ADMIN_KEY,
+)
+from onyx.connectors.google_utils.shared_constants import (
+    GoogleOAuthAuthenticationMethod,
+)
+from onyx.db.credentials import create_credential
+from onyx.db.engine import get_current_tenant_id
+from onyx.db.engine import get_session
+from onyx.db.models import User
+from onyx.redis.redis_pool import get_redis_client
+from onyx.server.documents.models import CredentialBase
+
+
+class GoogleDriveOAuth:
+    # https://developers.google.com/identity/protocols/oauth2
+    # https://developers.google.com/identity/protocols/oauth2/web-server
+
+    class OAuthSession(BaseModel):
+        """Stored in redis to be looked up on callback"""
+
+        email: str
+        redirect_on_success: str | None  # Where to send the user if OAuth flow succeeds
+
+    CLIENT_ID = OAUTH_GOOGLE_DRIVE_CLIENT_ID
+    CLIENT_SECRET = OAUTH_GOOGLE_DRIVE_CLIENT_SECRET
+
+    TOKEN_URL = "https://oauth2.googleapis.com/token"
+
+    # SCOPE is per https://docs.danswer.dev/connectors/google-drive
+    # TODO: Merge with or use google_utils.GOOGLE_SCOPES
+    SCOPE = (
+        "https://www.googleapis.com/auth/drive.readonly%20"
+        "https://www.googleapis.com/auth/drive.metadata.readonly%20"
+        "https://www.googleapis.com/auth/admin.directory.user.readonly%20"
+        "https://www.googleapis.com/auth/admin.directory.group.readonly"
+    )
+
+    REDIRECT_URI = f"{WEB_DOMAIN}/admin/connectors/google-drive/oauth/callback"
+    DEV_REDIRECT_URI = f"https://redirectmeto.com/{REDIRECT_URI}"
+
+    @classmethod
+    def generate_oauth_url(cls, state: str) -> str:
+        return cls._generate_oauth_url_helper(cls.REDIRECT_URI, state)
+
+    @classmethod
+    def generate_dev_oauth_url(cls, state: str) -> str:
+        """dev mode workaround for localhost testing
+        - https://www.nango.dev/blog/oauth-redirects-on-localhost-with-https
+        """
+
+        return cls._generate_oauth_url_helper(cls.DEV_REDIRECT_URI, state)
+
+    @classmethod
+    def _generate_oauth_url_helper(cls, redirect_uri: str, state: str) -> str:
+        # without prompt=consent, a refresh token is only issued the first time the user approves
+        url = (
+            f"https://accounts.google.com/o/oauth2/v2/auth"
+            f"?client_id={cls.CLIENT_ID}"
+            f"&redirect_uri={redirect_uri}"
+            "&response_type=code"
+            f"&scope={cls.SCOPE}"
+            "&access_type=offline"
+            f"&state={state}"
+            "&prompt=consent"
+        )
+        return url
+
+    @classmethod
+    def session_dump_json(cls, email: str, redirect_on_success: str | None) -> str:
+        """Temporary state to store in redis. to be looked up on auth response.
+        Returns a json string.
+        """
+        session = GoogleDriveOAuth.OAuthSession(
+            email=email, redirect_on_success=redirect_on_success
+        )
+        return session.model_dump_json()
+
+    @classmethod
+    def parse_session(cls, session_json: str) -> OAuthSession:
+        session = GoogleDriveOAuth.OAuthSession.model_validate_json(session_json)
+        return session
+
+
+@router.post("/connector/google-drive/callback")
+def handle_google_drive_oauth_callback(
+    code: str,
+    state: str,
+    user: User = Depends(current_admin_user),
+    db_session: Session = Depends(get_session),
+    tenant_id: str | None = Depends(get_current_tenant_id),
+) -> JSONResponse:
+    if not GoogleDriveOAuth.CLIENT_ID or not GoogleDriveOAuth.CLIENT_SECRET:
+        raise HTTPException(
+            status_code=500,
+            detail="Google Drive client ID or client secret is not configured.",
+        )
+
+    r = get_redis_client(tenant_id=tenant_id)
+
+    # recover the state
+    padded_state = state + "=" * (
+        -len(state) % 4
+    )  # Add padding back (Base64 decoding requires padding)
+    uuid_bytes = base64.urlsafe_b64decode(
+        padded_state
+    )  # Decode the Base64 string back to bytes
+
+    # Convert bytes back to a UUID
+    oauth_uuid = uuid.UUID(bytes=uuid_bytes)
+    oauth_uuid_str = str(oauth_uuid)
+
+    r_key = f"da_oauth:{oauth_uuid_str}"
+
+    session_json_bytes = cast(bytes, r.get(r_key))
+    if not session_json_bytes:
+        raise HTTPException(
+            status_code=400,
+            detail=f"Google Drive OAuth failed - OAuth state key not found: key={r_key}",
+        )
+
+    session_json = session_json_bytes.decode("utf-8")
+    try:
+        session = GoogleDriveOAuth.parse_session(session_json)
+
+        if not DEV_MODE:
+            redirect_uri = GoogleDriveOAuth.REDIRECT_URI
+        else:
+            redirect_uri = GoogleDriveOAuth.DEV_REDIRECT_URI
+
+        # Exchange the authorization code for an access token
+        response = requests.post(
+            GoogleDriveOAuth.TOKEN_URL,
+            headers={"Content-Type": "application/x-www-form-urlencoded"},
+            data={
+                "client_id": GoogleDriveOAuth.CLIENT_ID,
+                "client_secret": GoogleDriveOAuth.CLIENT_SECRET,
+                "code": code,
+                "redirect_uri": redirect_uri,
+                "grant_type": "authorization_code",
+            },
+        )
+
+        response.raise_for_status()
+
+        authorization_response: dict[str, Any] = response.json()
+
+        # the connector wants us to store the json in its authorized_user_info format
+        # returned from OAuthCredentials.get_authorized_user_info().
+        # So refresh immediately via get_google_oauth_creds with the params filled in
+        # from fields in authorization_response to get the json we need
+        authorized_user_info = {}
+        authorized_user_info["client_id"] = OAUTH_GOOGLE_DRIVE_CLIENT_ID
+        authorized_user_info["client_secret"] = OAUTH_GOOGLE_DRIVE_CLIENT_SECRET
+        authorized_user_info["refresh_token"] = authorization_response["refresh_token"]
+
+        token_json_str = json.dumps(authorized_user_info)
+        oauth_creds = get_google_oauth_creds(
+            token_json_str=token_json_str, source=DocumentSource.GOOGLE_DRIVE
+        )
+        if not oauth_creds:
+            raise RuntimeError("get_google_oauth_creds returned None.")
+
+        # save off the credentials
+        oauth_creds_sanitized_json_str = sanitize_oauth_credentials(oauth_creds)
+
+        credential_dict: dict[str, str] = {}
+        credential_dict[DB_CREDENTIALS_DICT_TOKEN_KEY] = oauth_creds_sanitized_json_str
+        credential_dict[DB_CREDENTIALS_PRIMARY_ADMIN_KEY] = session.email
+        credential_dict[
+            DB_CREDENTIALS_AUTHENTICATION_METHOD
+        ] = GoogleOAuthAuthenticationMethod.OAUTH_INTERACTIVE.value
+
+        credential_info = CredentialBase(
+            credential_json=credential_dict,
+            admin_public=True,
+            source=DocumentSource.GOOGLE_DRIVE,
+            name="OAuth (interactive)",
+        )
+
+        create_credential(credential_info, user, db_session)
+    except Exception as e:
+        return JSONResponse(
+            status_code=500,
+            content={
+                "success": False,
+                "message": f"An error occurred during Google Drive OAuth: {str(e)}",
+            },
+        )
+    finally:
+        r.delete(r_key)
+
+    # return the result
+    return JSONResponse(
+        content={
+            "success": True,
+            "message": "Google Drive OAuth completed successfully.",
+            "finalize_url": None,
+            "redirect_on_success": session.redirect_on_success,
+        }
+    )
--- a/backend/ee/onyx/server/oauth/slack.py
+++ b/backend/ee/onyx/server/oauth/slack.py
@@ -0,0 +1,197 @@
+import base64
+import uuid
+from typing import cast
+
+import requests
+from fastapi import Depends
+from fastapi import HTTPException
+from fastapi.responses import JSONResponse
+from pydantic import BaseModel
+from sqlalchemy.orm import Session
+
+from ee.onyx.configs.app_configs import OAUTH_SLACK_CLIENT_ID
+from ee.onyx.configs.app_configs import OAUTH_SLACK_CLIENT_SECRET
+from ee.onyx.server.oauth.api_router import router
+from onyx.auth.users import current_admin_user
+from onyx.configs.app_configs import DEV_MODE
+from onyx.configs.app_configs import WEB_DOMAIN
+from onyx.configs.constants import DocumentSource
+from onyx.db.credentials import create_credential
+from onyx.db.engine import get_current_tenant_id
+from onyx.db.engine import get_session
+from onyx.db.models import User
+from onyx.redis.redis_pool import get_redis_client
+from onyx.server.documents.models import CredentialBase
+
+
+class SlackOAuth:
+    # https://knock.app/blog/how-to-authenticate-users-in-slack-using-oauth
+    # Example: https://api.slack.com/authentication/oauth-v2#exchanging
+
+    class OAuthSession(BaseModel):
+        """Stored in redis to be looked up on callback"""
+
+        email: str
+        redirect_on_success: str | None  # Where to send the user if OAuth flow succeeds
+
+    CLIENT_ID = OAUTH_SLACK_CLIENT_ID
+    CLIENT_SECRET = OAUTH_SLACK_CLIENT_SECRET
+
+    TOKEN_URL = "https://slack.com/api/oauth.v2.access"
+
+    # SCOPE is per https://docs.danswer.dev/connectors/slack
+    BOT_SCOPE = (
+        "channels:history,"
+        "channels:read,"
+        "groups:history,"
+        "groups:read,"
+        "channels:join,"
+        "im:history,"
+        "users:read,"
+        "users:read.email,"
+        "usergroups:read"
+    )
+
+    REDIRECT_URI = f"{WEB_DOMAIN}/admin/connectors/slack/oauth/callback"
+    DEV_REDIRECT_URI = f"https://redirectmeto.com/{REDIRECT_URI}"
+
+    @classmethod
+    def generate_oauth_url(cls, state: str) -> str:
+        return cls._generate_oauth_url_helper(cls.REDIRECT_URI, state)
+
+    @classmethod
+    def generate_dev_oauth_url(cls, state: str) -> str:
+        """dev mode workaround for localhost testing
+        - https://www.nango.dev/blog/oauth-redirects-on-localhost-with-https
+        """
+
+        return cls._generate_oauth_url_helper(cls.DEV_REDIRECT_URI, state)
+
+    @classmethod
+    def _generate_oauth_url_helper(cls, redirect_uri: str, state: str) -> str:
+        url = (
+            f"https://slack.com/oauth/v2/authorize"
+            f"?client_id={cls.CLIENT_ID}"
+            f"&redirect_uri={redirect_uri}"
+            f"&scope={cls.BOT_SCOPE}"
+            f"&state={state}"
+        )
+        return url
+
+    @classmethod
+    def session_dump_json(cls, email: str, redirect_on_success: str | None) -> str:
+        """Temporary state to store in redis. to be looked up on auth response.
+        Returns a json string.
+        """
+        session = SlackOAuth.OAuthSession(
+            email=email, redirect_on_success=redirect_on_success
+        )
+        return session.model_dump_json()
+
+    @classmethod
+    def parse_session(cls, session_json: str) -> OAuthSession:
+        session = SlackOAuth.OAuthSession.model_validate_json(session_json)
+        return session
+
+
+@router.post("/connector/slack/callback")
+def handle_slack_oauth_callback(
+    code: str,
+    state: str,
+    user: User = Depends(current_admin_user),
+    db_session: Session = Depends(get_session),
+    tenant_id: str | None = Depends(get_current_tenant_id),
+) -> JSONResponse:
+    if not SlackOAuth.CLIENT_ID or not SlackOAuth.CLIENT_SECRET:
+        raise HTTPException(
+            status_code=500,
+            detail="Slack client ID or client secret is not configured.",
+        )
+
+    r = get_redis_client(tenant_id=tenant_id)
+
+    # recover the state
+    padded_state = state + "=" * (
+        -len(state) % 4
+    )  # Add padding back (Base64 decoding requires padding)
+    uuid_bytes = base64.urlsafe_b64decode(
+        padded_state
+    )  # Decode the Base64 string back to bytes
+
+    # Convert bytes back to a UUID
+    oauth_uuid = uuid.UUID(bytes=uuid_bytes)
+    oauth_uuid_str = str(oauth_uuid)
+
+    r_key = f"da_oauth:{oauth_uuid_str}"
+
+    session_json_bytes = cast(bytes, r.get(r_key))
+    if not session_json_bytes:
+        raise HTTPException(
+            status_code=400,
+            detail=f"Slack OAuth failed - OAuth state key not found: key={r_key}",
+        )
+
+    session_json = session_json_bytes.decode("utf-8")
+    try:
+        session = SlackOAuth.parse_session(session_json)
+
+        if not DEV_MODE:
+            redirect_uri = SlackOAuth.REDIRECT_URI
+        else:
+            redirect_uri = SlackOAuth.DEV_REDIRECT_URI
+
+        # Exchange the authorization code for an access token
+        response = requests.post(
+            SlackOAuth.TOKEN_URL,
+            headers={"Content-Type": "application/x-www-form-urlencoded"},
+            data={
+                "client_id": SlackOAuth.CLIENT_ID,
+                "client_secret": SlackOAuth.CLIENT_SECRET,
+                "code": code,
+                "redirect_uri": redirect_uri,
+            },
+        )
+
+        response_data = response.json()
+
+        if not response_data.get("ok"):
+            raise HTTPException(
+                status_code=400,
+                detail=f"Slack OAuth failed: {response_data.get('error')}",
+            )
+
+        # Extract token and team information
+        access_token: str = response_data.get("access_token")
+        team_id: str = response_data.get("team", {}).get("id")
+        authed_user_id: str = response_data.get("authed_user", {}).get("id")
+
+        credential_info = CredentialBase(
+            credential_json={"slack_bot_token": access_token},
+            admin_public=True,
+            source=DocumentSource.SLACK,
+            name="Slack OAuth",
+        )
+
+        create_credential(credential_info, user, db_session)
+    except Exception as e:
+        return JSONResponse(
+            status_code=500,
+            content={
+                "success": False,
+                "message": f"An error occurred during Slack OAuth: {str(e)}",
+            },
+        )
+    finally:
+        r.delete(r_key)
+
+    # return the result
+    return JSONResponse(
+        content={
+            "success": True,
+            "message": "Slack OAuth completed successfully.",
+            "finalize_url": None,
+            "redirect_on_success": session.redirect_on_success,
+            "team_id": team_id,
+            "authed_user_id": authed_user_id,
+        }
+    )
--- a/backend/ee/onyx/server/query_and_chat/query_backend.py
+++ b/backend/ee/onyx/server/query_and_chat/query_backend.py
@@ -83,6 +83,7 @@ def handle_search_request(
        user=user,
        llm=llm,
        fast_llm=fast_llm,
+        skip_query_analysis=False,
        db_session=db_session,
        bypass_acl=False,
    )
--- a/backend/ee/onyx/server/query_and_chat/token_limit.py
+++ b/backend/ee/onyx/server/query_and_chat/token_limit.py
@@ -13,7 +13,7 @@ from sqlalchemy import select
 from sqlalchemy.orm import Session

 from onyx.db.api_key import is_api_key_email_address
-from onyx.db.engine import get_session_with_tenant
+from onyx.db.engine import get_session_with_current_tenant
 from onyx.db.models import ChatMessage
 from onyx.db.models import ChatSession
 from onyx.db.models import TokenRateLimit
@@ -28,21 +28,21 @@ from onyx.server.query_and_chat.token_limit import _user_is_rate_limited_by_glob
 from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel


-def _check_token_rate_limits(user: User | None, tenant_id: str | None) -> None:
+def _check_token_rate_limits(user: User | None) -> None:
    if user is None:
        # Unauthenticated users are only rate limited by global settings
-        _user_is_rate_limited_by_global(tenant_id)
+        _user_is_rate_limited_by_global()

    elif is_api_key_email_address(user.email):
        # API keys are only rate limited by global settings
-        _user_is_rate_limited_by_global(tenant_id)
+        _user_is_rate_limited_by_global()

    else:
        run_functions_tuples_in_parallel(
            [
-                (_user_is_rate_limited, (user.id, tenant_id)),
-                (_user_is_rate_limited_by_group, (user.id, tenant_id)),
-                (_user_is_rate_limited_by_global, (tenant_id,)),
+                (_user_is_rate_limited, (user.id,)),
+                (_user_is_rate_limited_by_group, (user.id,)),
+                (_user_is_rate_limited_by_global, ()),
            ]
        )

@@ -52,8 +52,8 @@ User rate limits
 """


-def _user_is_rate_limited(user_id: UUID, tenant_id: str | None) -> None:
-    with get_session_with_tenant(tenant_id) as db_session:
+def _user_is_rate_limited(user_id: UUID) -> None:
+    with get_session_with_current_tenant() as db_session:
        user_rate_limits = fetch_all_user_token_rate_limits(
            db_session=db_session, enabled_only=True, ordered=False
        )
@@ -93,8 +93,8 @@ User Group rate limits
 """


-def _user_is_rate_limited_by_group(user_id: UUID, tenant_id: str | None) -> None:
-    with get_session_with_tenant(tenant_id) as db_session:
+def _user_is_rate_limited_by_group(user_id: UUID) -> None:
+    with get_session_with_current_tenant() as db_session:
        group_rate_limits = _fetch_all_user_group_rate_limits(user_id, db_session)

        if group_rate_limits:
--- a/backend/ee/onyx/server/query_history/api.py
+++ b/backend/ee/onyx/server/query_history/api.py
@@ -2,6 +2,7 @@ import csv
 import io
 from datetime import datetime
 from datetime import timezone
+from http import HTTPStatus
 from uuid import UUID

 from fastapi import APIRouter
@@ -21,8 +22,10 @@ from ee.onyx.server.query_history.models import QuestionAnswerPairSnapshot
 from onyx.auth.users import current_admin_user
 from onyx.auth.users import get_display_email
 from onyx.chat.chat_utils import create_chat_chain
+from onyx.configs.app_configs import ONYX_QUERY_HISTORY_TYPE
 from onyx.configs.constants import MessageType
 from onyx.configs.constants import QAFeedbackType
+from onyx.configs.constants import QueryHistoryType
 from onyx.configs.constants import SessionType
 from onyx.db.chat import get_chat_session_by_id
 from onyx.db.chat import get_chat_sessions_by_user
@@ -35,6 +38,8 @@ from onyx.server.query_and_chat.models import ChatSessionsResponse

 router = APIRouter()

+ONYX_ANONYMIZED_EMAIL = "anonymous@anonymous.invalid"
+

 def fetch_and_process_chat_session_history(
    db_session: Session,
@@ -107,6 +112,17 @@ def get_user_chat_sessions(
    _: User | None = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
 ) -> ChatSessionsResponse:
+    # we specifically don't allow this endpoint if "anonymized" since
+    # this is a direct query on the user id
+    if ONYX_QUERY_HISTORY_TYPE in [
+        QueryHistoryType.DISABLED,
+        QueryHistoryType.ANONYMIZED,
+    ]:
+        raise HTTPException(
+            status_code=HTTPStatus.FORBIDDEN,
+            detail="Per user query history has been disabled by the administrator.",
+        )
+
    try:
        chat_sessions = get_chat_sessions_by_user(
            user_id=user_id, deleted=False, db_session=db_session, limit=0
@@ -122,6 +138,7 @@ def get_user_chat_sessions(
                name=chat.description,
                persona_id=chat.persona_id,
                time_created=chat.time_created.isoformat(),
+                time_updated=chat.time_updated.isoformat(),
                shared_status=chat.shared_status,
                folder_id=chat.folder_id,
                current_alternate_model=chat.current_alternate_model,
@@ -141,6 +158,12 @@ def get_chat_session_history(
    _: User | None = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
 ) -> PaginatedReturn[ChatSessionMinimal]:
+    if ONYX_QUERY_HISTORY_TYPE == QueryHistoryType.DISABLED:
+        raise HTTPException(
+            status_code=HTTPStatus.FORBIDDEN,
+            detail="Query history has been disabled by the administrator.",
+        )
+
    page_of_chat_sessions = get_page_of_chat_sessions(
        page_num=page_num,
        page_size=page_size,
@@ -157,11 +180,16 @@ def get_chat_session_history(
        feedback_filter=feedback_type,
    )

+    minimal_chat_sessions: list[ChatSessionMinimal] = []
+
+    for chat_session in page_of_chat_sessions:
+        minimal_chat_session = ChatSessionMinimal.from_chat_session(chat_session)
+        if ONYX_QUERY_HISTORY_TYPE == QueryHistoryType.ANONYMIZED:
+            minimal_chat_session.user_email = ONYX_ANONYMIZED_EMAIL
+        minimal_chat_sessions.append(minimal_chat_session)
+
    return PaginatedReturn(
-        items=[
-            ChatSessionMinimal.from_chat_session(chat_session)
-            for chat_session in page_of_chat_sessions
-        ],
+        items=minimal_chat_sessions,
        total_items=total_filtered_chat_sessions_count,
    )

@@ -172,6 +200,12 @@ def get_chat_session_admin(
    _: User | None = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
 ) -> ChatSessionSnapshot:
+    if ONYX_QUERY_HISTORY_TYPE == QueryHistoryType.DISABLED:
+        raise HTTPException(
+            status_code=HTTPStatus.FORBIDDEN,
+            detail="Query history has been disabled by the administrator.",
+        )
+
    try:
        chat_session = get_chat_session_by_id(
            chat_session_id=chat_session_id,
@@ -193,6 +227,9 @@ def get_chat_session_admin(
            f"Could not create snapshot for chat session with id '{chat_session_id}'",
        )

+    if ONYX_QUERY_HISTORY_TYPE == QueryHistoryType.ANONYMIZED:
+        snapshot.user_email = ONYX_ANONYMIZED_EMAIL
+
    return snapshot


@@ -203,6 +240,12 @@ def get_query_history_as_csv(
    end: datetime | None = None,
    db_session: Session = Depends(get_session),
 ) -> StreamingResponse:
+    if ONYX_QUERY_HISTORY_TYPE == QueryHistoryType.DISABLED:
+        raise HTTPException(
+            status_code=HTTPStatus.FORBIDDEN,
+            detail="Query history has been disabled by the administrator.",
+        )
+
    complete_chat_session_history = fetch_and_process_chat_session_history(
        db_session=db_session,
        start=start or datetime.fromtimestamp(0, tz=timezone.utc),
@@ -213,6 +256,9 @@ def get_query_history_as_csv(

    question_answer_pairs: list[QuestionAnswerPairSnapshot] = []
    for chat_session_snapshot in complete_chat_session_history:
+        if ONYX_QUERY_HISTORY_TYPE == QueryHistoryType.ANONYMIZED:
+            chat_session_snapshot.user_email = ONYX_ANONYMIZED_EMAIL
+
        question_answer_pairs.extend(
            QuestionAnswerPairSnapshot.from_chat_session_snapshot(chat_session_snapshot)
        )
--- a/backend/ee/onyx/server/tenants/api.py
+++ b/backend/ee/onyx/server/tenants/api.py
@@ -18,11 +18,16 @@ from ee.onyx.server.tenants.anonymous_user_path import (
 from ee.onyx.server.tenants.anonymous_user_path import modify_anonymous_user_path
 from ee.onyx.server.tenants.anonymous_user_path import validate_anonymous_user_path
 from ee.onyx.server.tenants.billing import fetch_billing_information
+from ee.onyx.server.tenants.billing import fetch_stripe_checkout_session
 from ee.onyx.server.tenants.billing import fetch_tenant_stripe_information
 from ee.onyx.server.tenants.models import AnonymousUserPath
 from ee.onyx.server.tenants.models import BillingInformation
 from ee.onyx.server.tenants.models import ImpersonateRequest
 from ee.onyx.server.tenants.models import ProductGatingRequest
+from ee.onyx.server.tenants.models import ProductGatingResponse
+from ee.onyx.server.tenants.models import SubscriptionSessionResponse
+from ee.onyx.server.tenants.models import SubscriptionStatusResponse
+from ee.onyx.server.tenants.product_gating import store_product_gating
 from ee.onyx.server.tenants.provisioning import delete_user_from_control_plane
 from ee.onyx.server.tenants.user_mapping import get_tenant_id_for_email
 from ee.onyx.server.tenants.user_mapping import remove_all_users_from_tenant
@@ -36,17 +41,15 @@ from onyx.auth.users import User
 from onyx.configs.app_configs import WEB_DOMAIN
 from onyx.configs.constants import FASTAPI_USERS_AUTH_COOKIE_NAME
 from onyx.db.auth import get_user_count
-from onyx.db.engine import get_current_tenant_id
 from onyx.db.engine import get_session
+from onyx.db.engine import get_session_with_shared_schema
 from onyx.db.engine import get_session_with_tenant
-from onyx.db.notification import create_notification
 from onyx.db.users import delete_user_from_db
 from onyx.db.users import get_user_by_email
 from onyx.server.manage.models import UserByEmail
-from onyx.server.settings.store import load_settings
-from onyx.server.settings.store import store_settings
 from onyx.utils.logger import setup_logger
 from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
+from shared_configs.contextvars import get_current_tenant_id

 stripe.api_key = STRIPE_SECRET_KEY
 logger = setup_logger()
@@ -55,13 +58,14 @@ router = APIRouter(prefix="/tenants")

@router.get("/anonymous-user-path")
 async def get_anonymous_user_path_api(
-    tenant_id: str | None = Depends(get_current_tenant_id),
    _: User | None = Depends(current_admin_user),
 ) -> AnonymousUserPath:
+    tenant_id = get_current_tenant_id()
+
    if tenant_id is None:
        raise HTTPException(status_code=404, detail="Tenant not found")

-    with get_session_with_tenant(tenant_id=None) as db_session:
+    with get_session_with_shared_schema() as db_session:
        current_path = get_anonymous_user_path(tenant_id, db_session)

    return AnonymousUserPath(anonymous_user_path=current_path)
@@ -70,15 +74,15 @@ async def get_anonymous_user_path_api(
@router.post("/anonymous-user-path")
 async def set_anonymous_user_path_api(
    anonymous_user_path: str,
-    tenant_id: str = Depends(get_current_tenant_id),
    _: User | None = Depends(current_admin_user),
 ) -> None:
+    tenant_id = get_current_tenant_id()
    try:
        validate_anonymous_user_path(anonymous_user_path)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))

-    with get_session_with_tenant(tenant_id=None) as db_session:
+    with get_session_with_shared_schema() as db_session:
        try:
            modify_anonymous_user_path(tenant_id, anonymous_user_path, db_session)
        except IntegrityError:
@@ -99,7 +103,7 @@ async def login_as_anonymous_user(
    anonymous_user_path: str,
    _: User | None = Depends(optional_user),
 ) -> Response:
-    with get_session_with_tenant(tenant_id=None) as db_session:
+    with get_session_with_shared_schema() as db_session:
        tenant_id = get_tenant_id_for_anonymous_user_path(
            anonymous_user_path, db_session
        )
@@ -126,52 +130,48 @@ async def login_as_anonymous_user(
@router.post("/product-gating")
 def gate_product(
    product_gating_request: ProductGatingRequest, _: None = Depends(control_plane_dep)
-) -> None:
+) -> ProductGatingResponse:
    """
    Gating the product means that the product is not available to the tenant.
    They will be directed to the billing page.
-    We gate the product when
-    1) User has ended free trial without adding payment method
-    2) User's card has declined
+    We gate the product when their subscription has ended.
    """
-    tenant_id = product_gating_request.tenant_id
-    token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
+    try:
+        store_product_gating(
+            product_gating_request.tenant_id, product_gating_request.application_status
+        )
+        return ProductGatingResponse(updated=True, error=None)

-    settings = load_settings()
-    settings.product_gating = product_gating_request.product_gating
-    store_settings(settings)
-
-    if product_gating_request.notification:
-        with get_session_with_tenant(tenant_id) as db_session:
-            create_notification(None, product_gating_request.notification, db_session)
-
-    if token is not None:
-        CURRENT_TENANT_ID_CONTEXTVAR.reset(token)
+    except Exception as e:
+        logger.exception("Failed to gate product")
+        return ProductGatingResponse(updated=False, error=str(e))


-@router.get("/billing-information", response_model=BillingInformation)
+@router.get("/billing-information")
 async def billing_information(
    _: User = Depends(current_admin_user),
-) -> BillingInformation:
+) -> BillingInformation | SubscriptionStatusResponse:
    logger.info("Fetching billing information")
-    return BillingInformation(
-        **fetch_billing_information(CURRENT_TENANT_ID_CONTEXTVAR.get())
-    )
+    tenant_id = get_current_tenant_id()
+    return fetch_billing_information(tenant_id)


@router.post("/create-customer-portal-session")
-async def create_customer_portal_session(_: User = Depends(current_admin_user)) -> dict:
+async def create_customer_portal_session(
+    _: User = Depends(current_admin_user),
+) -> dict:
+    tenant_id = get_current_tenant_id()
+
    try:
-        # Fetch tenant_id and current tenant's information
-        tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
        stripe_info = fetch_tenant_stripe_information(tenant_id)
        stripe_customer_id = stripe_info.get("stripe_customer_id")
        if not stripe_customer_id:
            raise HTTPException(status_code=400, detail="Stripe customer ID not found")
        logger.info(stripe_customer_id)
+
        portal_session = stripe.billing_portal.Session.create(
            customer=stripe_customer_id,
-            return_url=f"{WEB_DOMAIN}/admin/cloud-settings",
+            return_url=f"{WEB_DOMAIN}/admin/billing",
        )
        logger.info(portal_session)
        return {"url": portal_session.url}
@@ -180,6 +180,22 @@ async def create_customer_portal_session(_: User = Depends(current_admin_user))
        raise HTTPException(status_code=500, detail=str(e))


+@router.post("/create-subscription-session")
+async def create_subscription_session(
+    _: User = Depends(current_admin_user),
+) -> SubscriptionSessionResponse:
+    try:
+        tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
+        if not tenant_id:
+            raise HTTPException(status_code=400, detail="Tenant ID not found")
+        session_id = fetch_stripe_checkout_session(tenant_id)
+        return SubscriptionSessionResponse(sessionId=session_id)
+
+    except Exception as e:
+        logger.exception("Failed to create resubscription session")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
@router.post("/impersonate")
 async def impersonate_user(
    impersonate_request: ImpersonateRequest,
@@ -188,7 +204,7 @@ async def impersonate_user(
    """Allows a cloud superuser to impersonate another user by generating an impersonation JWT token"""
    tenant_id = get_tenant_id_for_email(impersonate_request.email)

-    with get_session_with_tenant(tenant_id) as tenant_session:
+    with get_session_with_tenant(tenant_id=tenant_id) as tenant_session:
        user_to_impersonate = get_user_by_email(
            impersonate_request.email, tenant_session
        )
@@ -212,8 +228,9 @@ async def leave_organization(
    user_email: UserByEmail,
    current_user: User | None = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
-    tenant_id: str = Depends(get_current_tenant_id),
 ) -> None:
+    tenant_id = get_current_tenant_id()
+
    if current_user is None or current_user.email != user_email.user_email:
        raise HTTPException(
            status_code=403, detail="You can only leave the organization as yourself"
--- a/backend/ee/onyx/server/tenants/billing.py
+++ b/backend/ee/onyx/server/tenants/billing.py
@@ -6,6 +6,8 @@ import stripe
 from ee.onyx.configs.app_configs import STRIPE_PRICE_ID
 from ee.onyx.configs.app_configs import STRIPE_SECRET_KEY
 from ee.onyx.server.tenants.access import generate_data_plane_token
+from ee.onyx.server.tenants.models import BillingInformation
+from ee.onyx.server.tenants.models import SubscriptionStatusResponse
 from onyx.configs.app_configs import CONTROL_PLANE_API_BASE_URL
 from onyx.utils.logger import setup_logger

@@ -14,6 +16,19 @@ stripe.api_key = STRIPE_SECRET_KEY
 logger = setup_logger()


+def fetch_stripe_checkout_session(tenant_id: str) -> str:
+    token = generate_data_plane_token()
+    headers = {
+        "Authorization": f"Bearer {token}",
+        "Content-Type": "application/json",
+    }
+    url = f"{CONTROL_PLANE_API_BASE_URL}/create-checkout-session"
+    params = {"tenant_id": tenant_id}
+    response = requests.post(url, headers=headers, params=params)
+    response.raise_for_status()
+    return response.json()["sessionId"]
+
+
 def fetch_tenant_stripe_information(tenant_id: str) -> dict:
    token = generate_data_plane_token()
    headers = {
@@ -27,7 +42,9 @@ def fetch_tenant_stripe_information(tenant_id: str) -> dict:
    return response.json()


-def fetch_billing_information(tenant_id: str) -> dict:
+def fetch_billing_information(
+    tenant_id: str,
+) -> BillingInformation | SubscriptionStatusResponse:
    logger.info("Fetching billing information")
    token = generate_data_plane_token()
    headers = {
@@ -38,8 +55,19 @@ def fetch_billing_information(tenant_id: str) -> dict:
    params = {"tenant_id": tenant_id}
    response = requests.get(url, headers=headers, params=params)
    response.raise_for_status()
-    billing_info = response.json()
-    return billing_info
+
+    response_data = response.json()
+
+    # Check if the response indicates no subscription
+    if (
+        isinstance(response_data, dict)
+        and "subscribed" in response_data
+        and not response_data["subscribed"]
+    ):
+        return SubscriptionStatusResponse(**response_data)
+
+    # Otherwise, parse as BillingInformation
+    return BillingInformation(**response_data)


 def register_tenant_users(tenant_id: str, number_of_users: int) -> stripe.Subscription:
--- a/backend/ee/onyx/server/tenants/models.py
+++ b/backend/ee/onyx/server/tenants/models.py
@@ -1,7 +1,8 @@
+from datetime import datetime
+
 from pydantic import BaseModel

-from onyx.configs.constants import NotificationType
-from onyx.server.settings.models import GatingType
+from onyx.server.settings.models import ApplicationStatus


 class CheckoutSessionCreationRequest(BaseModel):
@@ -15,15 +16,24 @@ class CreateTenantRequest(BaseModel):

 class ProductGatingRequest(BaseModel):
    tenant_id: str
-    product_gating: GatingType
-    notification: NotificationType | None = None
+    application_status: ApplicationStatus
+
+
+class SubscriptionStatusResponse(BaseModel):
+    subscribed: bool


 class BillingInformation(BaseModel):
+    stripe_subscription_id: str
+    status: str
+    current_period_start: datetime
+    current_period_end: datetime
+    number_of_seats: int
+    cancel_at_period_end: bool
+    canceled_at: datetime | None
+    trial_start: datetime | None
+    trial_end: datetime | None
    seats: int
-    subscription_status: str
-    billing_start: str
-    billing_end: str
    payment_method_enabled: bool


@@ -48,3 +58,12 @@ class TenantDeletionPayload(BaseModel):

 class AnonymousUserPath(BaseModel):
    anonymous_user_path: str | None
+
+
+class ProductGatingResponse(BaseModel):
+    updated: bool
+    error: str | None
+
+
+class SubscriptionSessionResponse(BaseModel):
+    sessionId: str
--- a/backend/ee/onyx/server/tenants/product_gating.py
+++ b/backend/ee/onyx/server/tenants/product_gating.py
@@ -0,0 +1,52 @@
+from typing import cast
+
+from ee.onyx.configs.app_configs import GATED_TENANTS_KEY
+from onyx.configs.constants import ONYX_CLOUD_TENANT_ID
+from onyx.redis.redis_pool import get_redis_client
+from onyx.redis.redis_pool import get_redis_replica_client
+from onyx.server.settings.models import ApplicationStatus
+from onyx.server.settings.store import load_settings
+from onyx.server.settings.store import store_settings
+from onyx.setup import setup_logger
+from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
+
+logger = setup_logger()
+
+
+def update_tenant_gating(tenant_id: str, status: ApplicationStatus) -> None:
+    redis_client = get_redis_client(tenant_id=ONYX_CLOUD_TENANT_ID)
+
+    # Store the full status
+    status_key = f"tenant:{tenant_id}:status"
+    redis_client.set(status_key, status.value)
+
+    # Maintain the GATED_ACCESS set
+    if status == ApplicationStatus.GATED_ACCESS:
+        redis_client.sadd(GATED_TENANTS_KEY, tenant_id)
+    else:
+        redis_client.srem(GATED_TENANTS_KEY, tenant_id)
+
+
+def store_product_gating(tenant_id: str, application_status: ApplicationStatus) -> None:
+    try:
+        token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
+
+        settings = load_settings()
+        settings.application_status = application_status
+        store_settings(settings)
+
+        # Store gated tenant information in Redis
+        update_tenant_gating(tenant_id, application_status)
+
+        if token is not None:
+            CURRENT_TENANT_ID_CONTEXTVAR.reset(token)
+
+    except Exception:
+        logger.exception("Failed to gate product")
+        raise
+
+
+def get_gated_tenants() -> set[str]:
+    redis_client = get_redis_replica_client(tenant_id=ONYX_CLOUD_TENANT_ID)
+    gated_tenants_bytes = cast(set[bytes], redis_client.smembers(GATED_TENANTS_KEY))
+    return {tenant_id.decode("utf-8") for tenant_id in gated_tenants_bytes}
--- a/backend/ee/onyx/server/tenants/provisioning.py
+++ b/backend/ee/onyx/server/tenants/provisioning.py
@@ -55,7 +55,11 @@ logger = logging.getLogger(__name__)
 async def get_or_provision_tenant(
    email: str, referral_source: str | None = None, request: Request | None = None
 ) -> str:
-    """Get existing tenant ID for an email or create a new tenant if none exists."""
+    """
+    Get existing tenant ID for an email or create a new tenant if none exists.
+    This function should only be called after we have verified we want this user's tenant to exist.
+    It returns the tenant ID associated with the email, creating a new tenant if necessary.
+    """
    if not MULTI_TENANT:
        return POSTGRES_DEFAULT_SCHEMA

@@ -104,21 +108,21 @@ async def provision_tenant(tenant_id: str, email: str) -> None:
            status_code=409, detail="User already belongs to an organization"
        )

-    logger.info(f"Provisioning tenant: {tenant_id}")
+    logger.debug(f"Provisioning tenant {tenant_id} for user {email}")
    token = None

    try:
        if not create_schema_if_not_exists(tenant_id):
-            logger.info(f"Created schema for tenant {tenant_id}")
+            logger.debug(f"Created schema for tenant {tenant_id}")
        else:
-            logger.info(f"Schema already exists for tenant {tenant_id}")
+            logger.debug(f"Schema already exists for tenant {tenant_id}")

        token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)

        # Await the Alembic migrations
        await asyncio.to_thread(run_alembic_migrations, tenant_id)

-        with get_session_with_tenant(tenant_id) as db_session:
+        with get_session_with_tenant(tenant_id=tenant_id) as db_session:
            configure_default_api_keys(db_session)

            current_search_settings = (
@@ -134,7 +138,7 @@ async def provision_tenant(tenant_id: str, email: str) -> None:

        add_users_to_tenant([email], tenant_id)

-        with get_session_with_tenant(tenant_id) as db_session:
+        with get_session_with_tenant(tenant_id=tenant_id) as db_session:
            create_milestone_and_report(
                user=None,
                distinct_id=tenant_id,
@@ -200,33 +204,15 @@ async def rollback_tenant_provisioning(tenant_id: str) -> None:


 def configure_default_api_keys(db_session: Session) -> None:
-    if OPENAI_DEFAULT_API_KEY:
-        open_provider = LLMProviderUpsertRequest(
-            name="OpenAI",
-            provider=OPENAI_PROVIDER_NAME,
-            api_key=OPENAI_DEFAULT_API_KEY,
-            default_model_name="gpt-4",
-            fast_default_model_name="gpt-4o-mini",
-            model_names=OPEN_AI_MODEL_NAMES,
-        )
-        try:
-            full_provider = upsert_llm_provider(open_provider, db_session)
-            update_default_provider(full_provider.id, db_session)
-        except Exception as e:
-            logger.error(f"Failed to configure OpenAI provider: {e}")
-    else:
-        logger.error(
-            "OPENAI_DEFAULT_API_KEY not set, skipping OpenAI provider configuration"
-        )
-
    if ANTHROPIC_DEFAULT_API_KEY:
        anthropic_provider = LLMProviderUpsertRequest(
            name="Anthropic",
            provider=ANTHROPIC_PROVIDER_NAME,
            api_key=ANTHROPIC_DEFAULT_API_KEY,
-            default_model_name="claude-3-5-sonnet-20241022",
+            default_model_name="claude-3-7-sonnet-20250219",
            fast_default_model_name="claude-3-5-sonnet-20241022",
            model_names=ANTHROPIC_MODEL_NAMES,
+            display_model_names=["claude-3-5-sonnet-20241022"],
        )
        try:
            full_provider = upsert_llm_provider(anthropic_provider, db_session)
@@ -238,6 +224,26 @@ def configure_default_api_keys(db_session: Session) -> None:
            "ANTHROPIC_DEFAULT_API_KEY not set, skipping Anthropic provider configuration"
        )

+    if OPENAI_DEFAULT_API_KEY:
+        open_provider = LLMProviderUpsertRequest(
+            name="OpenAI",
+            provider=OPENAI_PROVIDER_NAME,
+            api_key=OPENAI_DEFAULT_API_KEY,
+            default_model_name="gpt-4o",
+            fast_default_model_name="gpt-4o-mini",
+            model_names=OPEN_AI_MODEL_NAMES,
+            display_model_names=["o1", "o3-mini", "gpt-4o", "gpt-4o-mini"],
+        )
+        try:
+            full_provider = upsert_llm_provider(open_provider, db_session)
+            update_default_provider(full_provider.id, db_session)
+        except Exception as e:
+            logger.error(f"Failed to configure OpenAI provider: {e}")
+    else:
+        logger.error(
+            "OPENAI_DEFAULT_API_KEY not set, skipping OpenAI provider configuration"
+        )
+
    if COHERE_DEFAULT_API_KEY:
        cloud_embedding_provider = CloudEmbeddingProviderCreationRequest(
            provider_type=EmbeddingProvider.COHERE,
--- a/backend/ee/onyx/server/tenants/user_mapping.py
+++ b/backend/ee/onyx/server/tenants/user_mapping.py
@@ -28,7 +28,7 @@ def get_tenant_id_for_email(email: str) -> str:


 def user_owns_a_tenant(email: str) -> bool:
-    with get_session_with_tenant(POSTGRES_DEFAULT_SCHEMA) as db_session:
+    with get_session_with_tenant(tenant_id=POSTGRES_DEFAULT_SCHEMA) as db_session:
        result = (
            db_session.query(UserTenantMapping)
            .filter(UserTenantMapping.email == email)
@@ -38,7 +38,7 @@ def user_owns_a_tenant(email: str) -> bool:


 def add_users_to_tenant(emails: list[str], tenant_id: str) -> None:
-    with get_session_with_tenant(POSTGRES_DEFAULT_SCHEMA) as db_session:
+    with get_session_with_tenant(tenant_id=POSTGRES_DEFAULT_SCHEMA) as db_session:
        try:
            for email in emails:
                db_session.add(UserTenantMapping(email=email, tenant_id=tenant_id))
@@ -48,7 +48,7 @@ def add_users_to_tenant(emails: list[str], tenant_id: str) -> None:


 def remove_users_from_tenant(emails: list[str], tenant_id: str) -> None:
-    with get_session_with_tenant(POSTGRES_DEFAULT_SCHEMA) as db_session:
+    with get_session_with_tenant(tenant_id=POSTGRES_DEFAULT_SCHEMA) as db_session:
        try:
            mappings_to_delete = (
                db_session.query(UserTenantMapping)
@@ -71,7 +71,7 @@ def remove_users_from_tenant(emails: list[str], tenant_id: str) -> None:


 def remove_all_users_from_tenant(tenant_id: str) -> None:
-    with get_session_with_tenant(POSTGRES_DEFAULT_SCHEMA) as db_session:
+    with get_session_with_tenant(tenant_id=POSTGRES_DEFAULT_SCHEMA) as db_session:
        db_session.query(UserTenantMapping).filter(
            UserTenantMapping.tenant_id == tenant_id
        ).delete()
--- a/backend/model_server/constants.py
+++ b/backend/model_server/constants.py
@@ -6,7 +6,7 @@ MODEL_WARM_UP_STRING = "hi " * 512
 DEFAULT_OPENAI_MODEL = "text-embedding-3-small"
 DEFAULT_COHERE_MODEL = "embed-english-light-v3.0"
 DEFAULT_VOYAGE_MODEL = "voyage-large-2-instruct"
-DEFAULT_VERTEX_MODEL = "text-embedding-004"
+DEFAULT_VERTEX_MODEL = "text-embedding-005"


 class EmbeddingModelTextType:
@@ -28,3 +28,9 @@ class EmbeddingModelTextType:
    @staticmethod
    def get_type(provider: EmbeddingProvider, text_type: EmbedTextType) -> str:
        return EmbeddingModelTextType.PROVIDER_TEXT_TYPE_MAP[provider][text_type]
+
+
+class GPUStatus:
+    CUDA = "cuda"
+    MAC_MPS = "mps"
+    NONE = "none"
--- a/backend/model_server/encoders.py
+++ b/backend/model_server/encoders.py
@@ -5,6 +5,7 @@ from types import TracebackType
 from typing import cast
 from typing import Optional

+import aioboto3  # type: ignore
 import httpx
 import openai
 import vertexai  # type: ignore
@@ -12,6 +13,7 @@ import voyageai  # type: ignore
 from cohere import AsyncClient as CohereAsyncClient
 from fastapi import APIRouter
 from fastapi import HTTPException
+from fastapi import Request
 from google.oauth2 import service_account  # type: ignore
 from litellm import aembedding
 from litellm.exceptions import RateLimitError
@@ -27,11 +29,13 @@ from model_server.constants import DEFAULT_VERTEX_MODEL
 from model_server.constants import DEFAULT_VOYAGE_MODEL
 from model_server.constants import EmbeddingModelTextType
 from model_server.constants import EmbeddingProvider
+from model_server.utils import pass_aws_key
 from model_server.utils import simple_log_function_time
 from onyx.utils.logger import setup_logger
 from shared_configs.configs import API_BASED_EMBEDDING_TIMEOUT
 from shared_configs.configs import INDEXING_ONLY
 from shared_configs.configs import OPENAI_EMBEDDING_TIMEOUT
+from shared_configs.configs import VERTEXAI_EMBEDDING_LOCAL_BATCH_SIZE
 from shared_configs.enums import EmbedTextType
 from shared_configs.enums import RerankerProvider
 from shared_configs.model_server_models import Embedding
@@ -77,7 +81,7 @@ class CloudEmbedding:
        self._closed = False

    async def _embed_openai(
-        self, texts: list[str], model: str | None
+        self, texts: list[str], model: str | None, reduced_dimension: int | None
    ) -> list[Embedding]:
        if not model:
            model = DEFAULT_OPENAI_MODEL
@@ -90,19 +94,28 @@ class CloudEmbedding:
        final_embeddings: list[Embedding] = []
        try:
            for text_batch in batch_list(texts, _OPENAI_MAX_INPUT_LEN):
-                response = await client.embeddings.create(input=text_batch, model=model)
+                response = await client.embeddings.create(
+                    input=text_batch,
+                    model=model,
+                    dimensions=reduced_dimension or openai.NOT_GIVEN,
+                )
                final_embeddings.extend(
                    [embedding.embedding for embedding in response.data]
                )
            return final_embeddings
        except Exception as e:
            error_string = (
-                f"Error embedding text with OpenAI: {str(e)} \n"
-                f"Model: {model} \n"
-                f"Provider: {self.provider} \n"
-                f"Texts: {texts}"
+                f"Exception embedding text with OpenAI - {type(e)}: "
+                f"Model: {model} "
+                f"Provider: {self.provider} "
+                f"Exception: {e}"
            )
            logger.error(error_string)
+
+            # only log text when it's not an authentication error.
+            if not isinstance(e, openai.AuthenticationError):
+                logger.debug(f"Exception texts: {texts}")
+
            raise RuntimeError(error_string)

    async def _embed_cohere(
@@ -172,17 +185,24 @@ class CloudEmbedding:
        vertexai.init(project=project_id, credentials=credentials)
        client = TextEmbeddingModel.from_pretrained(model)

-        embeddings = await client.get_embeddings_async(
-            [
-                TextEmbeddingInput(
-                    text,
-                    embedding_type,
-                )
-                for text in texts
-            ],
-            auto_truncate=True,  # This is the default
-        )
-        return [embedding.values for embedding in embeddings]
+        inputs = [TextEmbeddingInput(text, embedding_type) for text in texts]
+
+        # Split into batches of 25 texts
+        max_texts_per_batch = VERTEXAI_EMBEDDING_LOCAL_BATCH_SIZE
+        batches = [
+            inputs[i : i + max_texts_per_batch]
+            for i in range(0, len(inputs), max_texts_per_batch)
+        ]
+
+        # Dispatch all embedding calls asynchronously at once
+        tasks = [
+            client.get_embeddings_async(batch, auto_truncate=True) for batch in batches
+        ]
+
+        # Wait for all tasks to complete in parallel
+        results = await asyncio.gather(*tasks)
+
+        return [embedding.values for batch in results for embedding in batch]

    async def _embed_litellm_proxy(
        self, texts: list[str], model_name: str | None
@@ -217,9 +237,10 @@ class CloudEmbedding:
        text_type: EmbedTextType,
        model_name: str | None = None,
        deployment_name: str | None = None,
+        reduced_dimension: int | None = None,
    ) -> list[Embedding]:
        if self.provider == EmbeddingProvider.OPENAI:
-            return await self._embed_openai(texts, model_name)
+            return await self._embed_openai(texts, model_name, reduced_dimension)
        elif self.provider == EmbeddingProvider.AZURE:
            return await self._embed_azure(texts, f"azure/{deployment_name}")
        elif self.provider == EmbeddingProvider.LITELLM:
@@ -320,6 +341,8 @@ async def embed_text(
    prefix: str | None,
    api_url: str | None,
    api_version: str | None,
+    reduced_dimension: int | None,
+    gpu_type: str = "UNKNOWN",
 ) -> list[Embedding]:
    if not all(texts):
        logger.error("Empty strings provided for embedding")
@@ -362,6 +385,7 @@ async def embed_text(
                model_name=model_name,
                deployment_name=deployment_name,
                text_type=text_type,
+                reduced_dimension=reduced_dimension,
            )

        if any(embedding is None for embedding in embeddings):
@@ -373,8 +397,11 @@ async def embed_text(

        elapsed = time.monotonic() - start
        logger.info(
-            f"Successfully embedded {len(texts)} texts with {total_chars} total characters "
-            f"with provider {provider_type} in {elapsed:.2f}"
+            f"event=embedding_provider "
+            f"texts={len(texts)} "
+            f"chars={total_chars} "
+            f"provider={provider_type} "
+            f"elapsed={elapsed:.2f}"
        )
    elif model_name is not None:
        logger.info(
@@ -403,6 +430,14 @@ async def embed_text(
            f"Successfully embedded {len(texts)} texts with {total_chars} total characters "
            f"with local model {model_name} in {elapsed:.2f}"
        )
+        logger.info(
+            f"event=embedding_model "
+            f"texts={len(texts)} "
+            f"chars={total_chars} "
+            f"model={model_name} "
+            f"gpu={gpu_type} "
+            f"elapsed={elapsed:.2f}"
+        )
    else:
        logger.error("Neither model name nor provider specified for embedding")
        raise ValueError(
@@ -422,7 +457,7 @@ async def local_rerank(query: str, docs: list[str], model_name: str) -> list[flo
    )


-async def cohere_rerank(
+async def cohere_rerank_api(
    query: str, docs: list[str], model_name: str, api_key: str
 ) -> list[float]:
    cohere_client = CohereAsyncClient(api_key=api_key)
@@ -432,6 +467,45 @@ async def cohere_rerank(
    return [result.relevance_score for result in sorted_results]


+async def cohere_rerank_aws(
+    query: str,
+    docs: list[str],
+    model_name: str,
+    region_name: str,
+    aws_access_key_id: str,
+    aws_secret_access_key: str,
+) -> list[float]:
+    session = aioboto3.Session(
+        aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key
+    )
+    async with session.client(
+        "bedrock-runtime", region_name=region_name
+    ) as bedrock_client:
+        body = json.dumps(
+            {
+                "query": query,
+                "documents": docs,
+                "api_version": 2,
+            }
+        )
+        # Invoke the Bedrock model asynchronously
+        response = await bedrock_client.invoke_model(
+            modelId=model_name,
+            accept="application/json",
+            contentType="application/json",
+            body=body,
+        )
+
+        # Read the response asynchronously
+        response_body = json.loads(await response["body"].read())
+
+        # Extract and sort the results
+        results = response_body.get("results", [])
+        sorted_results = sorted(results, key=lambda item: item["index"])
+
+        return [result["relevance_score"] for result in sorted_results]
+
+
 async def litellm_rerank(
    query: str, docs: list[str], api_url: str, model_name: str, api_key: str | None
 ) -> list[float]:
@@ -455,8 +529,15 @@ async def litellm_rerank(


@router.post("/bi-encoder-embed")
-async def process_embed_request(
+async def route_bi_encoder_embed(
+    request: Request,
    embed_request: EmbedRequest,
+) -> EmbedResponse:
+    return await process_embed_request(embed_request, request.app.state.gpu_type)
+
+
+async def process_embed_request(
+    embed_request: EmbedRequest, gpu_type: str = "UNKNOWN"
 ) -> EmbedResponse:
    if not embed_request.texts:
        raise HTTPException(status_code=400, detail="No texts to be embedded")
@@ -483,7 +564,9 @@ async def process_embed_request(
            text_type=embed_request.text_type,
            api_url=embed_request.api_url,
            api_version=embed_request.api_version,
+            reduced_dimension=embed_request.reduced_dimension,
            prefix=prefix,
+            gpu_type=gpu_type,
        )
        return EmbedResponse(embeddings=embeddings)
    except RateLimitError as e:
@@ -538,15 +621,32 @@ async def process_rerank_request(rerank_request: RerankRequest) -> RerankRespons
        elif rerank_request.provider_type == RerankerProvider.COHERE:
            if rerank_request.api_key is None:
                raise RuntimeError("Cohere Rerank Requires an API Key")
-            sim_scores = await cohere_rerank(
+            sim_scores = await cohere_rerank_api(
                query=rerank_request.query,
                docs=rerank_request.documents,
                model_name=rerank_request.model_name,
                api_key=rerank_request.api_key,
            )
            return RerankResponse(scores=sim_scores)
+
+        elif rerank_request.provider_type == RerankerProvider.BEDROCK:
+            if rerank_request.api_key is None:
+                raise RuntimeError("Bedrock Rerank Requires an API Key")
+            aws_access_key_id, aws_secret_access_key, aws_region = pass_aws_key(
+                rerank_request.api_key
+            )
+            sim_scores = await cohere_rerank_aws(
+                query=rerank_request.query,
+                docs=rerank_request.documents,
+                model_name=rerank_request.model_name,
+                region_name=aws_region,
+                aws_access_key_id=aws_access_key_id,
+                aws_secret_access_key=aws_secret_access_key,
+            )
+            return RerankResponse(scores=sim_scores)
        else:
            raise ValueError(f"Unsupported provider: {rerank_request.provider_type}")
+
    except Exception as e:
        logger.exception(f"Error during reranking process:\n{str(e)}")
        raise HTTPException(
--- a/backend/model_server/main.py
+++ b/backend/model_server/main.py
@@ -16,6 +16,7 @@ from model_server.custom_models import router as custom_models_router
 from model_server.custom_models import warm_up_intent_model
 from model_server.encoders import router as encoders_router
 from model_server.management_endpoints import router as management_router
+from model_server.utils import get_gpu_type
 from onyx import __version__
 from onyx.utils.logger import setup_logger
 from shared_configs.configs import INDEXING_ONLY
@@ -58,12 +59,10 @@ def _move_files_recursively(source: Path, dest: Path, overwrite: bool = False) -

@asynccontextmanager
 async def lifespan(app: FastAPI) -> AsyncGenerator:
-    if torch.cuda.is_available():
-        logger.notice("CUDA GPU is available")
-    elif torch.backends.mps.is_available():
-        logger.notice("Mac MPS is available")
-    else:
-        logger.notice("GPU is not available, using CPU")
+    gpu_type = get_gpu_type()
+    logger.notice(f"Torch GPU Detection: gpu_type={gpu_type}")
+
+    app.state.gpu_type = gpu_type

    if TEMP_HF_CACHE_PATH.is_dir():
        logger.notice("Moving contents of temp_huggingface to huggingface cache.")
--- a/backend/model_server/management_endpoints.py
+++ b/backend/model_server/management_endpoints.py
@@ -1,7 +1,9 @@
-import torch
 from fastapi import APIRouter
 from fastapi import Response

+from model_server.constants import GPUStatus
+from model_server.utils import get_gpu_type
+
 router = APIRouter(prefix="/api")


@@ -11,10 +13,7 @@ async def healthcheck() -> Response:


@router.get("/gpu-status")
-async def gpu_status() -> dict[str, bool | str]:
-    if torch.cuda.is_available():
-        return {"gpu_available": True, "type": "cuda"}
-    elif torch.backends.mps.is_available():
-        return {"gpu_available": True, "type": "mps"}
-    else:
-        return {"gpu_available": False, "type": "none"}
+async def route_gpu_status() -> dict[str, bool | str]:
+    gpu_type = get_gpu_type()
+    gpu_available = gpu_type != GPUStatus.NONE
+    return {"gpu_available": gpu_available, "type": gpu_type}
--- a/backend/model_server/utils.py
+++ b/backend/model_server/utils.py
@@ -8,6 +8,9 @@ from typing import Any
 from typing import cast
 from typing import TypeVar

+import torch
+
+from model_server.constants import GPUStatus
 from onyx.utils.logger import setup_logger

 logger = setup_logger()
@@ -58,3 +61,41 @@ def simple_log_function_time(
            return cast(F, wrapped_sync_func)

    return decorator
+
+
+def get_gpu_type() -> str:
+    if torch.cuda.is_available():
+        return GPUStatus.CUDA
+    if torch.backends.mps.is_available():
+        return GPUStatus.MAC_MPS
+
+    return GPUStatus.NONE
+
+
+def pass_aws_key(api_key: str) -> tuple[str, str, str]:
+    """Parse AWS API key string into components.
+
+    Args:
+        api_key: String in format 'aws_ACCESSKEY_SECRETKEY_REGION'
+
+    Returns:
+        Tuple of (access_key, secret_key, region)
+
+    Raises:
+        ValueError: If key format is invalid
+    """
+    if not api_key.startswith("aws"):
+        raise ValueError("API key must start with 'aws' prefix")
+
+    parts = api_key.split("_")
+    if len(parts) != 4:
+        raise ValueError(
+            f"API key must be in format 'aws_ACCESSKEY_SECRETKEY_REGION', got {len(parts) - 1} parts"
+            "this is an onyx specific format for formatting the aws secrets for bedrock"
+        )
+
+    try:
+        _, aws_access_key_id, aws_secret_access_key, aws_region = parts
+        return aws_access_key_id, aws_secret_access_key, aws_region
+    except Exception as e:
+        raise ValueError(f"Failed to parse AWS key components: {str(e)}")
--- a/backend/onyx/agents/agent_search/basic/graph_builder.py
+++ b/backend/onyx/agents/agent_search/basic/graph_builder.py
@@ -3,17 +3,16 @@ from langgraph.graph import START
 from langgraph.graph import StateGraph

 from onyx.agents.agent_search.basic.states import BasicInput
+from onyx.agents.agent_search.basic.states import BasicOutput
 from onyx.agents.agent_search.basic.states import BasicState
-from onyx.agents.agent_search.orchestration.nodes.basic_use_tool_response import (
-    basic_use_tool_response,
-)
-from onyx.agents.agent_search.orchestration.nodes.llm_tool_choice import llm_tool_choice
+from onyx.agents.agent_search.orchestration.nodes.call_tool import call_tool
+from onyx.agents.agent_search.orchestration.nodes.choose_tool import choose_tool
 from onyx.agents.agent_search.orchestration.nodes.prepare_tool_input import (
    prepare_tool_input,
 )
-from onyx.agents.agent_search.orchestration.nodes.tool_call import tool_call
-from onyx.agents.agent_search.orchestration.states import ToolChoiceUpdate
-from onyx.configs.agent_configs import AGENT_MAX_TOOL_CALLS
+from onyx.agents.agent_search.orchestration.nodes.use_tool_response import (
+    basic_use_tool_response,
+)
 from onyx.utils.logger import setup_logger

 logger = setup_logger()
@@ -23,7 +22,7 @@ def basic_graph_builder() -> StateGraph:
    graph = StateGraph(
        state_schema=BasicState,
        input=BasicInput,
-        output=ToolChoiceUpdate,
+        output=BasicOutput,
    )

    ### Add nodes ###
@@ -34,13 +33,13 @@ def basic_graph_builder() -> StateGraph:
    )

    graph.add_node(
-        node="llm_tool_choice",
-        action=llm_tool_choice,
+        node="choose_tool",
+        action=choose_tool,
    )

    graph.add_node(
-        node="tool_call",
-        action=tool_call,
+        node="call_tool",
+        action=call_tool,
    )

    graph.add_node(
@@ -52,24 +51,20 @@ def basic_graph_builder() -> StateGraph:

    graph.add_edge(start_key=START, end_key="prepare_tool_input")

-    graph.add_edge(start_key="prepare_tool_input", end_key="llm_tool_choice")
+    graph.add_edge(start_key="prepare_tool_input", end_key="choose_tool")

-    graph.add_conditional_edges("llm_tool_choice", should_continue, ["tool_call", END])
+    graph.add_conditional_edges("choose_tool", should_continue, ["call_tool", END])

    graph.add_edge(
-        start_key="tool_call",
+        start_key="call_tool",
        end_key="basic_use_tool_response",
    )

-    graph.add_conditional_edges(
-        "basic_use_tool_response", should_continue, ["tool_call", END]
+    graph.add_edge(
+        start_key="basic_use_tool_response",
+        end_key=END,
    )

-    # graph.add_edge(
-    #     start_key="basic_use_tool_response",
-    #     end_key=END,
-    # )
-
    return graph


@@ -77,9 +72,8 @@ def should_continue(state: BasicState) -> str:
    return (
        # If there are no tool calls, basic graph already streamed the answer
        END
-        if state.tool_choices[-1] is None
-        or len(state.tool_choices) > AGENT_MAX_TOOL_CALLS
-        else "tool_call"
+        if state.tool_choice is None
+        else "call_tool"
    )


@@ -91,7 +85,7 @@ if __name__ == "__main__":

    graph = basic_graph_builder()
    compiled_graph = graph.compile()
-    input = BasicInput(_unused=True)
+    input = BasicInput(unused=True)
    primary_llm, fast_llm = get_default_llms()
    with get_session_context_manager() as db_session:
        config, _ = get_test_config(
--- a/backend/onyx/agents/agent_search/basic/states.py
+++ b/backend/onyx/agents/agent_search/basic/states.py
@@ -17,7 +17,7 @@ from onyx.agents.agent_search.orchestration.states import ToolChoiceUpdate
 class BasicInput(BaseModel):
    # Langgraph needs a nonempty input, but we pass in all static
    # data through a RunnableConfig.
-    _unused: bool = True
+    unused: bool = True


 ## Graph Output State
--- a/backend/onyx/agents/agent_search/core_state.py
+++ b/backend/onyx/agents/agent_search/core_state.py
@@ -9,7 +9,6 @@ class CoreState(BaseModel):
    This is the core state that is shared across all subgraphs.
    """

-    base_question: str = ""
    log_messages: Annotated[list[str], add] = []


@@ -18,4 +17,4 @@ class SubgraphCoreState(BaseModel):
    This is the core state that is shared across all subgraphs.
    """

-    log_messages: Annotated[list[str], add]
+    log_messages: Annotated[list[str], add] = []
--- a/backend/onyx/agents/agent_search/deep_search/initial/generate_individual_sub_answer/nodes/check_sub_answer.py
+++ b/backend/onyx/agents/agent_search/deep_search/initial/generate_individual_sub_answer/nodes/check_sub_answer.py
@@ -1,8 +1,8 @@
 from datetime import datetime
 from typing import cast

+from langchain_core.messages import BaseMessage
 from langchain_core.messages import HumanMessage
-from langchain_core.messages import merge_message_runs
 from langchain_core.runnables.config import RunnableConfig

 from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer.states import (
@@ -12,14 +12,45 @@ from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer
    SubQuestionAnswerCheckUpdate,
 )
 from onyx.agents.agent_search.models import GraphConfig
+from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
+    binary_string_test,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AGENT_LLM_RATELIMIT_MESSAGE,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AGENT_LLM_TIMEOUT_MESSAGE,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AGENT_POSITIVE_VALUE_STR,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import AgentLLMErrorType
+from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
+from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id
+from onyx.configs.agent_configs import AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_CHECK
+from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_SUBANSWER_CHECK
+from onyx.llm.chat_llm import LLMRateLimitError
+from onyx.llm.chat_llm import LLMTimeoutError
 from onyx.prompts.agent_search import SUB_ANSWER_CHECK_PROMPT
 from onyx.prompts.agent_search import UNKNOWN_ANSWER
+from onyx.utils.logger import setup_logger
+from onyx.utils.threadpool_concurrency import run_with_timeout
+from onyx.utils.timing import log_function_time
+
+logger = setup_logger()
+
+_llm_node_error_strings = LLMNodeErrorStrings(
+    timeout="LLM Timeout Error. The sub-answer will be treated as 'relevant'",
+    rate_limit="LLM Rate Limit Error. The sub-answer will be treated as 'relevant'",
+    general_error="General LLM Error. The sub-answer will be treated as 'relevant'",
+)


+@log_function_time(print_only=True)
 def check_sub_answer(
    state: AnswerQuestionState, config: RunnableConfig
 ) -> SubQuestionAnswerCheckUpdate:
@@ -53,14 +84,42 @@ def check_sub_answer(

    graph_config = cast(GraphConfig, config["metadata"]["config"])
    fast_llm = graph_config.tooling.fast_llm
-    response = list(
-        fast_llm.stream(
+    agent_error: AgentErrorLog | None = None
+    response: BaseMessage | None = None
+    try:
+        response = run_with_timeout(
+            AGENT_TIMEOUT_LLM_SUBANSWER_CHECK,
+            fast_llm.invoke,
            prompt=msg,
+            timeout_override=AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_CHECK,
        )
-    )

-    quality_str: str = merge_message_runs(response, chunk_separator="")[0].content
-    answer_quality = "yes" in quality_str.lower()
+        quality_str: str = cast(str, response.content)
+        answer_quality = binary_string_test(
+            text=quality_str, positive_value=AGENT_POSITIVE_VALUE_STR
+        )
+        log_result = f"Answer quality: {quality_str}"
+
+    except (LLMTimeoutError, TimeoutError):
+        agent_error = AgentErrorLog(
+            error_type=AgentLLMErrorType.TIMEOUT,
+            error_message=AGENT_LLM_TIMEOUT_MESSAGE,
+            error_result=_llm_node_error_strings.timeout,
+        )
+        answer_quality = True
+        log_result = agent_error.error_result
+        logger.error("LLM Timeout Error - check sub answer")
+
+    except LLMRateLimitError:
+        agent_error = AgentErrorLog(
+            error_type=AgentLLMErrorType.RATE_LIMIT,
+            error_message=AGENT_LLM_RATELIMIT_MESSAGE,
+            error_result=_llm_node_error_strings.rate_limit,
+        )
+
+        answer_quality = True
+        log_result = agent_error.error_result
+        logger.error("LLM Rate Limit Error - check sub answer")

    return SubQuestionAnswerCheckUpdate(
        answer_quality=answer_quality,
@@ -69,7 +128,7 @@ def check_sub_answer(
                graph_component="initial  - generate individual sub answer",
                node_name="check sub answer",
                node_start_time=node_start_time,
-                result=f"Answer quality: {quality_str}",
+                result=log_result,
            )
        ],
    )
--- a/backend/onyx/agents/agent_search/deep_search/initial/generate_individual_sub_answer/nodes/generate_sub_answer.py
+++ b/backend/onyx/agents/agent_search/deep_search/initial/generate_individual_sub_answer/nodes/generate_sub_answer.py
@@ -1,5 +1,4 @@
 from datetime import datetime
-from typing import Any
 from typing import cast

 from langchain_core.messages import merge_message_runs
@@ -16,6 +15,23 @@ from onyx.agents.agent_search.models import GraphConfig
 from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
    build_sub_question_answer_prompt,
 )
+from onyx.agents.agent_search.shared_graph_utils.calculations import (
+    dedup_sort_inference_section_list,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AGENT_LLM_RATELIMIT_MESSAGE,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AGENT_LLM_TIMEOUT_MESSAGE,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AgentLLMErrorType,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    LLM_ANSWER_ERROR_MESSAGE,
+)
+from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
+from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
 from onyx.agents.agent_search.shared_graph_utils.utils import get_answer_citation_ids
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
@@ -30,12 +46,25 @@ from onyx.chat.models import StreamStopInfo
 from onyx.chat.models import StreamStopReason
 from onyx.chat.models import StreamType
 from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
+from onyx.configs.agent_configs import AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION
+from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_SUBANSWER_GENERATION
+from onyx.llm.chat_llm import LLMRateLimitError
+from onyx.llm.chat_llm import LLMTimeoutError
 from onyx.prompts.agent_search import NO_RECOVERED_DOCS
 from onyx.utils.logger import setup_logger
+from onyx.utils.threadpool_concurrency import run_with_timeout
+from onyx.utils.timing import log_function_time

 logger = setup_logger()

+_llm_node_error_strings = LLMNodeErrorStrings(
+    timeout="LLM Timeout Error. A sub-answer could not be constructed and the sub-question will be ignored.",
+    rate_limit="LLM Rate Limit Error. A sub-answer could not be constructed and the sub-question will be ignored.",
+    general_error="General LLM Error. A sub-answer could not be constructed and the sub-question will be ignored.",
+)

+
+@log_function_time(print_only=True)
 def generate_sub_answer(
    state: AnswerQuestionState,
    config: RunnableConfig,
@@ -51,12 +80,17 @@ def generate_sub_answer(
    state.verified_reranked_documents
    level, question_num = parse_question_id(state.question_id)
    context_docs = state.context_documents[:AGENT_MAX_ANSWER_CONTEXT_DOCS]
+
+    context_docs = dedup_sort_inference_section_list(context_docs)
+
    persona_contextualized_prompt = get_persona_agent_prompt_expressions(
        graph_config.inputs.search_request.persona
    ).contextualized_prompt

    if len(context_docs) == 0:
        answer_str = NO_RECOVERED_DOCS
+        cited_documents: list = []
+        log_results = "No documents retrieved"
        write_custom_event(
            "sub_answers",
            AgentAnswerPiece(
@@ -77,43 +111,75 @@ def generate_sub_answer(
            config=fast_llm.config,
        )

-        response: list[str | list[str | dict[str, Any]]] = []
        dispatch_timings: list[float] = []
-        for message in fast_llm.stream(
-            prompt=msg,
-        ):
-            # TODO: in principle, the answer here COULD contain images, but we don't support that yet
-            content = message.content
-            if not isinstance(content, str):
-                raise ValueError(
-                    f"Expected content to be a string, but got {type(content)}"
+        agent_error: AgentErrorLog | None = None
+        response: list[str] = []
+
+        def stream_sub_answer() -> list[str]:
+            for message in fast_llm.stream(
+                prompt=msg,
+                timeout_override=AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION,
+            ):
+                # TODO: in principle, the answer here COULD contain images, but we don't support that yet
+                content = message.content
+                if not isinstance(content, str):
+                    raise ValueError(
+                        f"Expected content to be a string, but got {type(content)}"
+                    )
+                start_stream_token = datetime.now()
+                write_custom_event(
+                    "sub_answers",
+                    AgentAnswerPiece(
+                        answer_piece=content,
+                        level=level,
+                        level_question_num=question_num,
+                        answer_type="agent_sub_answer",
+                    ),
+                    writer,
                )
-            start_stream_token = datetime.now()
-            write_custom_event(
-                "sub_answers",
-                AgentAnswerPiece(
-                    answer_piece=content,
-                    level=level,
-                    level_question_num=question_num,
-                    answer_type="agent_sub_answer",
-                ),
-                writer,
-            )
-            end_stream_token = datetime.now()
-            dispatch_timings.append(
-                (end_stream_token - start_stream_token).microseconds
-            )
-            response.append(content)
+                end_stream_token = datetime.now()
+                dispatch_timings.append(
+                    (end_stream_token - start_stream_token).microseconds
+                )
+                response.append(content)
+            return response

-        answer_str = merge_message_runs(response, chunk_separator="")[0].content
-        logger.debug(
-            f"Average dispatch time: {sum(dispatch_timings) / len(dispatch_timings)}"
-        )
+        try:
+            response = run_with_timeout(
+                AGENT_TIMEOUT_LLM_SUBANSWER_GENERATION,
+                stream_sub_answer,
+            )

-    answer_citation_ids = get_answer_citation_ids(answer_str)
-    cited_documents = [
-        context_docs[id] for id in answer_citation_ids if id < len(context_docs)
-    ]
+        except (LLMTimeoutError, TimeoutError):
+            agent_error = AgentErrorLog(
+                error_type=AgentLLMErrorType.TIMEOUT,
+                error_message=AGENT_LLM_TIMEOUT_MESSAGE,
+                error_result=_llm_node_error_strings.timeout,
+            )
+            logger.error("LLM Timeout Error - generate sub answer")
+        except LLMRateLimitError:
+            agent_error = AgentErrorLog(
+                error_type=AgentLLMErrorType.RATE_LIMIT,
+                error_message=AGENT_LLM_RATELIMIT_MESSAGE,
+                error_result=_llm_node_error_strings.rate_limit,
+            )
+            logger.error("LLM Rate Limit Error - generate sub answer")
+
+        if agent_error:
+            answer_str = LLM_ANSWER_ERROR_MESSAGE
+            cited_documents = []
+            log_results = (
+                agent_error.error_result
+                or "Sub-answer generation failed due to LLM error"
+            )
+
+        else:
+            answer_str = merge_message_runs(response, chunk_separator="")[0].content
+            answer_citation_ids = get_answer_citation_ids(answer_str)
+            cited_documents = [
+                context_docs[id] for id in answer_citation_ids if id < len(context_docs)
+            ]
+            log_results = None

    stop_event = StreamStopInfo(
        stop_reason=StreamStopReason.FINISHED,
@@ -131,7 +197,7 @@ def generate_sub_answer(
                graph_component="initial - generate individual sub answer",
                node_name="generate sub answer",
                node_start_time=node_start_time,
-                result="",
+                result=log_results or "",
            )
        ],
    )
--- a/backend/onyx/agents/agent_search/deep_search/initial/generate_individual_sub_answer/states.py
+++ b/backend/onyx/agents/agent_search/deep_search/initial/generate_individual_sub_answer/states.py
@@ -42,10 +42,8 @@ class SubQuestionRetrievalIngestionUpdate(LoggerUpdate, BaseModel):


 class SubQuestionAnsweringInput(SubgraphCoreState):
-    question: str = ""
-    question_id: str = (
-        ""  # 0_0 is original question, everything else is <level>_<question_num>.
-    )
+    question: str
+    question_id: str
    # level 0 is original question and first decomposition, level 1 is follow up, etc
    # question_num is a unique number per original question per level.

--- a/backend/onyx/agents/agent_search/deep_search/initial/generate_initial_answer/nodes/generate_initial_answer.py
+++ b/backend/onyx/agents/agent_search/deep_search/initial/generate_initial_answer/nodes/generate_initial_answer.py
@@ -1,5 +1,4 @@
 from datetime import datetime
-from typing import Any
 from typing import cast

 from langchain_core.messages import HumanMessage
@@ -26,14 +25,31 @@ from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
 from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
    trim_prompt_piece,
 )
+from onyx.agents.agent_search.shared_graph_utils.calculations import (
+    get_answer_generation_documents,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AGENT_LLM_RATELIMIT_MESSAGE,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AGENT_LLM_TIMEOUT_MESSAGE,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AgentLLMErrorType,
+)
+from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
 from onyx.agents.agent_search.shared_graph_utils.models import InitialAgentResultStats
+from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
 from onyx.agents.agent_search.shared_graph_utils.operators import (
-    dedup_inference_sections,
+    dedup_inference_section_list,
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    dispatch_main_answer_stop_info,
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import format_docs
+from onyx.agents.agent_search.shared_graph_utils.utils import (
+    get_deduplicated_structured_subquestion_documents,
+)
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
 )
@@ -42,12 +58,20 @@ from onyx.agents.agent_search.shared_graph_utils.utils import remove_document_ci
 from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
 from onyx.chat.models import AgentAnswerPiece
 from onyx.chat.models import ExtendedToolResponse
+from onyx.chat.models import StreamingError
+from onyx.configs.agent_configs import AGENT_ANSWER_GENERATION_BY_FAST_LLM
 from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
+from onyx.configs.agent_configs import AGENT_MAX_STREAMED_DOCS_FOR_INITIAL_ANSWER
 from onyx.configs.agent_configs import AGENT_MIN_ORIG_QUESTION_DOCS
-from onyx.context.search.models import InferenceSection
-from onyx.prompts.agent_search import (
-    INITIAL_ANSWER_PROMPT_W_SUB_QUESTIONS,
+from onyx.configs.agent_configs import (
+    AGENT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION,
 )
+from onyx.configs.agent_configs import (
+    AGENT_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION,
+)
+from onyx.llm.chat_llm import LLMRateLimitError
+from onyx.llm.chat_llm import LLMTimeoutError
+from onyx.prompts.agent_search import INITIAL_ANSWER_PROMPT_W_SUB_QUESTIONS
 from onyx.prompts.agent_search import (
    INITIAL_ANSWER_PROMPT_WO_SUB_QUESTIONS,
 )
@@ -56,8 +80,17 @@ from onyx.prompts.agent_search import (
 )
 from onyx.prompts.agent_search import UNKNOWN_ANSWER
 from onyx.tools.tool_implementations.search.search_tool import yield_search_responses
+from onyx.utils.threadpool_concurrency import run_with_timeout
+from onyx.utils.timing import log_function_time
+
+_llm_node_error_strings = LLMNodeErrorStrings(
+    timeout="LLM Timeout Error. The initial answer could not be generated.",
+    rate_limit="LLM Rate Limit Error. The initial answer could not be generated.",
+    general_error="General LLM Error. The initial answer could not be generated.",
+)


+@log_function_time(print_only=True)
 def generate_initial_answer(
    state: SubQuestionRetrievalState,
    config: RunnableConfig,
@@ -73,15 +106,19 @@ def generate_initial_answer(
    question = graph_config.inputs.search_request.query
    prompt_enrichment_components = get_prompt_enrichment_components(graph_config)

-    sub_questions_cited_documents = state.cited_documents
+    # get all documents cited in sub-questions
+    structured_subquestion_docs = get_deduplicated_structured_subquestion_documents(
+        state.sub_question_results
+    )
+
    orig_question_retrieval_documents = state.orig_question_retrieved_documents

-    consolidated_context_docs: list[InferenceSection] = sub_questions_cited_documents
+    consolidated_context_docs = structured_subquestion_docs.cited_documents
    counter = 0
    for original_doc_number, original_doc in enumerate(
        orig_question_retrieval_documents
    ):
-        if original_doc_number not in sub_questions_cited_documents:
+        if original_doc_number not in structured_subquestion_docs.cited_documents:
            if (
                counter <= AGENT_MIN_ORIG_QUESTION_DOCS
                or len(consolidated_context_docs) < AGENT_MAX_ANSWER_CONTEXT_DOCS
@@ -90,15 +127,18 @@ def generate_initial_answer(
                counter += 1

    # sort docs by their scores - though the scores refer to different questions
-    relevant_docs = dedup_inference_sections(
-        consolidated_context_docs, consolidated_context_docs
-    )
+    relevant_docs = dedup_inference_section_list(consolidated_context_docs)

    sub_questions: list[str] = []
-    streamed_documents = (
-        relevant_docs
-        if len(relevant_docs) > 0
-        else state.orig_question_retrieved_documents[:15]
+
+    # Create the list of documents to stream out. Start with the
+    # ones that wil be in the context (or, if len == 0, use docs
+    # that were retrieved for the original question)
+    answer_generation_documents = get_answer_generation_documents(
+        relevant_docs=relevant_docs,
+        context_documents=structured_subquestion_docs.context_documents,
+        original_question_docs=orig_question_retrieval_documents,
+        max_docs=AGENT_MAX_STREAMED_DOCS_FOR_INITIAL_ANSWER,
    )

    # Use the query info from the base document retrieval
@@ -108,11 +148,13 @@ def generate_initial_answer(
        graph_config.tooling.search_tool
    ), "search_tool must be provided for agentic search"

-    relevance_list = relevance_from_docs(relevant_docs)
+    relevance_list = relevance_from_docs(
+        answer_generation_documents.streaming_documents
+    )
    for tool_response in yield_search_responses(
        query=question,
-        reranked_sections=streamed_documents,
-        final_context_sections=streamed_documents,
+        reranked_sections=answer_generation_documents.streaming_documents,
+        final_context_sections=answer_generation_documents.context_documents,
        search_query_info=query_info,
        get_section_relevance=lambda: relevance_list,
        search_tool=graph_config.tooling.search_tool,
@@ -128,7 +170,7 @@ def generate_initial_answer(
            writer,
        )

-    if len(relevant_docs) == 0:
+    if len(answer_generation_documents.context_documents) == 0:
        write_custom_event(
            "initial_agent_answer",
            AgentAnswerPiece(
@@ -192,9 +234,13 @@ def generate_initial_answer(

        sub_questions = all_sub_questions  # Replace the original assignment

-        model = graph_config.tooling.fast_llm
+        model = (
+            graph_config.tooling.fast_llm
+            if AGENT_ANSWER_GENERATION_BY_FAST_LLM
+            else graph_config.tooling.primary_llm
+        )

-        doc_context = format_docs(relevant_docs)
+        doc_context = format_docs(answer_generation_documents.context_documents)
        doc_context = trim_prompt_piece(
            config=model.config,
            prompt_piece=doc_context,
@@ -222,32 +268,92 @@ def generate_initial_answer(
            )
        ]

-        streamed_tokens: list[str | list[str | dict[str, Any]]] = [""]
+        streamed_tokens: list[str] = [""]
        dispatch_timings: list[float] = []
-        for message in model.stream(msg):
-            # TODO: in principle, the answer here COULD contain images, but we don't support that yet
-            content = message.content
-            if not isinstance(content, str):
-                raise ValueError(
-                    f"Expected content to be a string, but got {type(content)}"
-                )
-            start_stream_token = datetime.now()

+        agent_error: AgentErrorLog | None = None
+
+        def stream_initial_answer() -> list[str]:
+            response: list[str] = []
+            for message in model.stream(
+                msg,
+                timeout_override=AGENT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION,
+            ):
+                # TODO: in principle, the answer here COULD contain images, but we don't support that yet
+                content = message.content
+                if not isinstance(content, str):
+                    raise ValueError(
+                        f"Expected content to be a string, but got {type(content)}"
+                    )
+                start_stream_token = datetime.now()
+
+                write_custom_event(
+                    "initial_agent_answer",
+                    AgentAnswerPiece(
+                        answer_piece=content,
+                        level=0,
+                        level_question_num=0,
+                        answer_type="agent_level_answer",
+                    ),
+                    writer,
+                )
+                end_stream_token = datetime.now()
+                dispatch_timings.append(
+                    (end_stream_token - start_stream_token).microseconds
+                )
+                response.append(content)
+            return response
+
+        try:
+            streamed_tokens = run_with_timeout(
+                AGENT_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION,
+                stream_initial_answer,
+            )
+
+        except (LLMTimeoutError, TimeoutError):
+            agent_error = AgentErrorLog(
+                error_type=AgentLLMErrorType.TIMEOUT,
+                error_message=AGENT_LLM_TIMEOUT_MESSAGE,
+                error_result=_llm_node_error_strings.timeout,
+            )
+            logger.error("LLM Timeout Error - generate initial answer")
+
+        except LLMRateLimitError:
+            agent_error = AgentErrorLog(
+                error_type=AgentLLMErrorType.RATE_LIMIT,
+                error_message=AGENT_LLM_RATELIMIT_MESSAGE,
+                error_result=_llm_node_error_strings.rate_limit,
+            )
+            logger.error("LLM Rate Limit Error - generate initial answer")
+
+        if agent_error:
            write_custom_event(
                "initial_agent_answer",
-                AgentAnswerPiece(
-                    answer_piece=content,
-                    level=0,
-                    level_question_num=0,
-                    answer_type="agent_level_answer",
+                StreamingError(
+                    error=AGENT_LLM_TIMEOUT_MESSAGE,
                ),
                writer,
            )
-            end_stream_token = datetime.now()
-            dispatch_timings.append(
-                (end_stream_token - start_stream_token).microseconds
+            return InitialAnswerUpdate(
+                initial_answer=None,
+                answer_error=AgentErrorLog(
+                    error_message=agent_error.error_message or "An LLM error occurred",
+                    error_type=agent_error.error_type,
+                    error_result=agent_error.error_result,
+                ),
+                initial_agent_stats=None,
+                generated_sub_questions=sub_questions,
+                agent_base_end_time=None,
+                agent_base_metrics=None,
+                log_messages=[
+                    get_langgraph_node_log_string(
+                        graph_component="initial - generate initial answer",
+                        node_name="generate initial answer",
+                        node_start_time=node_start_time,
+                        result=agent_error.error_result or "An LLM error occurred",
+                    )
+                ],
            )
-            streamed_tokens.append(content)

        logger.debug(
            f"Average dispatch time for initial answer: {sum(dispatch_timings) / len(dispatch_timings)}"
--- a/backend/onyx/agents/agent_search/deep_search/initial/generate_initial_answer/nodes/validate_initial_answer.py
+++ b/backend/onyx/agents/agent_search/deep_search/initial/generate_initial_answer/nodes/validate_initial_answer.py
@@ -10,8 +10,10 @@ from onyx.agents.agent_search.deep_search.main.states import (
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
 )
+from onyx.utils.timing import log_function_time


+@log_function_time(print_only=True)
 def validate_initial_answer(
    state: SubQuestionRetrievalState,
 ) -> InitialAnswerQualityUpdate:
@@ -25,7 +27,7 @@ def validate_initial_answer(
        f"--------{node_start_time}--------Checking for base answer validity - for not set True/False manually"
    )

-    verdict = True
+    verdict = True  # not actually required as already streamed out. Refinement will do similar

    return InitialAnswerQualityUpdate(
        initial_answer_quality_eval=verdict,
--- a/backend/onyx/agents/agent_search/deep_search/initial/generate_sub_answers/nodes/decompose_orig_question.py
+++ b/backend/onyx/agents/agent_search/deep_search/initial/generate_sub_answers/nodes/decompose_orig_question.py
@@ -23,6 +23,8 @@ from onyx.agents.agent_search.models import GraphConfig
 from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
    build_history_prompt,
 )
+from onyx.agents.agent_search.shared_graph_utils.models import BaseMessage_Content
+from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
 from onyx.agents.agent_search.shared_graph_utils.utils import dispatch_separated
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
@@ -33,17 +35,34 @@ from onyx.chat.models import StreamStopReason
 from onyx.chat.models import StreamType
 from onyx.chat.models import SubQuestionPiece
 from onyx.configs.agent_configs import AGENT_NUM_DOCS_FOR_DECOMPOSITION
+from onyx.configs.agent_configs import (
+    AGENT_TIMEOUT_CONNECT_LLM_SUBQUESTION_GENERATION,
+)
+from onyx.configs.agent_configs import (
+    AGENT_TIMEOUT_LLM_SUBQUESTION_GENERATION,
+)
+from onyx.llm.chat_llm import LLMRateLimitError
+from onyx.llm.chat_llm import LLMTimeoutError
 from onyx.prompts.agent_search import (
-    INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH,
+    INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH_ASSUMING_REFINEMENT,
 )
 from onyx.prompts.agent_search import (
-    INITIAL_QUESTION_DECOMPOSITION_PROMPT,
+    INITIAL_QUESTION_DECOMPOSITION_PROMPT_ASSUMING_REFINEMENT,
 )
 from onyx.utils.logger import setup_logger
+from onyx.utils.threadpool_concurrency import run_with_timeout
+from onyx.utils.timing import log_function_time

 logger = setup_logger()

+_llm_node_error_strings = LLMNodeErrorStrings(
+    timeout="LLM Timeout Error. Sub-questions could not be generated.",
+    rate_limit="LLM Rate Limit Error. Sub-questions could not be generated.",
+    general_error="General LLM Error. Sub-questions could not be generated.",
+)

+
+@log_function_time(print_only=True)
 def decompose_orig_question(
    state: SubQuestionRetrievalState,
    config: RunnableConfig,
@@ -85,15 +104,15 @@ def decompose_orig_question(
            ]
        )

-        decomposition_prompt = (
-            INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH.format(
-                question=question, sample_doc_str=sample_doc_str, history=history
-            )
+        decomposition_prompt = INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH_ASSUMING_REFINEMENT.format(
+            question=question, sample_doc_str=sample_doc_str, history=history
        )

    else:
-        decomposition_prompt = INITIAL_QUESTION_DECOMPOSITION_PROMPT.format(
-            question=question, history=history
+        decomposition_prompt = (
+            INITIAL_QUESTION_DECOMPOSITION_PROMPT_ASSUMING_REFINEMENT.format(
+                question=question, history=history
+            )
        )

    # Start decomposition
@@ -112,32 +131,44 @@ def decompose_orig_question(
    )

    # dispatches custom events for subquestion tokens, adding in subquestion ids.
-    streamed_tokens = dispatch_separated(
-        model.stream(msg),
-        dispatch_subquestion(0, writer),
-        sep_callback=dispatch_subquestion_sep(0, writer),
-    )

-    stop_event = StreamStopInfo(
-        stop_reason=StreamStopReason.FINISHED,
-        stream_type=StreamType.SUB_QUESTIONS,
-        level=0,
-    )
-    write_custom_event("stream_finished", stop_event, writer)
+    streamed_tokens: list[BaseMessage_Content] = []

-    deomposition_response = merge_content(*streamed_tokens)
+    try:
+        streamed_tokens = run_with_timeout(
+            AGENT_TIMEOUT_LLM_SUBQUESTION_GENERATION,
+            dispatch_separated,
+            model.stream(
+                msg,
+                timeout_override=AGENT_TIMEOUT_CONNECT_LLM_SUBQUESTION_GENERATION,
+            ),
+            dispatch_subquestion(0, writer),
+            sep_callback=dispatch_subquestion_sep(0, writer),
+        )

-    # this call should only return strings. Commenting out for efficiency
-    # assert [type(tok) == str for tok in streamed_tokens]
+        decomposition_response = merge_content(*streamed_tokens)

-    # use no-op cast() instead of str() which runs code
-    # list_of_subquestions = clean_and_parse_list_string(cast(str, response))
-    list_of_subqs = cast(str, deomposition_response).split("\n")
+        list_of_subqs = cast(str, decomposition_response).split("\n")

-    decomp_list: list[str] = [sq.strip() for sq in list_of_subqs if sq.strip() != ""]
+        initial_sub_questions = [sq.strip() for sq in list_of_subqs if sq.strip() != ""]
+        log_result = f"decomposed original question into {len(initial_sub_questions)} subquestions"
+
+        stop_event = StreamStopInfo(
+            stop_reason=StreamStopReason.FINISHED,
+            stream_type=StreamType.SUB_QUESTIONS,
+            level=0,
+        )
+        write_custom_event("stream_finished", stop_event, writer)
+
+    except (LLMTimeoutError, TimeoutError) as e:
+        logger.error("LLM Timeout Error - decompose orig question")
+        raise e  # fail loudly on this critical step
+    except LLMRateLimitError as e:
+        logger.error("LLM Rate Limit Error - decompose orig question")
+        raise e

    return InitialQuestionDecompositionUpdate(
-        initial_sub_questions=decomp_list,
+        initial_sub_questions=initial_sub_questions,
        agent_start_time=agent_start_time,
        agent_refined_start_time=None,
        agent_refined_end_time=None,
@@ -151,7 +182,7 @@ def decompose_orig_question(
                graph_component="initial - generate sub answers",
                node_name="decompose original question",
                node_start_time=node_start_time,
-                result=f"decomposed original question into {len(decomp_list)} subquestions",
+                result=log_result,
            )
        ],
    )
--- a/backend/onyx/agents/agent_search/deep_search/main/edges.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/edges.py
@@ -25,21 +25,20 @@ logger = setup_logger()

 def route_initial_tool_choice(
    state: MainState, config: RunnableConfig
-) -> Literal["tool_call", "start_agent_search", "logging_node"]:
+) -> Literal["call_tool", "start_agent_search", "logging_node"]:
    """
    LangGraph edge to route to agent search.
    """
    agent_config = cast(GraphConfig, config["metadata"]["config"])
-    if state.tool_choices[-1] is not None:
+    if state.tool_choice is not None:
        if (
            agent_config.behavior.use_agentic_search
            and agent_config.tooling.search_tool is not None
-            and state.tool_choices[-1].tool.name
-            == agent_config.tooling.search_tool.name
+            and state.tool_choice.tool.name == agent_config.tooling.search_tool.name
        ):
            return "start_agent_search"
        else:
-            return "tool_call"
+            return "call_tool"
    else:
        return "logging_node"

--- a/backend/onyx/agents/agent_search/deep_search/main/graph_builder.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/graph_builder.py
@@ -26,8 +26,8 @@ from onyx.agents.agent_search.deep_search.main.nodes.decide_refinement_need impo
 from onyx.agents.agent_search.deep_search.main.nodes.extract_entities_terms import (
    extract_entities_terms,
 )
-from onyx.agents.agent_search.deep_search.main.nodes.generate_refined_answer import (
-    generate_refined_answer,
+from onyx.agents.agent_search.deep_search.main.nodes.generate_validate_refined_answer import (
+    generate_validate_refined_answer,
 )
 from onyx.agents.agent_search.deep_search.main.nodes.ingest_refined_sub_answers import (
    ingest_refined_sub_answers,
@@ -43,14 +43,14 @@ from onyx.agents.agent_search.deep_search.main.states import MainState
 from onyx.agents.agent_search.deep_search.refinement.consolidate_sub_answers.graph_builder import (
    answer_refined_query_graph_builder,
 )
-from onyx.agents.agent_search.orchestration.nodes.basic_use_tool_response import (
-    basic_use_tool_response,
-)
-from onyx.agents.agent_search.orchestration.nodes.llm_tool_choice import llm_tool_choice
+from onyx.agents.agent_search.orchestration.nodes.call_tool import call_tool
+from onyx.agents.agent_search.orchestration.nodes.choose_tool import choose_tool
 from onyx.agents.agent_search.orchestration.nodes.prepare_tool_input import (
    prepare_tool_input,
 )
-from onyx.agents.agent_search.orchestration.nodes.tool_call import tool_call
+from onyx.agents.agent_search.orchestration.nodes.use_tool_response import (
+    basic_use_tool_response,
+)
 from onyx.agents.agent_search.shared_graph_utils.utils import get_test_config
 from onyx.utils.logger import setup_logger

@@ -77,13 +77,13 @@ def main_graph_builder(test_mode: bool = False) -> StateGraph:
    # Choose the initial tool
    graph.add_node(
        node="initial_tool_choice",
-        action=llm_tool_choice,
+        action=choose_tool,
    )

    # Call the tool, if required
    graph.add_node(
-        node="tool_call",
-        action=tool_call,
+        node="call_tool",
+        action=call_tool,
    )

    # Use the tool response
@@ -126,8 +126,8 @@ def main_graph_builder(test_mode: bool = False) -> StateGraph:

    # Node to generate the refined answer
    graph.add_node(
-        node="generate_refined_answer",
-        action=generate_refined_answer,
+        node="generate_validate_refined_answer",
+        action=generate_validate_refined_answer,
    )

    # Early node to extract the entities and terms from the initial answer,
@@ -168,11 +168,11 @@ def main_graph_builder(test_mode: bool = False) -> StateGraph:
    graph.add_conditional_edges(
        "initial_tool_choice",
        route_initial_tool_choice,
-        ["tool_call", "start_agent_search", "logging_node"],
+        ["call_tool", "start_agent_search", "logging_node"],
    )

    graph.add_edge(
-        start_key="tool_call",
+        start_key="call_tool",
        end_key="basic_use_tool_response",
    )
    graph.add_edge(
@@ -215,11 +215,11 @@ def main_graph_builder(test_mode: bool = False) -> StateGraph:

    graph.add_edge(
        start_key="ingest_refined_sub_answers",
-        end_key="generate_refined_answer",
+        end_key="generate_validate_refined_answer",
    )

    graph.add_edge(
-        start_key="generate_refined_answer",
+        start_key="generate_validate_refined_answer",
        end_key="compare_answers",
    )
    graph.add_edge(
@@ -252,9 +252,7 @@ if __name__ == "__main__":
            db_session, primary_llm, fast_llm, search_request
        )

-        inputs = MainInput(
-            base_question=graph_config.inputs.search_request.query, log_messages=[]
-        )
+        inputs = MainInput(log_messages=[])

        for thing in compiled_graph.stream(
            input=inputs,
--- a/backend/onyx/agents/agent_search/deep_search/main/nodes/compare_answers.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/nodes/compare_answers.py
@@ -1,6 +1,7 @@
 from datetime import datetime
 from typing import cast

+from langchain_core.messages import BaseMessage
 from langchain_core.messages import HumanMessage
 from langchain_core.runnables import RunnableConfig
 from langgraph.types import StreamWriter
@@ -10,16 +11,53 @@ from onyx.agents.agent_search.deep_search.main.states import (
 )
 from onyx.agents.agent_search.deep_search.main.states import MainState
 from onyx.agents.agent_search.models import GraphConfig
+from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
+    binary_string_test,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AGENT_LLM_RATELIMIT_MESSAGE,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AGENT_LLM_TIMEOUT_MESSAGE,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AGENT_POSITIVE_VALUE_STR,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AgentLLMErrorType,
+)
+from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
+from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
 from onyx.chat.models import RefinedAnswerImprovement
+from onyx.configs.agent_configs import AGENT_TIMEOUT_CONNECT_LLM_COMPARE_ANSWERS
+from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_COMPARE_ANSWERS
+from onyx.llm.chat_llm import LLMRateLimitError
+from onyx.llm.chat_llm import LLMTimeoutError
 from onyx.prompts.agent_search import (
    INITIAL_REFINED_ANSWER_COMPARISON_PROMPT,
 )
+from onyx.utils.logger import setup_logger
+from onyx.utils.threadpool_concurrency import run_with_timeout
+from onyx.utils.timing import log_function_time
+
+logger = setup_logger()
+
+_llm_node_error_strings = LLMNodeErrorStrings(
+    timeout="The LLM timed out, and the answers could not be compared.",
+    rate_limit="The LLM encountered a rate limit, and the answers could not be compared.",
+    general_error="The LLM encountered an error, and the answers could not be compared.",
+)
+
+_ANSWER_QUALITY_NOT_SUFFICIENT_MESSAGE = (
+    "Answer quality is not sufficient, so stay with the initial answer."
+)


+@log_function_time(print_only=True)
 def compare_answers(
    state: MainState, config: RunnableConfig, writer: StreamWriter = lambda _: None
 ) -> InitialRefinedAnswerComparisonUpdate:
@@ -34,21 +72,78 @@ def compare_answers(
    initial_answer = state.initial_answer
    refined_answer = state.refined_answer

+    # if answer quality is not sufficient, then stay with the initial answer
+    if not state.refined_answer_quality:
+        write_custom_event(
+            "refined_answer_improvement",
+            RefinedAnswerImprovement(
+                refined_answer_improvement=False,
+            ),
+            writer,
+        )
+
+        return InitialRefinedAnswerComparisonUpdate(
+            refined_answer_improvement_eval=False,
+            log_messages=[
+                get_langgraph_node_log_string(
+                    graph_component="main",
+                    node_name="compare answers",
+                    node_start_time=node_start_time,
+                    result=_ANSWER_QUALITY_NOT_SUFFICIENT_MESSAGE,
+                )
+            ],
+        )
+
    compare_answers_prompt = INITIAL_REFINED_ANSWER_COMPARISON_PROMPT.format(
        question=question, initial_answer=initial_answer, refined_answer=refined_answer
    )

    msg = [HumanMessage(content=compare_answers_prompt)]

+    agent_error: AgentErrorLog | None = None
    # Get the rewritten queries in a defined format
    model = graph_config.tooling.fast_llm
-
+    resp: BaseMessage | None = None
+    refined_answer_improvement: bool | None = None
    # no need to stream this
-    resp = model.invoke(msg)
+    try:
+        resp = run_with_timeout(
+            AGENT_TIMEOUT_LLM_COMPARE_ANSWERS,
+            model.invoke,
+            prompt=msg,
+            timeout_override=AGENT_TIMEOUT_CONNECT_LLM_COMPARE_ANSWERS,
+        )

-    refined_answer_improvement = (
-        isinstance(resp.content, str) and "yes" in resp.content.lower()
-    )
+    except (LLMTimeoutError, TimeoutError):
+        agent_error = AgentErrorLog(
+            error_type=AgentLLMErrorType.TIMEOUT,
+            error_message=AGENT_LLM_TIMEOUT_MESSAGE,
+            error_result=_llm_node_error_strings.timeout,
+        )
+        logger.error("LLM Timeout Error - compare answers")
+        # continue as True in this support step
+    except LLMRateLimitError:
+        agent_error = AgentErrorLog(
+            error_type=AgentLLMErrorType.RATE_LIMIT,
+            error_message=AGENT_LLM_RATELIMIT_MESSAGE,
+            error_result=_llm_node_error_strings.rate_limit,
+        )
+        logger.error("LLM Rate Limit Error - compare answers")
+        # continue as True in this support step
+
+    if agent_error or resp is None:
+        refined_answer_improvement = True
+        if agent_error:
+            log_result = agent_error.error_result
+        else:
+            log_result = "An answer could not be generated."
+
+    else:
+        refined_answer_improvement = binary_string_test(
+            text=cast(str, resp.content),
+            positive_value=AGENT_POSITIVE_VALUE_STR,
+        )
+        log_result = f"Answer comparison: {refined_answer_improvement}"

    write_custom_event(
        "refined_answer_improvement",
@@ -65,7 +160,7 @@ def compare_answers(
                graph_component="main",
                node_name="compare answers",
                node_start_time=node_start_time,
-                result=f"Answer comparison: {refined_answer_improvement}",
+                result=log_result,
            )
        ],
    )
--- a/backend/onyx/agents/agent_search/deep_search/main/nodes/create_refined_sub_questions.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/nodes/create_refined_sub_questions.py
@@ -21,6 +21,18 @@ from onyx.agents.agent_search.models import GraphConfig
 from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
    build_history_prompt,
 )
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AGENT_LLM_RATELIMIT_MESSAGE,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AGENT_LLM_TIMEOUT_MESSAGE,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AgentLLMErrorType,
+)
+from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
+from onyx.agents.agent_search.shared_graph_utils.models import BaseMessage_Content
+from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
 from onyx.agents.agent_search.shared_graph_utils.utils import dispatch_separated
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    format_entity_term_extraction,
@@ -30,12 +42,35 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import make_question_id
 from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
+from onyx.chat.models import StreamingError
+from onyx.configs.agent_configs import (
+    AGENT_TIMEOUT_CONNECT_LLM_REFINED_SUBQUESTION_GENERATION,
+)
+from onyx.configs.agent_configs import (
+    AGENT_TIMEOUT_LLM_REFINED_SUBQUESTION_GENERATION,
+)
+from onyx.llm.chat_llm import LLMRateLimitError
+from onyx.llm.chat_llm import LLMTimeoutError
 from onyx.prompts.agent_search import (
-    REFINEMENT_QUESTION_DECOMPOSITION_PROMPT,
+    REFINEMENT_QUESTION_DECOMPOSITION_PROMPT_W_INITIAL_SUBQUESTION_ANSWERS,
 )
 from onyx.tools.models import ToolCallKickoff
+from onyx.utils.logger import setup_logger
+from onyx.utils.threadpool_concurrency import run_with_timeout
+from onyx.utils.timing import log_function_time
+
+logger = setup_logger()
+
+_ANSWERED_SUBQUESTIONS_DIVIDER = "\n\n---\n\n"
+
+_llm_node_error_strings = LLMNodeErrorStrings(
+    timeout="The LLM timed out. The sub-questions could not be generated.",
+    rate_limit="The LLM encountered a rate limit. The sub-questions could not be generated.",
+    general_error="The LLM encountered an error. The sub-questions could not be generated.",
+)


+@log_function_time(print_only=True)
 def create_refined_sub_questions(
    state: MainState, config: RunnableConfig, writer: StreamWriter = lambda _: None
 ) -> RefinedQuestionDecompositionUpdate:
@@ -72,8 +107,10 @@ def create_refined_sub_questions(

    initial_question_answers = state.sub_question_results

-    addressed_question_list = [
-        x.question for x in initial_question_answers if x.verified_high_quality
+    addressed_subquestions_with_answers = [
+        f"Subquestion: {x.question}\nSubanswer:\n{x.answer}"
+        for x in initial_question_answers
+        if x.verified_high_quality and x.answer
    ]

    failed_question_list = [
@@ -82,12 +119,14 @@ def create_refined_sub_questions(

    msg = [
        HumanMessage(
-            content=REFINEMENT_QUESTION_DECOMPOSITION_PROMPT.format(
+            content=REFINEMENT_QUESTION_DECOMPOSITION_PROMPT_W_INITIAL_SUBQUESTION_ANSWERS.format(
                question=question,
                history=history,
                entity_term_extraction_str=entity_term_extraction_str,
                base_answer=base_answer,
-                answered_sub_questions="\n - ".join(addressed_question_list),
+                answered_subquestions_with_answers=_ANSWERED_SUBQUESTIONS_DIVIDER.join(
+                    addressed_subquestions_with_answers
+                ),
                failed_sub_questions="\n - ".join(failed_question_list),
            ),
        )
@@ -96,29 +135,67 @@ def create_refined_sub_questions(
    # Grader
    model = graph_config.tooling.fast_llm

-    streamed_tokens = dispatch_separated(
-        model.stream(msg),
-        dispatch_subquestion(1, writer),
-        sep_callback=dispatch_subquestion_sep(1, writer),
-    )
-    response = merge_content(*streamed_tokens)
+    agent_error: AgentErrorLog | None = None
+    streamed_tokens: list[BaseMessage_Content] = []
+    try:
+        streamed_tokens = run_with_timeout(
+            AGENT_TIMEOUT_LLM_REFINED_SUBQUESTION_GENERATION,
+            dispatch_separated,
+            model.stream(
+                msg,
+                timeout_override=AGENT_TIMEOUT_CONNECT_LLM_REFINED_SUBQUESTION_GENERATION,
+            ),
+            dispatch_subquestion(1, writer),
+            sep_callback=dispatch_subquestion_sep(1, writer),
+        )
+    except (LLMTimeoutError, TimeoutError):
+        agent_error = AgentErrorLog(
+            error_type=AgentLLMErrorType.TIMEOUT,
+            error_message=AGENT_LLM_TIMEOUT_MESSAGE,
+            error_result=_llm_node_error_strings.timeout,
+        )
+        logger.error("LLM Timeout Error - create refined sub questions")

-    if isinstance(response, str):
-        parsed_response = [q for q in response.split("\n") if q.strip() != ""]
-    else:
-        raise ValueError("LLM response is not a string")
+    except LLMRateLimitError:
+        agent_error = AgentErrorLog(
+            error_type=AgentLLMErrorType.RATE_LIMIT,
+            error_message=AGENT_LLM_RATELIMIT_MESSAGE,
+            error_result=_llm_node_error_strings.rate_limit,
+        )
+        logger.error("LLM Rate Limit Error - create refined sub questions")

-    refined_sub_question_dict = {}
-    for sub_question_num, sub_question in enumerate(parsed_response):
-        refined_sub_question = RefinementSubQuestion(
-            sub_question=sub_question,
-            sub_question_id=make_question_id(1, sub_question_num + 1),
-            verified=False,
-            answered=False,
-            answer="",
+    if agent_error:
+        refined_sub_question_dict: dict[int, RefinementSubQuestion] = {}
+        log_result = agent_error.error_result
+        write_custom_event(
+            "refined_sub_question_creation_error",
+            StreamingError(
+                error="Your LLM was not able to create refined sub questions in time and timed out. Please try again.",
+            ),
+            writer,
        )

-        refined_sub_question_dict[sub_question_num + 1] = refined_sub_question
+    else:
+        response = merge_content(*streamed_tokens)
+
+        if isinstance(response, str):
+            parsed_response = [q for q in response.split("\n") if q.strip() != ""]
+        else:
+            raise ValueError("LLM response is not a string")
+
+        refined_sub_question_dict = {}
+        for sub_question_num, sub_question in enumerate(parsed_response):
+            refined_sub_question = RefinementSubQuestion(
+                sub_question=sub_question,
+                sub_question_id=make_question_id(1, sub_question_num + 1),
+                verified=False,
+                answered=False,
+                answer="",
+            )
+
+            refined_sub_question_dict[sub_question_num + 1] = refined_sub_question
+
+        log_result = f"Created {len(refined_sub_question_dict)} refined sub questions"

    return RefinedQuestionDecompositionUpdate(
        refined_sub_questions=refined_sub_question_dict,
@@ -128,7 +205,7 @@ def create_refined_sub_questions(
                graph_component="main",
                node_name="create refined sub questions",
                node_start_time=node_start_time,
-                result=f"Created {len(refined_sub_question_dict)} refined sub questions",
+                result=log_result,
            )
        ],
    )
--- a/backend/onyx/agents/agent_search/deep_search/main/nodes/decide_refinement_need.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/nodes/decide_refinement_need.py
@@ -11,8 +11,10 @@ from onyx.agents.agent_search.models import GraphConfig
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
 )
+from onyx.utils.timing import log_function_time


+@log_function_time(print_only=True)
 def decide_refinement_need(
    state: MainState, config: RunnableConfig
 ) -> RequireRefinemenEvalUpdate:
@@ -26,6 +28,19 @@ def decide_refinement_need(

    decision = True  # TODO: just for current testing purposes

+    if state.answer_error:
+        return RequireRefinemenEvalUpdate(
+            require_refined_answer_eval=False,
+            log_messages=[
+                get_langgraph_node_log_string(
+                    graph_component="main",
+                    node_name="decide refinement need",
+                    node_start_time=node_start_time,
+                    result="Timeout Error",
+                )
+            ],
+        )
+
    log_messages = [
        get_langgraph_node_log_string(
            graph_component="main",
--- a/backend/onyx/agents/agent_search/deep_search/main/nodes/extract_entities_terms.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/nodes/extract_entities_terms.py
@@ -21,11 +21,22 @@ from onyx.agents.agent_search.shared_graph_utils.utils import format_docs
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
 )
+from onyx.configs.agent_configs import (
+    AGENT_TIMEOUT_CONNECT_LLM_ENTITY_TERM_EXTRACTION,
+)
+from onyx.configs.agent_configs import (
+    AGENT_TIMEOUT_LLM_ENTITY_TERM_EXTRACTION,
+)
 from onyx.configs.constants import NUM_EXPLORATORY_DOCS
+from onyx.llm.chat_llm import LLMRateLimitError
+from onyx.llm.chat_llm import LLMTimeoutError
 from onyx.prompts.agent_search import ENTITY_TERM_EXTRACTION_PROMPT
 from onyx.prompts.agent_search import ENTITY_TERM_EXTRACTION_PROMPT_JSON_EXAMPLE
+from onyx.utils.threadpool_concurrency import run_with_timeout
+from onyx.utils.timing import log_function_time


+@log_function_time(print_only=True)
 def extract_entities_terms(
    state: MainState, config: RunnableConfig
 ) -> EntityTermExtractionUpdate:
@@ -79,29 +90,42 @@ def extract_entities_terms(
    ]
    fast_llm = graph_config.tooling.fast_llm
    # Grader
-    llm_response = fast_llm.invoke(
-        prompt=msg,
-    )
-
-    cleaned_response = (
-        str(llm_response.content).replace("```json\n", "").replace("\n```", "")
-    )
-    first_bracket = cleaned_response.find("{")
-    last_bracket = cleaned_response.rfind("}")
-    cleaned_response = cleaned_response[first_bracket : last_bracket + 1]
-
    try:
-        entity_extraction_result = EntityExtractionResult.model_validate_json(
-            cleaned_response
+        llm_response = run_with_timeout(
+            AGENT_TIMEOUT_LLM_ENTITY_TERM_EXTRACTION,
+            fast_llm.invoke,
+            prompt=msg,
+            timeout_override=AGENT_TIMEOUT_CONNECT_LLM_ENTITY_TERM_EXTRACTION,
        )
-    except ValueError:
-        logger.error("Failed to parse LLM response as JSON in Entity-Term Extraction")
+
+        cleaned_response = (
+            str(llm_response.content).replace("```json\n", "").replace("\n```", "")
+        )
+        first_bracket = cleaned_response.find("{")
+        last_bracket = cleaned_response.rfind("}")
+        cleaned_response = cleaned_response[first_bracket : last_bracket + 1]
+
+        try:
+            entity_extraction_result = EntityExtractionResult.model_validate_json(
+                cleaned_response
+            )
+        except ValueError:
+            logger.error(
+                "Failed to parse LLM response as JSON in Entity-Term Extraction"
+            )
+            entity_extraction_result = EntityExtractionResult(
+                retrieved_entities_relationships=EntityRelationshipTermExtraction(),
+            )
+    except (LLMTimeoutError, TimeoutError):
+        logger.error("LLM Timeout Error - extract entities terms")
        entity_extraction_result = EntityExtractionResult(
-            retrieved_entities_relationships=EntityRelationshipTermExtraction(
-                entities=[],
-                relationships=[],
-                terms=[],
-            ),
+            retrieved_entities_relationships=EntityRelationshipTermExtraction(),
+        )
+
+    except LLMRateLimitError:
+        logger.error("LLM Rate Limit Error - extract entities terms")
+        entity_extraction_result = EntityExtractionResult(
+            retrieved_entities_relationships=EntityRelationshipTermExtraction(),
        )

    return EntityTermExtractionUpdate(
--- a/backend/onyx/agents/agent_search/deep_search/main/nodes/generate_validate_refined_answer.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/nodes/generate_validate_refined_answer.py
@@ -1,5 +1,4 @@
 from datetime import datetime
-from typing import Any
 from typing import cast

 from langchain_core.messages import HumanMessage
@@ -11,27 +10,49 @@ from onyx.agents.agent_search.deep_search.main.models import (
    AgentRefinedMetrics,
 )
 from onyx.agents.agent_search.deep_search.main.operations import get_query_info
-from onyx.agents.agent_search.deep_search.main.operations import logger
 from onyx.agents.agent_search.deep_search.main.states import MainState
 from onyx.agents.agent_search.deep_search.main.states import (
    RefinedAnswerUpdate,
 )
 from onyx.agents.agent_search.models import GraphConfig
+from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
+    binary_string_test_after_answer_separator,
+)
 from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
    get_prompt_enrichment_components,
 )
 from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
    trim_prompt_piece,
 )
-from onyx.agents.agent_search.shared_graph_utils.models import InferenceSection
+from onyx.agents.agent_search.shared_graph_utils.calculations import (
+    get_answer_generation_documents,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import AGENT_ANSWER_SEPARATOR
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AGENT_LLM_RATELIMIT_MESSAGE,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AGENT_LLM_TIMEOUT_MESSAGE,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AGENT_POSITIVE_VALUE_STR,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AgentLLMErrorType,
+)
+from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
+from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
 from onyx.agents.agent_search.shared_graph_utils.models import RefinedAgentStats
 from onyx.agents.agent_search.shared_graph_utils.operators import (
-    dedup_inference_sections,
+    dedup_inference_section_list,
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    dispatch_main_answer_stop_info,
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import format_docs
+from onyx.agents.agent_search.shared_graph_utils.utils import (
+    get_deduplicated_structured_subquestion_documents,
+)
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
 )
@@ -43,26 +64,58 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
 from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
 from onyx.chat.models import AgentAnswerPiece
 from onyx.chat.models import ExtendedToolResponse
+from onyx.chat.models import StreamingError
+from onyx.configs.agent_configs import AGENT_ANSWER_GENERATION_BY_FAST_LLM
 from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
+from onyx.configs.agent_configs import AGENT_MAX_STREAMED_DOCS_FOR_REFINED_ANSWER
 from onyx.configs.agent_configs import AGENT_MIN_ORIG_QUESTION_DOCS
+from onyx.configs.agent_configs import (
+    AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION,
+)
+from onyx.configs.agent_configs import (
+    AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_VALIDATION,
+)
+from onyx.configs.agent_configs import (
+    AGENT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION,
+)
+from onyx.configs.agent_configs import (
+    AGENT_TIMEOUT_LLM_REFINED_ANSWER_VALIDATION,
+)
+from onyx.llm.chat_llm import LLMRateLimitError
+from onyx.llm.chat_llm import LLMTimeoutError
 from onyx.prompts.agent_search import (
    REFINED_ANSWER_PROMPT_W_SUB_QUESTIONS,
 )
 from onyx.prompts.agent_search import (
    REFINED_ANSWER_PROMPT_WO_SUB_QUESTIONS,
 )
+from onyx.prompts.agent_search import (
+    REFINED_ANSWER_VALIDATION_PROMPT,
+)
 from onyx.prompts.agent_search import (
    SUB_QUESTION_ANSWER_TEMPLATE_REFINED,
 )
 from onyx.prompts.agent_search import UNKNOWN_ANSWER
 from onyx.tools.tool_implementations.search.search_tool import yield_search_responses
+from onyx.utils.logger import setup_logger
+from onyx.utils.threadpool_concurrency import run_with_timeout
+from onyx.utils.timing import log_function_time
+
+logger = setup_logger()
+
+_llm_node_error_strings = LLMNodeErrorStrings(
+    timeout="The LLM timed out. The refined answer could not be generated.",
+    rate_limit="The LLM encountered a rate limit. The refined answer could not be generated.",
+    general_error="The LLM encountered an error. The refined answer could not be generated.",
+)


-def generate_refined_answer(
+@log_function_time(print_only=True)
+def generate_validate_refined_answer(
    state: MainState, config: RunnableConfig, writer: StreamWriter = lambda _: None
 ) -> RefinedAnswerUpdate:
    """
-    LangGraph node to generate the refined answer.
+    LangGraph node to generate the refined answer and validate it.
    """

    node_start_time = datetime.now()
@@ -76,19 +129,24 @@ def generate_refined_answer(
    )

    verified_reranked_documents = state.verified_reranked_documents
-    sub_questions_cited_documents = state.cited_documents
+
+    # get all documents cited in sub-questions
+    structured_subquestion_docs = get_deduplicated_structured_subquestion_documents(
+        state.sub_question_results
+    )
+
    original_question_verified_documents = (
        state.orig_question_verified_reranked_documents
    )
    original_question_retrieved_documents = state.orig_question_retrieved_documents

-    consolidated_context_docs: list[InferenceSection] = sub_questions_cited_documents
+    consolidated_context_docs = structured_subquestion_docs.cited_documents

    counter = 0
    for original_doc_number, original_doc in enumerate(
        original_question_verified_documents
    ):
-        if original_doc_number not in sub_questions_cited_documents:
+        if original_doc_number not in structured_subquestion_docs.cited_documents:
            if (
                counter <= AGENT_MIN_ORIG_QUESTION_DOCS
                or len(consolidated_context_docs)
@@ -99,14 +157,16 @@ def generate_refined_answer(
                counter += 1

    # sort docs by their scores - though the scores refer to different questions
-    relevant_docs = dedup_inference_sections(
-        consolidated_context_docs, consolidated_context_docs
-    )
+    relevant_docs = dedup_inference_section_list(consolidated_context_docs)

-    streaming_docs = (
-        relevant_docs
-        if len(relevant_docs) > 0
-        else original_question_retrieved_documents[:15]
+    # Create the list of documents to stream out. Start with the
+    # ones that wil be in the context (or, if len == 0, use docs
+    # that were retrieved for the original question)
+    answer_generation_documents = get_answer_generation_documents(
+        relevant_docs=relevant_docs,
+        context_documents=structured_subquestion_docs.context_documents,
+        original_question_docs=original_question_retrieved_documents,
+        max_docs=AGENT_MAX_STREAMED_DOCS_FOR_REFINED_ANSWER,
    )

    query_info = get_query_info(state.orig_question_sub_query_retrieval_results)
@@ -114,11 +174,13 @@ def generate_refined_answer(
        graph_config.tooling.search_tool
    ), "search_tool must be provided for agentic search"
    # stream refined answer docs, or original question docs if no relevant docs are found
-    relevance_list = relevance_from_docs(relevant_docs)
+    relevance_list = relevance_from_docs(
+        answer_generation_documents.streaming_documents
+    )
    for tool_response in yield_search_responses(
        query=question,
-        reranked_sections=streaming_docs,
-        final_context_sections=streaming_docs,
+        reranked_sections=answer_generation_documents.streaming_documents,
+        final_context_sections=answer_generation_documents.context_documents,
        search_query_info=query_info,
        get_section_relevance=lambda: relevance_list,
        search_tool=graph_config.tooling.search_tool,
@@ -198,8 +260,13 @@ def generate_refined_answer(
        else REFINED_ANSWER_PROMPT_WO_SUB_QUESTIONS
    )

-    model = graph_config.tooling.fast_llm
-    relevant_docs_str = format_docs(relevant_docs)
+    model = (
+        graph_config.tooling.fast_llm
+        if AGENT_ANSWER_GENERATION_BY_FAST_LLM
+        else graph_config.tooling.primary_llm
+    )
+
+    relevant_docs_str = format_docs(answer_generation_documents.context_documents)
    relevant_docs_str = trim_prompt_piece(
        model.config,
        relevant_docs_str,
@@ -229,30 +296,89 @@ def generate_refined_answer(
        )
    ]

-    streamed_tokens: list[str | list[str | dict[str, Any]]] = [""]
+    streamed_tokens: list[str] = [""]
    dispatch_timings: list[float] = []
-    for message in model.stream(msg):
-        # TODO: in principle, the answer here COULD contain images, but we don't support that yet
-        content = message.content
-        if not isinstance(content, str):
-            raise ValueError(
-                f"Expected content to be a string, but got {type(content)}"
-            )
+    agent_error: AgentErrorLog | None = None

-        start_stream_token = datetime.now()
+    def stream_refined_answer() -> list[str]:
+        for message in model.stream(
+            msg, timeout_override=AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION
+        ):
+            # TODO: in principle, the answer here COULD contain images, but we don't support that yet
+            content = message.content
+            if not isinstance(content, str):
+                raise ValueError(
+                    f"Expected content to be a string, but got {type(content)}"
+                )
+
+            start_stream_token = datetime.now()
+            write_custom_event(
+                "refined_agent_answer",
+                AgentAnswerPiece(
+                    answer_piece=content,
+                    level=1,
+                    level_question_num=0,
+                    answer_type="agent_level_answer",
+                ),
+                writer,
+            )
+            end_stream_token = datetime.now()
+            dispatch_timings.append(
+                (end_stream_token - start_stream_token).microseconds
+            )
+            streamed_tokens.append(content)
+        return streamed_tokens
+
+    try:
+        streamed_tokens = run_with_timeout(
+            AGENT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION,
+            stream_refined_answer,
+        )
+
+    except (LLMTimeoutError, TimeoutError):
+        agent_error = AgentErrorLog(
+            error_type=AgentLLMErrorType.TIMEOUT,
+            error_message=AGENT_LLM_TIMEOUT_MESSAGE,
+            error_result=_llm_node_error_strings.timeout,
+        )
+        logger.error("LLM Timeout Error - generate refined answer")
+
+    except LLMRateLimitError:
+        agent_error = AgentErrorLog(
+            error_type=AgentLLMErrorType.RATE_LIMIT,
+            error_message=AGENT_LLM_RATELIMIT_MESSAGE,
+            error_result=_llm_node_error_strings.rate_limit,
+        )
+        logger.error("LLM Rate Limit Error - generate refined answer")
+
+    if agent_error:
        write_custom_event(
-            "refined_agent_answer",
-            AgentAnswerPiece(
-                answer_piece=content,
-                level=1,
-                level_question_num=0,
-                answer_type="agent_level_answer",
+            "initial_agent_answer",
+            StreamingError(
+                error=AGENT_LLM_TIMEOUT_MESSAGE,
            ),
            writer,
        )
-        end_stream_token = datetime.now()
-        dispatch_timings.append((end_stream_token - start_stream_token).microseconds)
-        streamed_tokens.append(content)
+
+        return RefinedAnswerUpdate(
+            refined_answer=None,
+            refined_answer_quality=False,  # TODO: replace this with the actual check value
+            refined_agent_stats=None,
+            agent_refined_end_time=None,
+            agent_refined_metrics=AgentRefinedMetrics(
+                refined_doc_boost_factor=0.0,
+                refined_question_boost_factor=0.0,
+                duration_s=None,
+            ),
+            log_messages=[
+                get_langgraph_node_log_string(
+                    graph_component="main",
+                    node_name="generate refined answer",
+                    node_start_time=node_start_time,
+                    result=agent_error.error_result or "An LLM error occurred",
+                )
+            ],
+        )

    logger.debug(
        f"Average dispatch time for refined answer: {sum(dispatch_timings) / len(dispatch_timings)}"
@@ -261,54 +387,47 @@ def generate_refined_answer(
    response = merge_content(*streamed_tokens)
    answer = cast(str, response)

+    # run a validation step for the refined answer only
+
+    msg = [
+        HumanMessage(
+            content=REFINED_ANSWER_VALIDATION_PROMPT.format(
+                question=question,
+                history=prompt_enrichment_components.history,
+                answered_sub_questions=sub_question_answer_str,
+                relevant_docs=relevant_docs_str,
+                proposed_answer=answer,
+                persona_specification=persona_contextualized_prompt,
+            )
+        )
+    ]
+
+    validation_model = graph_config.tooling.fast_llm
+    try:
+        validation_response = run_with_timeout(
+            AGENT_TIMEOUT_LLM_REFINED_ANSWER_VALIDATION,
+            validation_model.invoke,
+            prompt=msg,
+            timeout_override=AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_VALIDATION,
+        )
+        refined_answer_quality = binary_string_test_after_answer_separator(
+            text=cast(str, validation_response.content),
+            positive_value=AGENT_POSITIVE_VALUE_STR,
+            separator=AGENT_ANSWER_SEPARATOR,
+        )
+    except (LLMTimeoutError, TimeoutError):
+        refined_answer_quality = True
+        logger.error("LLM Timeout Error - validate refined answer")
+
+    except LLMRateLimitError:
+        refined_answer_quality = True
+        logger.error("LLM Rate Limit Error - validate refined answer")
+
    refined_agent_stats = RefinedAgentStats(
        revision_doc_efficiency=refined_doc_effectiveness,
        revision_question_efficiency=revision_question_efficiency,
    )

-    logger.debug(f"\n\n---INITIAL ANSWER ---\n\n Answer:\n Agent: {initial_answer}")
-    logger.debug("-" * 10)
-    logger.debug(f"\n\n---REVISED AGENT ANSWER ---\n\n Answer:\n Agent: {answer}")
-
-    logger.debug("-" * 100)
-
-    if state.initial_agent_stats:
-        initial_doc_boost_factor = state.initial_agent_stats.agent_effectiveness.get(
-            "utilized_chunk_ratio", "--"
-        )
-        initial_support_boost_factor = (
-            state.initial_agent_stats.agent_effectiveness.get("support_ratio", "--")
-        )
-        num_initial_verified_docs = state.initial_agent_stats.original_question.get(
-            "num_verified_documents", "--"
-        )
-        initial_verified_docs_avg_score = (
-            state.initial_agent_stats.original_question.get("verified_avg_score", "--")
-        )
-        initial_sub_questions_verified_docs = (
-            state.initial_agent_stats.sub_questions.get("num_verified_documents", "--")
-        )
-
-        logger.debug("INITIAL AGENT STATS")
-        logger.debug(f"Document Boost Factor: {initial_doc_boost_factor}")
-        logger.debug(f"Support Boost Factor: {initial_support_boost_factor}")
-        logger.debug(f"Originally Verified Docs: {num_initial_verified_docs}")
-        logger.debug(
-            f"Originally Verified Docs Avg Score: {initial_verified_docs_avg_score}"
-        )
-        logger.debug(
-            f"Sub-Questions Verified Docs: {initial_sub_questions_verified_docs}"
-        )
-    if refined_agent_stats:
-        logger.debug("-" * 10)
-        logger.debug("REFINED AGENT STATS")
-        logger.debug(
-            f"Revision Doc Factor: {refined_agent_stats.revision_doc_efficiency}"
-        )
-        logger.debug(
-            f"Revision Question Factor: {refined_agent_stats.revision_question_efficiency}"
-        )
-
    agent_refined_end_time = datetime.now()
    if state.agent_refined_start_time:
        agent_refined_duration = (
@@ -325,7 +444,7 @@ def generate_refined_answer(

    return RefinedAnswerUpdate(
        refined_answer=answer,
-        refined_answer_quality=True,  # TODO: replace this with the actual check value
+        refined_answer_quality=refined_answer_quality,
        refined_agent_stats=refined_agent_stats,
        agent_refined_end_time=agent_refined_end_time,
        agent_refined_metrics=agent_refined_metrics,
--- a/backend/onyx/agents/agent_search/deep_search/main/states.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/states.py
@@ -17,6 +17,7 @@ from onyx.agents.agent_search.orchestration.states import ToolCallUpdate
 from onyx.agents.agent_search.orchestration.states import ToolChoiceInput
 from onyx.agents.agent_search.orchestration.states import ToolChoiceUpdate
 from onyx.agents.agent_search.shared_graph_utils.models import AgentChunkRetrievalStats
+from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
 from onyx.agents.agent_search.shared_graph_utils.models import (
    EntityRelationshipTermExtraction,
 )
@@ -76,6 +77,7 @@ class InitialAnswerUpdate(LoggerUpdate):
    """

    initial_answer: str | None = None
+    answer_error: AgentErrorLog | None = None
    initial_agent_stats: InitialAgentResultStats | None = None
    generated_sub_questions: list[str] = []
    agent_base_end_time: datetime | None = None
@@ -88,6 +90,7 @@ class RefinedAnswerUpdate(RefinedAgentEndStats, LoggerUpdate):
    """

    refined_answer: str | None = None
+    answer_error: AgentErrorLog | None = None
    refined_agent_stats: RefinedAgentStats | None = None
    refined_answer_quality: bool = False

--- a/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/expand_queries.py
+++ b/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/expand_queries.py
@@ -16,16 +16,46 @@ from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states impor
    QueryExpansionUpdate,
 )
 from onyx.agents.agent_search.models import GraphConfig
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AGENT_LLM_RATELIMIT_MESSAGE,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AGENT_LLM_TIMEOUT_MESSAGE,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AgentLLMErrorType,
+)
+from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
+from onyx.agents.agent_search.shared_graph_utils.models import BaseMessage_Content
+from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
 from onyx.agents.agent_search.shared_graph_utils.utils import dispatch_separated
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id
+from onyx.configs.agent_configs import (
+    AGENT_TIMEOUT_CONNECT_LLM_QUERY_REWRITING_GENERATION,
+)
+from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_QUERY_REWRITING_GENERATION
+from onyx.llm.chat_llm import LLMRateLimitError
+from onyx.llm.chat_llm import LLMTimeoutError
 from onyx.prompts.agent_search import (
    QUERY_REWRITING_PROMPT,
 )
+from onyx.utils.logger import setup_logger
+from onyx.utils.threadpool_concurrency import run_with_timeout
+from onyx.utils.timing import log_function_time
+
+logger = setup_logger()
+
+_llm_node_error_strings = LLMNodeErrorStrings(
+    timeout="Query rewriting failed due to LLM timeout - the original question will be used.",
+    rate_limit="Query rewriting failed due to LLM rate limit - the original question will be used.",
+    general_error="Query rewriting failed due to LLM error - the original question will be used.",
+)


+@log_function_time(print_only=True)
 def expand_queries(
    state: ExpandedRetrievalInput,
    config: RunnableConfig,
@@ -41,7 +71,7 @@ def expand_queries(
    node_start_time = datetime.now()
    question = state.question

-    llm = graph_config.tooling.fast_llm
+    model = graph_config.tooling.fast_llm
    sub_question_id = state.sub_question_id
    if sub_question_id is None:
        level, question_num = 0, 0
@@ -54,13 +84,45 @@ def expand_queries(
        )
    ]

-    llm_response_list = dispatch_separated(
-        llm.stream(prompt=msg), dispatch_subquery(level, question_num, writer)
-    )
+    agent_error: AgentErrorLog | None = None
+    llm_response_list: list[BaseMessage_Content] = []
+    llm_response = ""
+    rewritten_queries = []

-    llm_response = merge_message_runs(llm_response_list, chunk_separator="")[0].content
+    try:
+        llm_response_list = run_with_timeout(
+            AGENT_TIMEOUT_LLM_QUERY_REWRITING_GENERATION,
+            dispatch_separated,
+            model.stream(
+                prompt=msg,
+                timeout_override=AGENT_TIMEOUT_CONNECT_LLM_QUERY_REWRITING_GENERATION,
+            ),
+            dispatch_subquery(level, question_num, writer),
+        )
+        llm_response = merge_message_runs(llm_response_list, chunk_separator="")[
+            0
+        ].content
+        rewritten_queries = llm_response.split("\n")
+        log_result = f"Number of expanded queries: {len(rewritten_queries)}"

-    rewritten_queries = llm_response.split("\n")
+    except (LLMTimeoutError, TimeoutError):
+        agent_error = AgentErrorLog(
+            error_type=AgentLLMErrorType.TIMEOUT,
+            error_message=AGENT_LLM_TIMEOUT_MESSAGE,
+            error_result=_llm_node_error_strings.timeout,
+        )
+        logger.error("LLM Timeout Error - expand queries")
+        log_result = agent_error.error_result
+
+    except LLMRateLimitError:
+        agent_error = AgentErrorLog(
+            error_type=AgentLLMErrorType.RATE_LIMIT,
+            error_message=AGENT_LLM_RATELIMIT_MESSAGE,
+            error_result=_llm_node_error_strings.rate_limit,
+        )
+        logger.error("LLM Rate Limit Error - expand queries")
+        log_result = agent_error.error_result
+    # use subquestion as query if query generation fails

    return QueryExpansionUpdate(
        expanded_queries=rewritten_queries,
@@ -69,7 +131,7 @@ def expand_queries(
                graph_component="shared - expanded retrieval",
                node_name="expand queries",
                node_start_time=node_start_time,
-                result=f"Number of expanded queries: {len(rewritten_queries)}",
+                result=log_result,
            )
        ],
    )
--- a/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/rerank_documents.py
+++ b/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/rerank_documents.py
@@ -21,12 +21,15 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
 from onyx.configs.agent_configs import AGENT_RERANKING_MAX_QUERY_RETRIEVAL_RESULTS
 from onyx.configs.agent_configs import AGENT_RERANKING_STATS
 from onyx.context.search.models import InferenceSection
-from onyx.context.search.models import SearchRequest
-from onyx.context.search.pipeline import retrieval_preprocessing
+from onyx.context.search.models import RerankingDetails
 from onyx.context.search.postprocessing.postprocessing import rerank_sections
+from onyx.context.search.postprocessing.postprocessing import should_rerank
 from onyx.db.engine import get_session_context_manager
+from onyx.db.search_settings import get_current_search_settings
+from onyx.utils.timing import log_function_time


+@log_function_time(print_only=True)
 def rerank_documents(
    state: ExpandedRetrievalState, config: RunnableConfig
 ) -> DocRerankingUpdate:
@@ -39,6 +42,8 @@ def rerank_documents(

    # Rerank post retrieval and verification. First, create a search query
    # then create the list of reranked sections
+    # If no question defined/question is None in the state, use the original
+    # question from the search request as query

    graph_config = cast(GraphConfig, config["metadata"]["config"])
    question = (
@@ -47,44 +52,42 @@ def rerank_documents(
    assert (
        graph_config.tooling.search_tool
    ), "search_tool must be provided for agentic search"
-    with get_session_context_manager() as db_session:
-        # we ignore some of the user specified fields since this search is
-        # internal to agentic search, but we still want to pass through
-        # persona (for stuff like document sets) and rerank settings
-        # (to not make an unnecessary db call).
-        search_request = SearchRequest(
-            query=question,
-            persona=graph_config.inputs.search_request.persona,
-            rerank_settings=graph_config.inputs.search_request.rerank_settings,
-        )
-        _search_query = retrieval_preprocessing(
-            search_request=search_request,
-            user=graph_config.tooling.search_tool.user,  # bit of a hack
-            llm=graph_config.tooling.fast_llm,
-            db_session=db_session,
-        )

-    # skip section filtering
+    # Note that these are passed in values from the API and are overrides which are typically None
+    rerank_settings = graph_config.inputs.search_request.rerank_settings
+    allow_agent_reranking = graph_config.behavior.allow_agent_reranking

-    if (
-        _search_query.rerank_settings
-        and _search_query.rerank_settings.rerank_model_name
-        and _search_query.rerank_settings.num_rerank > 0
-        and len(verified_documents) > 0
-    ):
+    if rerank_settings is None:
+        with get_session_context_manager() as db_session:
+            search_settings = get_current_search_settings(db_session)
+            if not search_settings.disable_rerank_for_streaming:
+                rerank_settings = RerankingDetails.from_db_model(search_settings)
+
+    # Initial default: no reranking. Will be overwritten below if reranking is warranted
+    reranked_documents = verified_documents
+
+    if should_rerank(rerank_settings) and len(verified_documents) > 0:
        if len(verified_documents) > 1:
-            reranked_documents = rerank_sections(
-                _search_query,
-                verified_documents,
-            )
+            if not allow_agent_reranking:
+                logger.info("Use of local rerank model without GPU, skipping reranking")
+            # No reranking, stay with verified_documents as default
+
+            else:
+                # Reranking is warranted, use the rerank_sections functon
+                reranked_documents = rerank_sections(
+                    query_str=question,
+                    # if runnable, then rerank_settings is not None
+                    rerank_settings=cast(RerankingDetails, rerank_settings),
+                    sections_to_rerank=verified_documents,
+                )
        else:
-            num = "No" if len(verified_documents) == 0 else "One"
-            logger.warning(f"{num} verified document(s) found, skipping reranking")
-            reranked_documents = verified_documents
+            logger.warning(
+                f"{len(verified_documents)} verified document(s) found, skipping reranking"
+            )
+            # No reranking, stay with verified_documents as default
    else:
        logger.warning("No reranking settings found, using unranked documents")
-        reranked_documents = verified_documents
-
+        # No reranking, stay with verified_documents as default
    if AGENT_RERANKING_STATS:
        fit_scores = get_fit_scores(verified_documents, reranked_documents)
    else:
--- a/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/retrieve_documents.py
+++ b/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/retrieve_documents.py
@@ -23,12 +23,15 @@ from onyx.configs.agent_configs import AGENT_RETRIEVAL_STATS
 from onyx.context.search.models import InferenceSection
 from onyx.db.engine import get_session_context_manager
 from onyx.tools.models import SearchQueryInfo
+from onyx.tools.models import SearchToolOverrideKwargs
 from onyx.tools.tool_implementations.search.search_tool import (
    SEARCH_RESPONSE_SUMMARY_ID,
 )
 from onyx.tools.tool_implementations.search.search_tool import SearchResponseSummary
+from onyx.utils.timing import log_function_time


+@log_function_time(print_only=True)
 def retrieve_documents(
    state: RetrievalInput, config: RunnableConfig
 ) -> DocRetrievalUpdate:
@@ -67,9 +70,12 @@ def retrieve_documents(
    with get_session_context_manager() as db_session:
        for tool_response in search_tool.run(
            query=query_to_retrieve,
-            force_no_rerank=True,
-            alternate_db_session=db_session,
-            retrieved_sections_callback=callback_container.append,
+            override_kwargs=SearchToolOverrideKwargs(
+                force_no_rerank=True,
+                alternate_db_session=db_session,
+                retrieved_sections_callback=callback_container.append,
+                skip_query_analysis=not state.base_search,
+            ),
        ):
            # get retrieved docs to send to the rest of the graph
            if tool_response.id == SEARCH_RESPONSE_SUMMARY_ID:
--- a/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/verify_documents.py
+++ b/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/verify_documents.py
@@ -1,5 +1,7 @@
+from datetime import datetime
 from typing import cast

+from langchain_core.messages import BaseMessage
 from langchain_core.messages import HumanMessage
 from langchain_core.runnables.config import RunnableConfig

@@ -10,14 +12,40 @@ from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states impor
    DocVerificationUpdate,
 )
 from onyx.agents.agent_search.models import GraphConfig
+from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
+    binary_string_test,
+)
 from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
    trim_prompt_piece,
 )
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AGENT_POSITIVE_VALUE_STR,
+)
+from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
+from onyx.agents.agent_search.shared_graph_utils.utils import (
+    get_langgraph_node_log_string,
+)
+from onyx.configs.agent_configs import AGENT_TIMEOUT_CONNECT_LLM_DOCUMENT_VERIFICATION
+from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_DOCUMENT_VERIFICATION
+from onyx.llm.chat_llm import LLMRateLimitError
+from onyx.llm.chat_llm import LLMTimeoutError
 from onyx.prompts.agent_search import (
    DOCUMENT_VERIFICATION_PROMPT,
 )
+from onyx.utils.logger import setup_logger
+from onyx.utils.threadpool_concurrency import run_with_timeout
+from onyx.utils.timing import log_function_time
+
+logger = setup_logger()
+
+_llm_node_error_strings = LLMNodeErrorStrings(
+    timeout="The LLM timed out. The document could not be verified. The document will be treated as 'relevant'",
+    rate_limit="The LLM encountered a rate limit. The document could not be verified. The document will be treated as 'relevant'",
+    general_error="The LLM encountered an error. The document could not be verified. The document will be treated as 'relevant'",
+)


+@log_function_time(print_only=True)
 def verify_documents(
    state: DocVerificationInput, config: RunnableConfig
 ) -> DocVerificationUpdate:
@@ -26,12 +54,14 @@ def verify_documents(

    Args:
        state (DocVerificationInput): The current state
-        config (RunnableConfig): Configuration containing ProSearchConfig
+        config (RunnableConfig): Configuration containing AgentSearchConfig

    Updates:
        verified_documents: list[InferenceSection]
    """

+    node_start_time = datetime.now()
+
    question = state.question
    retrieved_document_to_verify = state.retrieved_document_to_verify
    document_content = retrieved_document_to_verify.combined_content
@@ -51,12 +81,43 @@ def verify_documents(
        )
    ]

-    response = fast_llm.invoke(msg)
+    response: BaseMessage | None = None

-    verified_documents = []
-    if isinstance(response.content, str) and "yes" in response.content.lower():
-        verified_documents.append(retrieved_document_to_verify)
+    verified_documents = [
+        retrieved_document_to_verify
+    ]  # default is to treat document as relevant
+
+    try:
+        response = run_with_timeout(
+            AGENT_TIMEOUT_LLM_DOCUMENT_VERIFICATION,
+            fast_llm.invoke,
+            prompt=msg,
+            timeout_override=AGENT_TIMEOUT_CONNECT_LLM_DOCUMENT_VERIFICATION,
+        )
+
+        assert isinstance(response.content, str)
+        if not binary_string_test(
+            text=response.content, positive_value=AGENT_POSITIVE_VALUE_STR
+        ):
+            verified_documents = []
+
+    except (LLMTimeoutError, TimeoutError):
+        # In this case, we decide to continue and don't raise an error, as
+        # little harm in letting some docs through that are less relevant.
+        logger.error("LLM Timeout Error - verify documents")
+
+    except LLMRateLimitError:
+        # In this case, we decide to continue and don't raise an error, as
+        # little harm in letting some docs through that are less relevant.
+        logger.error("LLM Rate Limit Error - verify documents")

    return DocVerificationUpdate(
        verified_documents=verified_documents,
+        log_messages=[
+            get_langgraph_node_log_string(
+                graph_component="shared - expanded retrieval",
+                node_name="verify documents",
+                node_start_time=node_start_time,
+            )
+        ],
    )
--- a/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/states.py
+++ b/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/states.py
@@ -21,9 +21,13 @@ from onyx.context.search.models import InferenceSection


 class ExpandedRetrievalInput(SubgraphCoreState):
-    question: str = ""
-    base_search: bool = False
+    # exception from 'no default value'for LangGraph input states
+    # Here, sub_question_id default None implies usage for the
+    # original question. This is sometimes needed for nested sub-graphs
+
    sub_question_id: str | None = None
+    question: str
+    base_search: bool


 ## Update/Return States
@@ -34,7 +38,7 @@ class QueryExpansionUpdate(LoggerUpdate, BaseModel):
    log_messages: list[str] = []


-class DocVerificationUpdate(BaseModel):
+class DocVerificationUpdate(LoggerUpdate, BaseModel):
    verified_documents: Annotated[list[InferenceSection], dedup_inference_sections] = []


@@ -88,4 +92,4 @@ class DocVerificationInput(ExpandedRetrievalInput):


 class RetrievalInput(ExpandedRetrievalInput):
-    query_to_retrieve: str = ""
+    query_to_retrieve: str
--- a/backend/onyx/agents/agent_search/models.py
+++ b/backend/onyx/agents/agent_search/models.py
@@ -67,6 +67,7 @@ class GraphSearchConfig(BaseModel):
    # Whether to allow creation of refinement questions (and entity extraction, etc.)
    allow_refinement: bool = True
    skip_gen_ai_answer_generation: bool = False
+    allow_agent_reranking: bool = False


 class GraphConfig(BaseModel):
--- a/backend/onyx/agents/agent_search/orchestration/nodes/call_tool.py
+++ b/backend/onyx/agents/agent_search/orchestration/nodes/call_tool.py
@@ -28,7 +28,7 @@ def emit_packet(packet: AnswerPacket, writer: StreamWriter) -> None:
    write_custom_event("basic_response", packet, writer)


-def tool_call(
+def call_tool(
    state: ToolChoiceUpdate,
    config: RunnableConfig,
    writer: StreamWriter = lambda _: None,
@@ -37,10 +37,7 @@ def tool_call(

    cast(GraphConfig, config["metadata"]["config"])

-    assert (
-        len(state.tool_choices) > 0
-    ), "Tool call node must have at least one tool choice"
-    tool_choice = state.tool_choices[-1]
+    tool_choice = state.tool_choice
    if tool_choice is None:
        raise ValueError("Cannot invoke tool call node without a tool choice")

--- a/backend/onyx/agents/agent_search/orchestration/nodes/llm_tool_choice.py
+++ b/backend/onyx/agents/agent_search/orchestration/nodes/llm_tool_choice.py
@@ -1,21 +1,21 @@
 from typing import cast
 from uuid import uuid4

+from langchain_core.messages import ToolCall
 from langchain_core.runnables.config import RunnableConfig
 from langgraph.types import StreamWriter

-from onyx.agents.agent_search.basic.states import BasicState
 from onyx.agents.agent_search.basic.utils import process_llm_stream
 from onyx.agents.agent_search.models import GraphConfig
 from onyx.agents.agent_search.orchestration.states import ToolChoice
+from onyx.agents.agent_search.orchestration.states import ToolChoiceState
 from onyx.agents.agent_search.orchestration.states import ToolChoiceUpdate
-from onyx.agents.agent_search.orchestration.utils import get_tool_choice_update
 from onyx.chat.prompt_builder.answer_prompt_builder import AnswerPromptBuilder
 from onyx.chat.tool_handling.tool_response_handler import get_tool_by_name
 from onyx.chat.tool_handling.tool_response_handler import (
    get_tool_call_for_non_tool_calling_llm_impl,
 )
-from onyx.llm.interfaces import ToolChoiceOptions
+from onyx.tools.tool import Tool
 from onyx.utils.logger import setup_logger

 logger = setup_logger()
@@ -25,8 +25,8 @@ logger = setup_logger()
 # and a function that handles extracting the necessary fields
 # from the state and config
 # TODO: fan-out to multiple tool call nodes? Make this configurable?
-def llm_tool_choice(
-    state: BasicState,
+def choose_tool(
+    state: ToolChoiceState,
    config: RunnableConfig,
    writer: StreamWriter = lambda _: None,
 ) -> ToolChoiceUpdate:
@@ -72,13 +72,11 @@ def llm_tool_choice(
    # This only happens if the tool call was forced or we are using a non-tool calling LLM.
    if tool and tool_args:
        return ToolChoiceUpdate(
-            tool_choices=[
-                ToolChoice(
-                    tool=tool,
-                    tool_args=tool_args,
-                    id=str(uuid4()),
-                )
-            ],
+            tool_choice=ToolChoice(
+                tool=tool,
+                tool_args=tool_args,
+                id=str(uuid4()),
+            ),
        )

    # if we're skipping gen ai answer generation, we should only
@@ -86,7 +84,7 @@ def llm_tool_choice(
    # the tool calling llm in the stream() below)
    if skip_gen_ai_answer_generation and not force_use_tool.force_use:
        return ToolChoiceUpdate(
-            tool_choices=[None],
+            tool_choice=None,
        )

    built_prompt = (
@@ -100,9 +98,15 @@ def llm_tool_choice(
        # For tool calling LLMs, we want to insert the task prompt as part of this flow, this is because the LLM
        # may choose to not call any tools and just generate the answer, in which case the task prompt is needed.
        prompt=built_prompt,
-        tools=[tool.tool_definition() for tool in tools] or None,
+        tools=(
+            [tool.tool_definition() for tool in tools] or None
+            if using_tool_calling_llm
+            else None
+        ),
        tool_choice=(
-            ToolChoiceOptions.REQUIRED if tools and force_use_tool.force_use else None
+            "required"
+            if tools and force_use_tool.force_use and using_tool_calling_llm
+            else None
        ),
        structured_response_format=structured_response_format,
    )
@@ -114,4 +118,45 @@ def llm_tool_choice(
        writer,
    )

-    return get_tool_choice_update(tool_message, tools)
+    # If no tool calls are emitted by the LLM, we should not choose a tool
+    if len(tool_message.tool_calls) == 0:
+        logger.debug("No tool calls emitted by LLM")
+        return ToolChoiceUpdate(
+            tool_choice=None,
+        )
+
+    # TODO: here we could handle parallel tool calls. Right now
+    # we just pick the first one that matches.
+    selected_tool: Tool | None = None
+    selected_tool_call_request: ToolCall | None = None
+    for tool_call_request in tool_message.tool_calls:
+        known_tools_by_name = [
+            tool for tool in tools if tool.name == tool_call_request["name"]
+        ]
+
+        if known_tools_by_name:
+            selected_tool = known_tools_by_name[0]
+            selected_tool_call_request = tool_call_request
+            break
+
+        logger.error(
+            "Tool call requested with unknown name field. \n"
+            f"tools: {tools}"
+            f"tool_call_request: {tool_call_request}"
+        )
+
+    if not selected_tool or not selected_tool_call_request:
+        raise ValueError(
+            f"Tool call attempted with tool {selected_tool}, request {selected_tool_call_request}"
+        )
+
+    logger.debug(f"Selected tool: {selected_tool.name}")
+    logger.debug(f"Selected tool call request: {selected_tool_call_request}")
+
+    return ToolChoiceUpdate(
+        tool_choice=ToolChoice(
+            tool=selected_tool,
+            tool_args=selected_tool_call_request["args"],
+            id=selected_tool_call_request["id"],
+        ),
+    )
--- a/backend/onyx/agents/agent_search/orchestration/nodes/basic_use_tool_response.py
+++ b/backend/onyx/agents/agent_search/orchestration/nodes/basic_use_tool_response.py
@@ -4,11 +4,10 @@ from langchain_core.messages import AIMessageChunk
 from langchain_core.runnables.config import RunnableConfig
 from langgraph.types import StreamWriter

+from onyx.agents.agent_search.basic.states import BasicOutput
 from onyx.agents.agent_search.basic.states import BasicState
 from onyx.agents.agent_search.basic.utils import process_llm_stream
 from onyx.agents.agent_search.models import GraphConfig
-from onyx.agents.agent_search.orchestration.states import ToolChoiceUpdate
-from onyx.agents.agent_search.orchestration.utils import get_tool_choice_update
 from onyx.chat.models import LlmDoc
 from onyx.chat.models import OnyxContexts
 from onyx.tools.tool_implementations.search.search_tool import (
@@ -24,15 +23,11 @@ logger = setup_logger()

 def basic_use_tool_response(
    state: BasicState, config: RunnableConfig, writer: StreamWriter = lambda _: None
-) -> ToolChoiceUpdate:
+) -> BasicOutput:
    agent_config = cast(GraphConfig, config["metadata"]["config"])
    structured_response_format = agent_config.inputs.structured_response_format
    llm = agent_config.tooling.primary_llm
-
-    assert (
-        len(state.tool_choices) > 0
-    ), "Tool choice node must have at least one tool choice"
-    tool_choice = state.tool_choices[-1]
+    tool_choice = state.tool_choice
    if tool_choice is None:
        raise ValueError("Tool choice is None")
    tool = tool_choice.tool
@@ -66,8 +61,6 @@ def basic_use_tool_response(
        stream = llm.stream(
            prompt=new_prompt_builder.build(),
            structured_response_format=structured_response_format,
-            tools=[_tool.tool_definition() for _tool in agent_config.tooling.tools],
-            tool_choice=None,
        )

        # For now, we don't do multiple tool calls, so we ignore the tool_message
@@ -81,4 +74,4 @@ def basic_use_tool_response(
            displayed_search_results=initial_search_results or final_search_results,
        )

-    return get_tool_choice_update(new_tool_call_chunk, agent_config.tooling.tools)
+    return BasicOutput(tool_call_chunk=new_tool_call_chunk)
--- a/backend/onyx/agents/agent_search/orchestration/states.py
+++ b/backend/onyx/agents/agent_search/orchestration/states.py
@@ -1,6 +1,3 @@
-from operator import add
-from typing import Annotated
-
 from pydantic import BaseModel

 from onyx.chat.prompt_builder.answer_prompt_builder import PromptSnapshot
@@ -44,7 +41,7 @@ class ToolChoice(BaseModel):


 class ToolChoiceUpdate(BaseModel):
-    tool_choices: Annotated[list[ToolChoice | None], add] = []
+    tool_choice: ToolChoice | None = None


 class ToolChoiceState(ToolChoiceUpdate, ToolChoiceInput):
--- a/backend/onyx/agents/agent_search/orchestration/utils.py
+++ b/backend/onyx/agents/agent_search/orchestration/utils.py
@@ -1,58 +0,0 @@
-from langchain_core.messages import AIMessageChunk
-from langchain_core.messages import ToolCall
-
-from onyx.agents.agent_search.orchestration.states import ToolChoice
-from onyx.agents.agent_search.orchestration.states import ToolChoiceUpdate
-from onyx.tools.tool import Tool
-from onyx.utils.logger import setup_logger
-
-logger = setup_logger()
-
-
-def get_tool_choice_update(
-    tool_message: AIMessageChunk, tools: list[Tool]
-) -> ToolChoiceUpdate:
-    # If no tool calls are emitted by the LLM, we should not choose a tool
-    if len(tool_message.tool_calls) == 0:
-        logger.debug("No tool calls emitted by LLM")
-        return ToolChoiceUpdate(
-            tool_choices=[None],
-        )
-
-    # TODO: here we could handle parallel tool calls. Right now
-    # we just pick the first one that matches.
-    selected_tool: Tool | None = None
-    selected_tool_call_request: ToolCall | None = None
-    for tool_call_request in tool_message.tool_calls:
-        known_tools_by_name = [
-            tool for tool in tools if tool.name == tool_call_request["name"]
-        ]
-
-        if known_tools_by_name:
-            selected_tool = known_tools_by_name[0]
-            selected_tool_call_request = tool_call_request
-            break
-
-        logger.error(
-            "Tool call requested with unknown name field. \n"
-            f"tools: {tools}"
-            f"tool_call_request: {tool_call_request}"
-        )
-
-    if not selected_tool or not selected_tool_call_request:
-        raise ValueError(
-            f"Tool call attempted with tool {selected_tool}, request {selected_tool_call_request}"
-        )
-
-    logger.debug(f"Selected tool: {selected_tool.name}")
-    logger.debug(f"Selected tool call request: {selected_tool_call_request}")
-
-    return ToolChoiceUpdate(
-        tool_choices=[
-            ToolChoice(
-                tool=selected_tool,
-                tool_args=selected_tool_call_request["args"],
-                id=selected_tool_call_request["id"],
-            )
-        ],
-    )
--- a/backend/onyx/agents/agent_search/run_graph.py
+++ b/backend/onyx/agents/agent_search/run_graph.py
@@ -12,7 +12,7 @@ from onyx.agents.agent_search.deep_search.main.graph_builder import (
    main_graph_builder as main_graph_builder_a,
 )
 from onyx.agents.agent_search.deep_search.main.states import (
-    MainInput as MainInput_a,
+    MainInput as MainInput,
 )
 from onyx.agents.agent_search.models import GraphConfig
 from onyx.agents.agent_search.shared_graph_utils.utils import get_test_config
@@ -21,6 +21,7 @@ from onyx.chat.models import AnswerPacket
 from onyx.chat.models import AnswerStream
 from onyx.chat.models import ExtendedToolResponse
 from onyx.chat.models import RefinedAnswerImprovement
+from onyx.chat.models import StreamingError
 from onyx.chat.models import StreamStopInfo
 from onyx.chat.models import SubQueryPiece
 from onyx.chat.models import SubQuestionPiece
@@ -33,6 +34,7 @@ from onyx.llm.factory import get_default_llms
 from onyx.tools.tool_runner import ToolCallKickoff
 from onyx.utils.logger import setup_logger

+
 logger = setup_logger()

 _COMPILED_GRAPH: CompiledStateGraph | None = None
@@ -72,13 +74,15 @@ def _parse_agent_event(
            return cast(AnswerPacket, event["data"])
        elif event["name"] == "refined_answer_improvement":
            return cast(RefinedAnswerImprovement, event["data"])
+        elif event["name"] == "refined_sub_question_creation_error":
+            return cast(StreamingError, event["data"])
    return None


 def manage_sync_streaming(
    compiled_graph: CompiledStateGraph,
    config: GraphConfig,
-    graph_input: BasicInput | MainInput_a,
+    graph_input: BasicInput | MainInput,
 ) -> Iterable[StreamEvent]:
    message_id = config.persistence.message_id if config.persistence else None
    for event in compiled_graph.stream(
@@ -92,7 +96,7 @@ def manage_sync_streaming(
 def run_graph(
    compiled_graph: CompiledStateGraph,
    config: GraphConfig,
-    input: BasicInput | MainInput_a,
+    input: BasicInput | MainInput,
 ) -> AnswerStream:
    config.behavior.perform_initial_search_decomposition = (
        INITIAL_SEARCH_DECOMPOSITION_ENABLED
@@ -123,9 +127,7 @@ def run_main_graph(
 ) -> AnswerStream:
    compiled_graph = load_compiled_graph()

-    input = MainInput_a(
-        base_question=config.inputs.search_request.query, log_messages=[]
-    )
+    input = MainInput(log_messages=[])

    # Agent search is not a Tool per se, but this is helpful for the frontend
    yield ToolCallKickoff(
@@ -140,7 +142,7 @@ def run_basic_graph(
 ) -> AnswerStream:
    graph = basic_graph_builder()
    compiled_graph = graph.compile()
-    input = BasicInput()
+    input = BasicInput(unused=True)
    return run_graph(compiled_graph, config, input)


@@ -172,9 +174,7 @@ if __name__ == "__main__":
            # search_request.persona = get_persona_by_id(1, None, db_session)
            # config.perform_initial_search_path_decision = False
            config.behavior.perform_initial_search_decomposition = True
-            input = MainInput_a(
-                base_question=config.inputs.search_request.query, log_messages=[]
-            )
+            input = MainInput(log_messages=[])

            tool_responses: list = []
            for output in run_graph(compiled_graph, config, input):
--- a/backend/onyx/agents/agent_search/shared_graph_utils/agent_prompt_ops.py
+++ b/backend/onyx/agents/agent_search/shared_graph_utils/agent_prompt_ops.py
@@ -7,6 +7,7 @@ from onyx.agents.agent_search.models import GraphConfig
 from onyx.agents.agent_search.shared_graph_utils.models import (
    AgentPromptEnrichmentComponents,
 )
+from onyx.agents.agent_search.shared_graph_utils.utils import format_docs
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_persona_agent_prompt_expressions,
 )
@@ -40,13 +41,7 @@ def build_sub_question_answer_prompt(

    date_str = build_date_time_string()

-    # TODO: This should include document metadata and title
-    docs_format_list = [
-        f"Document Number: [D{doc_num + 1}]\nContent: {doc.combined_content}\n\n"
-        for doc_num, doc in enumerate(docs)
-    ]
-
-    docs_str = "\n\n".join(docs_format_list)
+    docs_str = format_docs(docs)

    docs_str = trim_prompt_piece(
        config,
@@ -150,3 +145,38 @@ def get_prompt_enrichment_components(
        history=history,
        date_str=date_str,
    )
+
+
+def binary_string_test(text: str, positive_value: str = "yes") -> bool:
+    """
+    Tests if a string contains a positive value (case-insensitive).
+
+    Args:
+        text: The string to test
+        positive_value: The value to look for (defaults to "yes")
+
+    Returns:
+        True if the positive value is found in the text
+    """
+    return positive_value.lower() in text.lower()
+
+
+def binary_string_test_after_answer_separator(
+    text: str, positive_value: str = "yes", separator: str = "Answer:"
+) -> bool:
+    """
+    Tests if a string contains a positive value (case-insensitive).
+
+    Args:
+        text: The string to test
+        positive_value: The value to look for (defaults to "yes")
+
+    Returns:
+        True if the positive value is found in the text
+    """
+
+    if separator not in text:
+        return False
+    relevant_text = text.split(f"{separator}")[-1]
+
+    return binary_string_test(relevant_text, positive_value)
--- a/backend/onyx/agents/agent_search/shared_graph_utils/calculations.py
+++ b/backend/onyx/agents/agent_search/shared_graph_utils/calculations.py
@@ -1,7 +1,11 @@
 import numpy as np

+from onyx.agents.agent_search.shared_graph_utils.models import AnswerGenerationDocuments
 from onyx.agents.agent_search.shared_graph_utils.models import RetrievalFitScoreMetrics
 from onyx.agents.agent_search.shared_graph_utils.models import RetrievalFitStats
+from onyx.agents.agent_search.shared_graph_utils.operators import (
+    dedup_inference_section_list,
+)
 from onyx.chat.models import SectionRelevancePiece
 from onyx.context.search.models import InferenceSection
 from onyx.utils.logger import setup_logger
@@ -96,3 +100,106 @@ def get_fit_scores(
    )

    return fit_eval
+
+
+def get_answer_generation_documents(
+    relevant_docs: list[InferenceSection],
+    context_documents: list[InferenceSection],
+    original_question_docs: list[InferenceSection],
+    max_docs: int,
+) -> AnswerGenerationDocuments:
+    """
+    Create a deduplicated list of documents to stream, prioritizing relevant docs.
+
+    Args:
+        relevant_docs: Primary documents to include
+        context_documents: Additional context documents to append
+        original_question_docs: Original question documents to append
+        max_docs: Maximum number of documents to return
+
+    Returns:
+        List of deduplicated documents, limited to max_docs
+    """
+    # get relevant_doc ids
+    relevant_doc_ids = [doc.center_chunk.document_id for doc in relevant_docs]
+
+    # Start with relevant docs or fallback to original question docs
+    streaming_documents = relevant_docs.copy()
+
+    # Use a set for O(1) lookups of document IDs
+    seen_doc_ids = {doc.center_chunk.document_id for doc in streaming_documents}
+
+    # Combine additional documents to check in one iteration
+    additional_docs = context_documents + original_question_docs
+    for doc_idx, doc in enumerate(additional_docs):
+        doc_id = doc.center_chunk.document_id
+        if doc_id not in seen_doc_ids:
+            streaming_documents.append(doc)
+            seen_doc_ids.add(doc_id)
+
+    streaming_documents = dedup_inference_section_list(streaming_documents)
+
+    relevant_streaming_docs = [
+        doc
+        for doc in streaming_documents
+        if doc.center_chunk.document_id in relevant_doc_ids
+    ]
+    relevant_streaming_docs = dedup_sort_inference_section_list(relevant_streaming_docs)
+
+    additional_streaming_docs = [
+        doc
+        for doc in streaming_documents
+        if doc.center_chunk.document_id not in relevant_doc_ids
+    ]
+    additional_streaming_docs = dedup_sort_inference_section_list(
+        additional_streaming_docs
+    )
+
+    for doc in additional_streaming_docs:
+        if doc.center_chunk.score:
+            doc.center_chunk.score += -2.0
+        else:
+            doc.center_chunk.score = -2.0
+
+    sorted_streaming_documents = relevant_streaming_docs + additional_streaming_docs
+
+    return AnswerGenerationDocuments(
+        streaming_documents=sorted_streaming_documents[:max_docs],
+        context_documents=relevant_streaming_docs[:max_docs],
+    )
+
+
+def dedup_sort_inference_section_list(
+    sections: list[InferenceSection],
+) -> list[InferenceSection]:
+    """Deduplicates InferenceSections by document_id and sorts by score.
+
+    Args:
+        sections: List of InferenceSections to deduplicate and sort
+
+    Returns:
+        Deduplicated list of InferenceSections sorted by score in descending order
+    """
+    # dedupe/merge with existing framework
+    sections = dedup_inference_section_list(sections)
+
+    # Use dict to deduplicate by document_id, keeping highest scored version
+    unique_sections: dict[str, InferenceSection] = {}
+    for section in sections:
+        doc_id = section.center_chunk.document_id
+        if doc_id not in unique_sections:
+            unique_sections[doc_id] = section
+            continue
+
+        # Keep version with higher score
+        existing_score = unique_sections[doc_id].center_chunk.score or 0
+        new_score = section.center_chunk.score or 0
+        if new_score > existing_score:
+            unique_sections[doc_id] = section
+
+    # Sort by score in descending order, handling None scores
+    sorted_sections = sorted(
+        unique_sections.values(), key=lambda x: x.center_chunk.score or 0, reverse=True
+    )
+
+    return sorted_sections
--- a/backend/onyx/agents/agent_search/shared_graph_utils/constants.py
+++ b/backend/onyx/agents/agent_search/shared_graph_utils/constants.py
@@ -0,0 +1,19 @@
+from enum import Enum
+
+AGENT_LLM_TIMEOUT_MESSAGE = "The agent timed out. Please try again."
+AGENT_LLM_ERROR_MESSAGE = "The agent encountered an error. Please try again."
+AGENT_LLM_RATELIMIT_MESSAGE = (
+    "The agent encountered a rate limit error. Please try again."
+)
+LLM_ANSWER_ERROR_MESSAGE = "The question was not answered due to an LLM error."
+
+AGENT_POSITIVE_VALUE_STR = "yes"
+AGENT_NEGATIVE_VALUE_STR = "no"
+
+AGENT_ANSWER_SEPARATOR = "Answer:"
+
+
+class AgentLLMErrorType(str, Enum):
+    TIMEOUT = "timeout"
+    RATE_LIMIT = "rate_limit"
+    GENERAL_ERROR = "general_error"
--- a/backend/onyx/agents/agent_search/shared_graph_utils/models.py
+++ b/backend/onyx/agents/agent_search/shared_graph_utils/models.py
@@ -1,3 +1,5 @@
+from typing import Any
+
 from pydantic import BaseModel

 from onyx.agents.agent_search.deep_search.main.models import (
@@ -56,6 +58,12 @@ class InitialAgentResultStats(BaseModel):
    agent_effectiveness: dict[str, float | int | None]


+class AgentErrorLog(BaseModel):
+    error_message: str
+    error_type: str
+    error_result: str
+
+
 class RefinedAgentStats(BaseModel):
    revision_doc_efficiency: float | None
    revision_question_efficiency: float | None
@@ -110,6 +118,11 @@ class SubQuestionAnswerResults(BaseModel):
    sub_question_retrieval_stats: AgentChunkRetrievalStats


+class StructuredSubquestionDocuments(BaseModel):
+    cited_documents: list[InferenceSection]
+    context_documents: list[InferenceSection]
+
+
 class CombinedAgentMetrics(BaseModel):
    timings: AgentTimings
    base_metrics: AgentBaseMetrics | None
@@ -126,3 +139,17 @@ class AgentPromptEnrichmentComponents(BaseModel):
    persona_prompts: PersonaPromptExpressions
    history: str
    date_str: str
+
+
+class LLMNodeErrorStrings(BaseModel):
+    timeout: str = "LLM Timeout Error"
+    rate_limit: str = "LLM Rate Limit Error"
+    general_error: str = "General LLM Error"
+
+
+class AnswerGenerationDocuments(BaseModel):
+    streaming_documents: list[InferenceSection]
+    context_documents: list[InferenceSection]
+
+
+BaseMessage_Content = str | list[str | dict[str, Any]]
--- a/backend/onyx/agents/agent_search/shared_graph_utils/operators.py
+++ b/backend/onyx/agents/agent_search/shared_graph_utils/operators.py
@@ -12,6 +12,13 @@ def dedup_inference_sections(
    return deduped


+def dedup_inference_section_list(
+    list: list[InferenceSection],
+) -> list[InferenceSection]:
+    deduped = _merge_sections(list)
+    return deduped
+
+
 def dedup_question_answer_results(
    question_answer_results_1: list[SubQuestionAnswerResults],
    question_answer_results_2: list[SubQuestionAnswerResults],
--- a/backend/onyx/agents/agent_search/shared_graph_utils/utils.py
+++ b/backend/onyx/agents/agent_search/shared_graph_utils/utils.py
@@ -20,10 +20,18 @@ from onyx.agents.agent_search.models import GraphInputs
 from onyx.agents.agent_search.models import GraphPersistence
 from onyx.agents.agent_search.models import GraphSearchConfig
 from onyx.agents.agent_search.models import GraphTooling
+from onyx.agents.agent_search.shared_graph_utils.models import BaseMessage_Content
 from onyx.agents.agent_search.shared_graph_utils.models import (
    EntityRelationshipTermExtraction,
 )
 from onyx.agents.agent_search.shared_graph_utils.models import PersonaPromptExpressions
+from onyx.agents.agent_search.shared_graph_utils.models import (
+    StructuredSubquestionDocuments,
+)
+from onyx.agents.agent_search.shared_graph_utils.models import SubQuestionAnswerResults
+from onyx.agents.agent_search.shared_graph_utils.operators import (
+    dedup_inference_section_list,
+)
 from onyx.chat.models import AnswerPacket
 from onyx.chat.models import AnswerStyleConfig
 from onyx.chat.models import CitationConfig
@@ -34,6 +42,10 @@ from onyx.chat.models import StreamStopInfo
 from onyx.chat.models import StreamStopReason
 from onyx.chat.models import StreamType
 from onyx.chat.prompt_builder.answer_prompt_builder import AnswerPromptBuilder
+from onyx.configs.agent_configs import (
+    AGENT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION,
+)
+from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_HISTORY_SUMMARY_GENERATION
 from onyx.configs.chat_configs import CHAT_TARGET_CHUNK_PERCENTAGE
 from onyx.configs.chat_configs import MAX_CHUNKS_FED_TO_CHAT
 from onyx.configs.constants import DEFAULT_PERSONA_ID
@@ -46,6 +58,8 @@ from onyx.context.search.models import SearchRequest
 from onyx.db.engine import get_session_context_manager
 from onyx.db.persona import get_persona_by_id
 from onyx.db.persona import Persona
+from onyx.llm.chat_llm import LLMRateLimitError
+from onyx.llm.chat_llm import LLMTimeoutError
 from onyx.llm.interfaces import LLM
 from onyx.prompts.agent_search import (
    ASSISTANT_SYSTEM_PROMPT_DEFAULT,
@@ -58,6 +72,7 @@ from onyx.prompts.agent_search import (
 )
 from onyx.prompts.prompt_utils import handle_onyx_date_awareness
 from onyx.tools.force import ForceUseTool
+from onyx.tools.models import SearchToolOverrideKwargs
 from onyx.tools.tool_constructor import SearchToolConfig
 from onyx.tools.tool_implementations.search.search_tool import (
    SEARCH_RESPONSE_SUMMARY_ID,
@@ -65,8 +80,10 @@ from onyx.tools.tool_implementations.search.search_tool import (
 from onyx.tools.tool_implementations.search.search_tool import SearchResponseSummary
 from onyx.tools.tool_implementations.search.search_tool import SearchTool
 from onyx.tools.utils import explicit_tool_calling_supported
+from onyx.utils.logger import setup_logger
+from onyx.utils.threadpool_concurrency import run_with_timeout

-BaseMessage_Content = str | list[str | dict[str, Any]]
+logger = setup_logger()


 # Post-processing
@@ -218,7 +235,10 @@ def get_test_config(
        using_tool_calling_llm=using_tool_calling_llm,
    )

-    chat_session_id = os.environ.get("ONYX_AS_CHAT_SESSION_ID")
+    chat_session_id = (
+        os.environ.get("ONYX_AS_CHAT_SESSION_ID")
+        or "00000000-0000-0000-0000-000000000000"
+    )
    assert (
        chat_session_id is not None
    ), "ONYX_AS_CHAT_SESSION_ID must be set for backend tests"
@@ -341,8 +361,12 @@ def retrieve_search_docs(
    with get_session_context_manager() as db_session:
        for tool_response in search_tool.run(
            query=question,
-            force_no_rerank=True,
-            alternate_db_session=db_session,
+            override_kwargs=SearchToolOverrideKwargs(
+                force_no_rerank=True,
+                alternate_db_session=db_session,
+                retrieved_sections_callback=None,
+                skip_query_analysis=False,
+            ),
        ):
            # get retrieved docs to send to the rest of the graph
            if tool_response.id == SEARCH_RESPONSE_SUMMARY_ID:
@@ -372,8 +396,26 @@ def summarize_history(
        )
    )

-    history_response = llm.invoke(history_context_prompt)
+    try:
+        history_response = run_with_timeout(
+            AGENT_TIMEOUT_LLM_HISTORY_SUMMARY_GENERATION,
+            llm.invoke,
+            history_context_prompt,
+            timeout_override=AGENT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION,
+        )
+    except (LLMTimeoutError, TimeoutError):
+        logger.error("LLM Timeout Error - summarize history")
+        return (
+            history  # this is what is done at this point anyway, so we default to this
+        )
+    except LLMRateLimitError:
+        logger.error("LLM Rate Limit Error - summarize history")
+        return (
+            history  # this is what is done at this point anyway, so we default to this
+        )
+
    assert isinstance(history_response.content, str)
+
    return history_response.content


@@ -439,3 +481,27 @@ def remove_document_citations(text: str) -> str:
    #   \d+  - one or more digits
    #   \]   - literal ] character
    return re.sub(r"\[(?:D|Q)?\d+\]", "", text)
+
+
+def get_deduplicated_structured_subquestion_documents(
+    sub_question_results: list[SubQuestionAnswerResults],
+) -> StructuredSubquestionDocuments:
+    """
+    Extract and deduplicate all cited documents from sub-question results.
+
+    Args:
+        sub_question_results: List of sub-question results containing cited documents
+
+    Returns:
+        Deduplicated list of cited documents
+    """
+    cited_docs = [
+        doc for result in sub_question_results for doc in result.cited_documents
+    ]
+    context_docs = [
+        doc for result in sub_question_results for doc in result.context_documents
+    ]
+    return StructuredSubquestionDocuments(
+        cited_documents=dedup_inference_section_list(cited_docs),
+        context_documents=dedup_inference_section_list(context_docs),
+    )
--- a/Show More
+++ b/Show More