improve scroll

Fix nginx for prod compose file
Fix integration tests (#4059 )
2026-02-18 16:25:45 +00:00 · 2025-02-22 13:25:23 -08:00 · 2025-02-21 16:57:54 -08:00 · 2025-02-21 15:56:11 -08:00 · 2025-02-21 14:00:32 -08:00 · 2025-02-21 13:40:07 -08:00
237 changed files with 4357 additions and 2905 deletions
--- a/.github/workflows/pr-integration-tests-parallel.yml
+++ b/.github/workflows/pr-integration-tests-parallel.yml
@@ -1,153 +0,0 @@
-name: Run Integration Tests v3
-concurrency:
-  group: Run-Integration-Tests-Parallel-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
-  cancel-in-progress: true
-
-on:
-  merge_group:
-  pull_request:
-    branches:
-      - main
-      - "release/**"
-
-env:
-  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-  SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
-  CONFLUENCE_TEST_SPACE_URL: ${{ secrets.CONFLUENCE_TEST_SPACE_URL }}
-  CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }}
-  CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }}
-
-jobs:
-  integration-tests:
-    # See https://runs-on.com/runners/linux/
-    runs-on:
-      [runs-on, runner=32cpu-linux-x64, ram=64, "run-id=${{ github.run_id }}"]
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-
-      - name: Login to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKER_USERNAME }}
-          password: ${{ secrets.DOCKER_TOKEN }}
-
-      - name: Build integration test Docker image
-        uses: ./.github/actions/custom-build-and-push
-        with:
-          context: ./backend
-          file: ./backend/tests/integration/Dockerfile
-          platforms: linux/amd64
-          tags: danswer/danswer-integration:test
-          push: false
-          load: true
-          cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration-parallel/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
-          cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration-parallel/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max
-
-      - name: Run Standard Integration Tests
-        run: |
-          # Print a message indicating that tests are starting
-          echo "Running integration tests..."
-          
-          # Create a directory for test logs that will be mounted into the container
-          mkdir -p ${{ github.workspace }}/test_logs
-          chmod 777 ${{ github.workspace }}/test_logs
-          
-          # Run the integration tests in a Docker container
-          # Mount the Docker socket to allow Docker-in-Docker (DinD)
-          # Mount the test_logs directory to capture logs
-          # Use host network for easier communication with other services
-          docker run \
-            -v /var/run/docker.sock:/var/run/docker.sock \
-            -v ${{ github.workspace }}/test_logs:/tmp \
-            --network host \
-            -e OPENAI_API_KEY=${OPENAI_API_KEY} \
-            -e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \
-            -e CONFLUENCE_TEST_SPACE_URL=${CONFLUENCE_TEST_SPACE_URL} \
-            -e CONFLUENCE_USER_NAME=${CONFLUENCE_USER_NAME} \
-            -e CONFLUENCE_ACCESS_TOKEN=${CONFLUENCE_ACCESS_TOKEN} \
-            -e TEST_WEB_HOSTNAME=test-runner \
-            -e MOCK_CONNECTOR_SERVER_HOST=mock_connector_server \
-            -e MOCK_CONNECTOR_SERVER_PORT=8001 \
-            danswer/danswer-integration:test \
-            python /app/tests/integration/run.py
-        continue-on-error: true
-        id: run_tests
-
-      - name: Check test results
-        run: |
-          if [ ${{ steps.run_tests.outcome }} == 'failure' ]; then
-            echo "Integration tests failed. Exiting with error."
-            exit 1
-          else
-            echo "All integration tests passed successfully."
-          fi
-
-      - name: Collect log files
-        if: success() || failure()
-        run: |
-          # Create a directory for logs
-          mkdir -p ${{ github.workspace }}/logs
-          mkdir -p ${{ github.workspace }}/logs/shared_services
-          
-          # Copy all relevant log files from the mounted directory
-          cp ${{ github.workspace }}/test_logs/api_server_*.txt ${{ github.workspace }}/logs/ || true
-          cp ${{ github.workspace }}/test_logs/background_*.txt ${{ github.workspace }}/logs/ || true
-          cp ${{ github.workspace }}/test_logs/shared_model_server.txt ${{ github.workspace }}/logs/ || true
-          
-          # Collect logs from shared services (Docker containers)
-          # Note: using a wildcard for the UUID part of the stack name
-          docker ps -a --filter "name=base-onyx-" --format "{{.Names}}" | while read container; do
-            echo "Collecting logs from $container"
-            docker logs $container > "${{ github.workspace }}/logs/shared_services/${container}.log" 2>&1 || true
-          done
-          
-          # Also collect Redis container logs
-          docker ps -a --filter "name=redis-onyx-" --format "{{.Names}}" | while read container; do
-            echo "Collecting logs from $container"
-            docker logs $container > "${{ github.workspace }}/logs/shared_services/${container}.log" 2>&1 || true
-          done
-          
-          # List collected logs
-          echo "Collected log files:"
-          ls -l ${{ github.workspace }}/logs/
-          echo "Collected shared services logs:"
-          ls -l ${{ github.workspace }}/logs/shared_services/
-
-      - name: Upload logs
-        if: success() || failure()
-        uses: actions/upload-artifact@v4
-        with:
-          name: integration-test-logs
-          path: |
-            ${{ github.workspace }}/logs/
-            ${{ github.workspace }}/logs/shared_services/
-          retention-days: 5
-
-      # save before stopping the containers so the logs can be captured
-      # - name: Save Docker logs
-      #   if: success() || failure()
-      #   run: |
-      #     cd deployment/docker_compose
-      #     docker compose -f docker-compose.dev.yml -p danswer-stack logs > docker-compose.log
-      #     mv docker-compose.log ${{ github.workspace }}/docker-compose.log
-
-      # - name: Stop Docker containers
-      #   run: |
-      #     cd deployment/docker_compose
-      #     docker compose -f docker-compose.dev.yml -p danswer-stack down -v
-
-      # - name: Upload logs
-      #   if: success() || failure()
-      #   uses: actions/upload-artifact@v4
-      #   with:
-      #     name: docker-logs
-      #     path: ${{ github.workspace }}/docker-compose.log
-
-      # - name: Stop Docker containers
-      #   run: |
-      #     cd deployment/docker_compose
-      #     docker compose -f docker-compose.dev.yml -p danswer-stack down -v
--- a/.github/workflows/pr-integration-tests.yml
+++ b/.github/workflows/pr-integration-tests.yml
@@ -5,10 +5,10 @@ concurrency:

 on:
  merge_group:
-  # pull_request:
-  #   branches:
-  #     - main
-  #     - "release/**"
+  pull_request:
+    branches:
+      - main
+      - "release/**"

 env:
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
@@ -145,7 +145,7 @@ jobs:
        run: |
          cd deployment/docker_compose
          docker compose -f docker-compose.multitenant-dev.yml -p onyx-stack down -v
-      
+
      # NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections
      - name: Start Docker containers
        run: |
@@ -157,6 +157,7 @@ jobs:
          REQUIRE_EMAIL_VERIFICATION=false \
          DISABLE_TELEMETRY=true \
          IMAGE_TAG=test \
+          INTEGRATION_TESTS_MODE=true \
          docker compose -f docker-compose.dev.yml -p onyx-stack up -d
        id: start_docker

@@ -199,7 +200,7 @@ jobs:
          cd backend/tests/integration/mock_services
          docker compose -f docker-compose.mock-it-services.yml \
            -p mock-it-services-stack up -d
-      
+
      # NOTE: Use pre-ping/null to reduce flakiness due to dropped connections
      - name: Run Standard Integration Tests
        run: |
--- a/.github/workflows/pr-python-connector-tests.yml
+++ b/.github/workflows/pr-python-connector-tests.yml
@@ -74,7 +74,9 @@ jobs:
          python -m pip install --upgrade pip
          pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
          pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
-
+          playwright install chromium
+          playwright install-deps chromium
+          
      - name: Run Tests
        shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
        run: py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/connectors
--- a/.github/workflows/pr-python-model-tests.yml
+++ b/.github/workflows/pr-python-model-tests.yml
@@ -1,10 +1,16 @@
-name: Connector Tests
+name: Model Server Tests

 on:
  schedule:
    # This cron expression runs the job daily at 16:00 UTC (9am PT)
    - cron: "0 16 * * *"
-
+  workflow_dispatch:
+    inputs:
+      branch:
+        description: 'Branch to run the workflow on'
+        required: false
+        default: 'main'
+        
 env:
  # Bedrock
  AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
@@ -26,6 +32,23 @@ jobs:
      - name: Checkout code
        uses: actions/checkout@v4

+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+
+      # tag every docker image with "test" so that we can spin up the correct set
+      # of images during testing
+
+      # We don't need to build the Web Docker image since it's not yet used
+      # in the integration tests. We have a separate action to verify that it builds
+      # successfully.
+      - name: Pull Model Server Docker image
+        run: |
+          docker pull onyxdotapp/onyx-model-server:latest
+          docker tag onyxdotapp/onyx-model-server:latest onyxdotapp/onyx-model-server:test
+          
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
@@ -41,6 +64,49 @@ jobs:
          pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
          pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt

+      - name: Start Docker containers
+        run: |
+          cd deployment/docker_compose
+          ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
+          AUTH_TYPE=basic \
+          REQUIRE_EMAIL_VERIFICATION=false \
+          DISABLE_TELEMETRY=true \
+          IMAGE_TAG=test \
+          docker compose -f docker-compose.dev.yml -p onyx-stack up -d indexing_model_server
+        id: start_docker
+
+      - name: Wait for service to be ready
+        run: |
+          echo "Starting wait-for-service script..."
+
+          start_time=$(date +%s)
+          timeout=300  # 5 minutes in seconds
+
+          while true; do
+            current_time=$(date +%s)
+            elapsed_time=$((current_time - start_time))
+
+            if [ $elapsed_time -ge $timeout ]; then
+              echo "Timeout reached. Service did not become ready in 5 minutes."
+              exit 1
+            fi
+
+            # On any curl failure (e.g. exit code 56) -w still prints "000", so overwrite the value rather than append to it
+            response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:9000/api/health) || response="curl_error"
+
+            if [ "$response" = "200" ]; then
+              echo "Service is ready!"
+              break
+            elif [ "$response" = "curl_error" ]; then
+              echo "Curl encountered an error, possibly exit code 56. Continuing to retry..."
+            else
+              echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
+            fi
+
+            sleep 5
+          done
+          echo "Finished waiting for service."
+          
      - name: Run Tests
        shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
        run: |
@@ -56,3 +122,10 @@ jobs:
            -H 'Content-type: application/json' \
            --data '{"text":"Scheduled Model Tests failed! Check the run at: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' \
            $SLACK_WEBHOOK
+            
+      - name: Stop Docker containers
+        if: always()
+        run: |
+          cd deployment/docker_compose
+          docker compose -f docker-compose.dev.yml -p onyx-stack down -v
+          
--- a/README.md
+++ b/README.md
@@ -24,113 +24,93 @@
 </a>
 </p>

-<strong>[Onyx](https://www.onyx.app/)</strong> (formerly Danswer) is the AI Assistant connected to your company's docs, apps, and people.
-Onyx provides a Chat interface and plugs into any LLM of your choice. Onyx can be deployed anywhere and for any
-scale - on a laptop, on-premise, or to cloud. Since you own the deployment, your user data and chats are fully in your
-own control. Onyx is dual Licensed with most of it under MIT license and designed to be modular and easily extensible. The system also comes fully ready
-for production usage with user authentication, role management (admin/basic users), chat persistence, and a UI for
-configuring AI Assistants.
+<strong>[Onyx](https://www.onyx.app/)</strong> (formerly Danswer) is the AI platform connected to your company's docs, apps, and people.
+Onyx provides a feature-rich Chat interface and plugs into any LLM of your choice.
+Keep knowledge and access controls synced across more than 40 connectors like Google Drive, Slack, Confluence, Salesforce, etc.
+Create custom AI agents with unique prompts, knowledge, and actions that the agents can take.
+Onyx can be deployed securely anywhere and for any scale - on a laptop, on-premise, or to cloud.

-Onyx also serves as a Enterprise Search across all common workplace tools such as Slack, Google Drive, Confluence, etc.
-By combining LLMs and team specific knowledge, Onyx becomes a subject matter expert for the team. Imagine ChatGPT if
-it had access to your team's unique knowledge! It enables questions such as "A customer wants feature X, is this already
-supported?" or "Where's the pull request for feature Y?"

-<h3>Usage</h3>
+<h3>Feature Highlights</h3>

-Onyx Web App:
+**Deep research over your team's knowledge:**

-https://github.com/onyx-dot-app/onyx/assets/32520769/563be14c-9304-47b5-bf0a-9049c2b6f410
+https://private-user-images.githubusercontent.com/32520769/414509312-48392e83-95d0-4fb5-8650-a396e05e0a32.mp4?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3Mzk5Mjg2MzYsIm5iZiI6MTczOTkyODMzNiwicGF0aCI6Ii8zMjUyMDc2OS80MTQ1MDkzMTItNDgzOTJlODMtOTVkMC00ZmI1LTg2NTAtYTM5NmUwNWUwYTMyLm1wND9YLUFtei1BbGdvcml0aG09QVdTNC1ITUFDLVNIQTI1NiZYLUFtei1DcmVkZW50aWFsPUFLSUFWQ09EWUxTQTUzUFFLNFpBJTJGMjAyNTAyMTklMkZ1cy1lYXN0LTElMkZzMyUyRmF3czRfcmVxdWVzdCZYLUFtei1EYXRlPTIwMjUwMjE5VDAxMjUzNlomWC1BbXotRXhwaXJlcz0zMDAmWC1BbXotU2lnbmF0dXJlPWFhMzk5Njg2Y2Y5YjFmNDNiYTQ2YzM5ZTg5YWJiYTU2NWMyY2YwNmUyODE2NWUxMDRiMWQxZWJmODI4YTA0MTUmWC1BbXotU2lnbmVkSGVhZGVycz1ob3N0In0.a9D8A0sgKE9AoaoE-mfFbJ6_OKYeqaf7TZ4Han2JfW8

-Or, plug Onyx into your existing Slack workflows (more integrations to come 😁):

-https://github.com/onyx-dot-app/onyx/assets/25087905/3e19739b-d178-4371-9a38-011430bdec1b
+**Use Onyx as a secure AI Chat with any LLM:**
+
+![Onyx Chat Silent Demo](https://github.com/onyx-dot-app/onyx/releases/download/v0.21.1/OnyxChatSilentDemo.gif)
+
+
+**Easily set up connectors to your apps:**
+
+![Onyx Connector Silent Demo](https://github.com/onyx-dot-app/onyx/releases/download/v0.21.1/OnyxConnectorSilentDemo.gif)
+
+
+**Access Onyx where your team already works:**
+
+![Onyx Bot Demo](https://github.com/onyx-dot-app/onyx/releases/download/v0.21.1/OnyxBot.png)

-For more details on the Admin UI to manage connectors and users, check out our
-<strong><a href="https://www.youtube.com/watch?v=geNzY1nbCnU">Full Video Demo</a></strong>!

 ## Deployment
+**To try it out for free and get started in seconds, check out [Onyx Cloud](https://cloud.onyx.app/signup)**.

-Onyx can easily be run locally (even on a laptop) or deployed on a virtual machine with a single
+Onyx can also be run locally (even on a laptop) or deployed on a virtual machine with a single
 `docker compose` command. Checkout our [docs](https://docs.onyx.app/quickstart) to learn more.

-We also have built-in support for deployment on Kubernetes. Files for that can be found [here](https://github.com/onyx-dot-app/onyx/tree/main/deployment/kubernetes).
+We also have built-in support for high-availability/scalable deployment on Kubernetes.
+References [here](https://github.com/onyx-dot-app/onyx/tree/main/deployment).

-## 💃 Main Features

- Chat UI with the ability to select documents to chat with.
- Create custom AI Assistants with different prompts and backing knowledge sets.
- Connect Onyx with LLM of your choice (self-host for a fully airgapped solution).
- Document Search + AI Answers for natural language queries.
- Connectors to all common workplace tools like Google Drive, Confluence, Slack, etc.
- Slack integration to get answers and search results directly in Slack.
+## 🔍 Other Notable Benefits of Onyx
+- Custom deep learning models used at indexing and inference time, available only through Onyx, plus learning from user feedback.
+- Flexible security features like SSO (OIDC/SAML/OAuth2), RBAC, encryption of credentials, etc.
+- Knowledge curation features like document-sets, query history, usage analytics, etc.
+- Scalable deployment options tested up to many tens of thousands of users and hundreds of millions of documents.
+

 ## 🚧 Roadmap
-
- Chat/Prompt sharing with specific teammates and user groups.
- Multimodal model support, chat with images, video etc.
- Choosing between LLMs and parameters during chat session.
- Tool calling and agent configurations options.
+- New methods in information retrieval (StructRAG, LightGraphRAG, etc.)
+- Personalized Search
 - Organizational understanding and ability to locate and suggest experts from your team.
+- Code Search
+- SQL and Structured Query Language

-## Other Notable Benefits of Onyx
-
- User Authentication with document level access management.
- Best in class Hybrid Search across all sources (BM-25 + prefix aware embedding models).
- Admin Dashboard to configure connectors, document-sets, access, etc.
- Custom deep learning models + learn from user feedback.
- Easy deployment and ability to host Onyx anywhere of your choosing.

 ## 🔌 Connectors
+Keep knowledge and access in sync across 40+ connectors:

-Efficiently pulls the latest changes from:
-
- Slack
- GitHub
 - Google Drive
 - Confluence
+- Slack
+- Gmail
+- Salesforce
+- Microsoft Sharepoint
+- GitHub
 - Jira
 - Zendesk
- Gmail
- Notion
 - Gong
- Slab
- Linear
- Productboard
- Guru
- Bookstack
- Document360
- Sharepoint
- Hubspot
+- Microsoft Teams
+- Dropbox
 - Local Files
 - Websites
 - And more ...

-## 📚 Editions
+See the full list [here](https://docs.onyx.app/connectors).

+
+## 📚 Licensing
 There are two editions of Onyx:

- Onyx Community Edition (CE) is available freely under the MIT Expat license. This version has ALL the core features discussed above. This is the version of Onyx you will get if you follow the Deployment guide above.
- Onyx Enterprise Edition (EE) includes extra features that are primarily useful for larger organizations. Specifically, this includes:
-  - Single Sign-On (SSO), with support for both SAML and OIDC
-  - Role-based access control
-  - Document permission inheritance from connected sources
-  - Usage analytics and query history accessible to admins
-  - Whitelabeling
-  - API key authentication
-  - Encryption of secrets
-  - And many more! Checkout [our website](https://www.onyx.app/) for the latest.
+- Onyx Community Edition (CE) is available freely under the MIT Expat license. Simply follow the Deployment guide above.
+- Onyx Enterprise Edition (EE) includes extra features that are primarily useful for larger organizations.
+For feature details, check out [our website](https://www.onyx.app/pricing).

 To try the Onyx Enterprise Edition:
+1. Check out [Onyx Cloud](https://cloud.onyx.app/signup).
+2. For self-hosting the Enterprise Edition, contact us at [founders@onyx.app](mailto:founders@onyx.app) or book a call with us on our [Cal](https://cal.com/team/onyx/founders).

-1. Checkout our [Cloud product](https://cloud.onyx.app/signup).
-2. For self-hosting, contact us at [founders@onyx.app](mailto:founders@onyx.app) or book a call with us on our [Cal](https://cal.com/team/onyx/founders).

 ## 💡 Contributing
-
 Looking to contribute? Please check out the [Contribution Guide](CONTRIBUTING.md) for more details.
-
-## ⭐Star History
-
-[![Star History Chart](https://api.star-history.com/svg?repos=onyx-dot-app/onyx&type=Date)](https://star-history.com/#onyx-dot-app/onyx&Date)
-
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -28,11 +28,11 @@ RUN apt-get update && \
        curl \
        zip \
        ca-certificates \
-        libgnutls30=3.7.9-2+deb12u3 \
-        libblkid1=2.38.1-5+deb12u1 \
-        libmount1=2.38.1-5+deb12u1 \
-        libsmartcols1=2.38.1-5+deb12u1 \
-        libuuid1=2.38.1-5+deb12u1 \
+        libgnutls30 \
+        libblkid1 \
+        libmount1 \
+        libsmartcols1 \
+        libuuid1 \
        libxmlsec1-dev \
        pkg-config \
        gcc \
--- a/backend/alembic/env.py
+++ b/backend/alembic/env.py
@@ -1,6 +1,6 @@
 from typing import Any, Literal
-from onyx.db.engine import SYNC_DB_API, get_iam_auth_token
-from onyx.configs.app_configs import POSTGRES_DB, USE_IAM_AUTH
+from onyx.db.engine import get_iam_auth_token
+from onyx.configs.app_configs import USE_IAM_AUTH
 from onyx.configs.app_configs import POSTGRES_HOST
 from onyx.configs.app_configs import POSTGRES_PORT
 from onyx.configs.app_configs import POSTGRES_USER
@@ -13,11 +13,12 @@ from sqlalchemy import text
 from sqlalchemy.engine.base import Connection
 import os
 import ssl
+import asyncio
 import logging
 from logging.config import fileConfig

 from alembic import context
-from sqlalchemy import create_engine
+from sqlalchemy.ext.asyncio import create_async_engine
 from sqlalchemy.sql.schema import SchemaItem
 from onyx.configs.constants import SSL_CERT_FILE
 from shared_configs.configs import MULTI_TENANT, POSTGRES_DEFAULT_SCHEMA
@@ -132,32 +133,17 @@ def provide_iam_token_for_alembic(
        cparams["ssl"] = ssl_context


-def run_migrations() -> None:
+async def run_async_migrations() -> None:
    schema_name, create_schema, upgrade_all_tenants = get_schema_options()

-    # Get any environment variables passed through alembic config
-    env_vars = context.config.attributes.get("env_vars", {})
-
-    # Use env vars if provided, otherwise fall back to defaults
-    postgres_host = env_vars.get("POSTGRES_HOST", POSTGRES_HOST)
-    postgres_port = env_vars.get("POSTGRES_PORT", POSTGRES_PORT)
-    postgres_user = env_vars.get("POSTGRES_USER", POSTGRES_USER)
-    postgres_db = env_vars.get("POSTGRES_DB", POSTGRES_DB)
-
-    engine = create_engine(
-        build_connection_string(
-            db=postgres_db,
-            user=postgres_user,
-            host=postgres_host,
-            port=postgres_port,
-            db_api=SYNC_DB_API,
-        ),
+    engine = create_async_engine(
+        build_connection_string(),
        poolclass=pool.NullPool,
    )

    if USE_IAM_AUTH:

-        @event.listens_for(engine, "do_connect")
+        @event.listens_for(engine.sync_engine, "do_connect")
        def event_provide_iam_token_for_alembic(
            dialect: Any, conn_rec: Any, cargs: Any, cparams: Any
        ) -> None:
@@ -166,26 +152,31 @@ def run_migrations() -> None:
    if upgrade_all_tenants:
        tenant_schemas = get_all_tenant_ids()
        for schema in tenant_schemas:
-            if schema is None:
-                continue
-
            try:
                logger.info(f"Migrating schema: {schema}")
-                with engine.connect() as connection:
-                    do_run_migrations(connection, schema, create_schema)
+                async with engine.connect() as connection:
+                    await connection.run_sync(
+                        do_run_migrations,
+                        schema_name=schema,
+                        create_schema=create_schema,
+                    )
            except Exception as e:
                logger.error(f"Error migrating schema {schema}: {e}")
                raise
    else:
        try:
            logger.info(f"Migrating schema: {schema_name}")
-            with engine.connect() as connection:
-                do_run_migrations(connection, schema_name, create_schema)
+            async with engine.connect() as connection:
+                await connection.run_sync(
+                    do_run_migrations,
+                    schema_name=schema_name,
+                    create_schema=create_schema,
+                )
        except Exception as e:
            logger.error(f"Error migrating schema {schema_name}: {e}")
            raise

-    engine.dispose()
+    await engine.dispose()


 def run_migrations_offline() -> None:
@@ -193,18 +184,18 @@ def run_migrations_offline() -> None:
    url = build_connection_string()

    if upgrade_all_tenants:
-        engine = create_engine(url)
+        engine = create_async_engine(url)

        if USE_IAM_AUTH:

-            @event.listens_for(engine, "do_connect")
+            @event.listens_for(engine.sync_engine, "do_connect")
            def event_provide_iam_token_for_alembic_offline(
                dialect: Any, conn_rec: Any, cargs: Any, cparams: Any
            ) -> None:
                provide_iam_token_for_alembic(dialect, conn_rec, cargs, cparams)

        tenant_schemas = get_all_tenant_ids()
-        engine.dispose()
+        engine.sync_engine.dispose()

        for schema in tenant_schemas:
            logger.info(f"Migrating schema: {schema}")
@@ -239,7 +230,7 @@ def run_migrations_offline() -> None:


 def run_migrations_online() -> None:
-    run_migrations()
+    asyncio.run(run_async_migrations())


 if context.is_offline_mode():
--- a/backend/alembic/versions/1a03d2c2856b_add_indexes_to_document__tag.py
+++ b/backend/alembic/versions/1a03d2c2856b_add_indexes_to_document__tag.py
@@ -0,0 +1,27 @@
+"""Add indexes to document__tag
+
+Revision ID: 1a03d2c2856b
+Revises: 9c00a2bccb83
+Create Date: 2025-02-18 10:45:13.957807
+
+"""
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "1a03d2c2856b"
+down_revision = "9c00a2bccb83"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+    op.create_index(
+        op.f("ix_document__tag_tag_id"),
+        "document__tag",
+        ["tag_id"],
+        unique=False,
+    )
+
+
+def downgrade() -> None:
+    op.drop_index(op.f("ix_document__tag_tag_id"), table_name="document__tag")
--- a/backend/alembic/versions/9c00a2bccb83_chat_message_agentic.py
+++ b/backend/alembic/versions/9c00a2bccb83_chat_message_agentic.py
@@ -0,0 +1,43 @@
+"""chat_message_agentic
+
+Revision ID: 9c00a2bccb83
+Revises: b7a7eee5aa15
+Create Date: 2025-02-17 11:15:43.081150
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "9c00a2bccb83"
+down_revision = "b7a7eee5aa15"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # First add the column as nullable
+    op.add_column("chat_message", sa.Column("is_agentic", sa.Boolean(), nullable=True))
+
+    # Update existing rows based on presence of SubQuestions
+    op.execute(
+        """
+        UPDATE chat_message
+        SET is_agentic = EXISTS (
+            SELECT 1
+            FROM agent__sub_question
+            WHERE agent__sub_question.primary_question_id = chat_message.id
+        )
+        WHERE is_agentic IS NULL
+    """
+    )
+
+    # Make the column non-nullable with a default value of False
+    op.alter_column(
+        "chat_message", "is_agentic", nullable=False, server_default=sa.text("false")
+    )
+
+
+def downgrade() -> None:
+    op.drop_column("chat_message", "is_agentic")
--- a/backend/alembic/versions/acaab4ef4507_remove_inactive_ccpair_status_on_.py
+++ b/backend/alembic/versions/acaab4ef4507_remove_inactive_ccpair_status_on_.py
@@ -0,0 +1,29 @@
+"""remove inactive ccpair status on downgrade
+
+Revision ID: acaab4ef4507
+Revises: b388730a2899
+Create Date: 2025-02-16 18:21:41.330212
+
+"""
+from alembic import op
+from onyx.db.models import ConnectorCredentialPair
+from onyx.db.enums import ConnectorCredentialPairStatus
+from sqlalchemy import update
+
+# revision identifiers, used by Alembic.
+revision = "acaab4ef4507"
+down_revision = "b388730a2899"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    pass
+
+
+def downgrade() -> None:
+    op.execute(
+        update(ConnectorCredentialPair)
+        .where(ConnectorCredentialPair.status == ConnectorCredentialPairStatus.INVALID)
+        .values(status=ConnectorCredentialPairStatus.ACTIVE)
+    )
--- a/backend/alembic/versions/b388730a2899_nullable_preferences.py
+++ b/backend/alembic/versions/b388730a2899_nullable_preferences.py
@@ -0,0 +1,31 @@
+"""nullable preferences
+
+Revision ID: b388730a2899
+Revises: 1a03d2c2856b
+Create Date: 2025-02-17 18:49:22.643902
+
+"""
+from alembic import op
+
+
+# revision identifiers, used by Alembic.
+revision = "b388730a2899"
+down_revision = "1a03d2c2856b"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    op.alter_column("user", "temperature_override_enabled", nullable=True)
+    op.alter_column("user", "auto_scroll", nullable=True)
+
+
+def downgrade() -> None:
+    # Ensure no null values before making columns non-nullable
+    op.execute(
+        'UPDATE "user" SET temperature_override_enabled = false WHERE temperature_override_enabled IS NULL'
+    )
+    op.execute('UPDATE "user" SET auto_scroll = false WHERE auto_scroll IS NULL')
+
+    op.alter_column("user", "temperature_override_enabled", nullable=False)
+    op.alter_column("user", "auto_scroll", nullable=False)
--- a/backend/alembic/versions/f13db29f3101_add_composite_index_for_last_modified_.py
+++ b/backend/alembic/versions/f13db29f3101_add_composite_index_for_last_modified_.py
@@ -0,0 +1,27 @@
+"""Add composite index for last_modified and last_synced to document
+
+Revision ID: f13db29f3101
+Revises: acaab4ef4507
+Create Date: 2025-02-18 22:48:11.511389
+
+"""
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "f13db29f3101"
+down_revision = "acaab4ef4507"
+branch_labels: str | None = None
+depends_on: str | None = None
+
+
+def upgrade() -> None:
+    op.create_index(
+        "ix_document_sync_status",
+        "document",
+        ["last_modified", "last_synced"],
+        unique=False,
+    )
+
+
+def downgrade() -> None:
+    op.drop_index("ix_document_sync_status", table_name="document")
--- a/backend/ee/onyx/background/celery/apps/primary.py
+++ b/backend/ee/onyx/background/celery/apps/primary.py
@@ -21,7 +21,7 @@ logger = setup_logger()
 def perform_ttl_management_task(
    retention_limit_days: int, *, tenant_id: str | None
 ) -> None:
-    with get_session_with_tenant(tenant_id) as db_session:
+    with get_session_with_tenant(tenant_id=tenant_id) as db_session:
        delete_chat_sessions_older_than(retention_limit_days, db_session)


@@ -44,7 +44,7 @@ def check_ttl_management_task(*, tenant_id: str | None) -> None:

    settings = load_settings()
    retention_limit_days = settings.maximum_chat_retention_days
-    with get_session_with_tenant(tenant_id) as db_session:
+    with get_session_with_tenant(tenant_id=tenant_id) as db_session:
        if should_perform_chat_ttl_check(retention_limit_days, db_session):
            perform_ttl_management_task.apply_async(
                kwargs=dict(
@@ -62,7 +62,7 @@ def check_ttl_management_task(*, tenant_id: str | None) -> None:
 )
 def autogenerate_usage_report_task(*, tenant_id: str | None) -> None:
    """This generates usage report under the /admin/generate-usage/report endpoint"""
-    with get_session_with_tenant(tenant_id) as db_session:
+    with get_session_with_tenant(tenant_id=tenant_id) as db_session:
        create_new_usage_report(
            db_session=db_session,
            user_id=None,
--- a/backend/ee/onyx/external_permissions/confluence/group_sync.py
+++ b/backend/ee/onyx/external_permissions/confluence/group_sync.py
@@ -14,30 +14,24 @@ def _build_group_member_email_map(
    confluence_client: OnyxConfluence, cc_pair_id: int
 ) -> dict[str, set[str]]:
    group_member_emails: dict[str, set[str]] = {}
-    for user_result in confluence_client.paginated_cql_user_retrieval():
-        logger.debug(f"Processing groups for user: {user_result}")
+    for user in confluence_client.paginated_cql_user_retrieval():
+        logger.debug(f"Processing groups for user: {user}")

-        user = user_result.get("user", {})
-        if not user:
-            msg = f"user result missing user field: {user_result}"
-            emit_background_error(msg, cc_pair_id=cc_pair_id)
-            logger.error(msg)
-            continue
-
-        email = user.get("email")
+        email = user.email
        if not email:
            # This field is only present in Confluence Server
-            user_name = user.get("username")
+            user_name = user.username
            # If it is present, try to get the email using a Server-specific method
            if user_name:
                email = get_user_email_from_username__server(
                    confluence_client=confluence_client,
                    user_name=user_name,
                )
+
        if not email:
            # If we still don't have an email, skip this user
-            msg = f"user result missing email field: {user_result}"
-            if user.get("type") == "app":
+            msg = f"user result missing email field: {user}"
+            if user.type == "app":
                logger.warning(msg)
            else:
                emit_background_error(msg, cc_pair_id=cc_pair_id)
@@ -45,7 +39,7 @@ def _build_group_member_email_map(
            continue

        all_users_groups: set[str] = set()
-        for group in confluence_client.paginated_groups_by_user_retrieval(user):
+        for group in confluence_client.paginated_groups_by_user_retrieval(user.user_id):
            # group name uniqueness is enforced by Confluence, so we can use it as a group ID
            group_id = group["name"]
            group_member_emails.setdefault(group_id, set()).add(email)
--- a/backend/ee/onyx/server/middleware/tenant_tracking.py
+++ b/backend/ee/onyx/server/middleware/tenant_tracking.py
@@ -33,7 +33,7 @@ def add_tenant_id_middleware(app: FastAPI, logger: logging.LoggerAdapter) -> Non
            return await call_next(request)

        except Exception as e:
-            logger.error(f"Error in tenant ID middleware: {str(e)}")
+            logger.exception(f"Error in tenant ID middleware: {str(e)}")
            raise


@@ -49,7 +49,7 @@ async def _get_tenant_id_from_request(
    """
    # Check for API key
    tenant_id = extract_tenant_from_api_key_header(request)
-    if tenant_id:
+    if tenant_id is not None:
        return tenant_id

    # Check for anonymous user cookie
--- a/backend/ee/onyx/server/oauth.py
+++ b/backend/ee/onyx/server/oauth.py
@@ -36,12 +36,12 @@ from onyx.connectors.google_utils.shared_constants import (
    GoogleOAuthAuthenticationMethod,
 )
 from onyx.db.credentials import create_credential
-from onyx.db.engine import get_current_tenant_id
 from onyx.db.engine import get_session
 from onyx.db.models import User
 from onyx.redis.redis_pool import get_redis_client
 from onyx.server.documents.models import CredentialBase
 from onyx.utils.logger import setup_logger
+from shared_configs.contextvars import get_current_tenant_id


 logger = setup_logger()
@@ -271,12 +271,12 @@ def prepare_authorization_request(
    connector: DocumentSource,
    redirect_on_success: str | None,
    user: User = Depends(current_user),
-    tenant_id: str | None = Depends(get_current_tenant_id),
 ) -> JSONResponse:
    """Used by the frontend to generate the url for the user's browser during auth request.

    Example: https://www.oauth.com/oauth2-servers/authorization/the-authorization-request/
    """
+    tenant_id = get_current_tenant_id()

    # create random oauth state param for security and to retrieve user data later
    oauth_uuid = uuid.uuid4()
@@ -329,7 +329,6 @@ def handle_slack_oauth_callback(
    state: str,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
-    tenant_id: str | None = Depends(get_current_tenant_id),
 ) -> JSONResponse:
    if not SlackOAuth.CLIENT_ID or not SlackOAuth.CLIENT_SECRET:
        raise HTTPException(
@@ -337,7 +336,7 @@ def handle_slack_oauth_callback(
            detail="Slack client ID or client secret is not configured.",
        )

-    r = get_redis_client(tenant_id=tenant_id)
+    r = get_redis_client()

    # recover the state
    padded_state = state + "=" * (
@@ -523,7 +522,6 @@ def handle_google_drive_oauth_callback(
    state: str,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
-    tenant_id: str | None = Depends(get_current_tenant_id),
 ) -> JSONResponse:
    if not GoogleDriveOAuth.CLIENT_ID or not GoogleDriveOAuth.CLIENT_SECRET:
        raise HTTPException(
@@ -531,7 +529,7 @@ def handle_google_drive_oauth_callback(
            detail="Google Drive client ID or client secret is not configured.",
        )

-    r = get_redis_client(tenant_id=tenant_id)
+    r = get_redis_client()

    # recover the state
    padded_state = state + "=" * (
--- a/backend/ee/onyx/server/query_and_chat/token_limit.py
+++ b/backend/ee/onyx/server/query_and_chat/token_limit.py
@@ -28,7 +28,7 @@ from onyx.server.query_and_chat.token_limit import _user_is_rate_limited_by_glob
 from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel


-def _check_token_rate_limits(user: User | None, tenant_id: str | None) -> None:
+def _check_token_rate_limits(user: User | None, tenant_id: str) -> None:
    if user is None:
        # Unauthenticated users are only rate limited by global settings
        _user_is_rate_limited_by_global(tenant_id)
@@ -52,8 +52,8 @@ User rate limits
 """


-def _user_is_rate_limited(user_id: UUID, tenant_id: str | None) -> None:
-    with get_session_with_tenant(tenant_id) as db_session:
+def _user_is_rate_limited(user_id: UUID, tenant_id: str) -> None:
+    with get_session_with_tenant(tenant_id=tenant_id) as db_session:
        user_rate_limits = fetch_all_user_token_rate_limits(
            db_session=db_session, enabled_only=True, ordered=False
        )
@@ -94,7 +94,7 @@ User Group rate limits


 def _user_is_rate_limited_by_group(user_id: UUID, tenant_id: str | None) -> None:
-    with get_session_with_tenant(tenant_id) as db_session:
+    with get_session_with_tenant(tenant_id=tenant_id) as db_session:
        group_rate_limits = _fetch_all_user_group_rate_limits(user_id, db_session)

        if group_rate_limits:
--- a/backend/ee/onyx/server/tenants/api.py
+++ b/backend/ee/onyx/server/tenants/api.py
@@ -41,14 +41,15 @@ from onyx.auth.users import User
 from onyx.configs.app_configs import WEB_DOMAIN
 from onyx.configs.constants import FASTAPI_USERS_AUTH_COOKIE_NAME
 from onyx.db.auth import get_user_count
-from onyx.db.engine import get_current_tenant_id
 from onyx.db.engine import get_session
+from onyx.db.engine import get_session_with_shared_schema
 from onyx.db.engine import get_session_with_tenant
 from onyx.db.users import delete_user_from_db
 from onyx.db.users import get_user_by_email
 from onyx.server.manage.models import UserByEmail
 from onyx.utils.logger import setup_logger
 from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
+from shared_configs.contextvars import get_current_tenant_id

 stripe.api_key = STRIPE_SECRET_KEY
 logger = setup_logger()
@@ -57,13 +58,14 @@ router = APIRouter(prefix="/tenants")

@router.get("/anonymous-user-path")
 async def get_anonymous_user_path_api(
-    tenant_id: str | None = Depends(get_current_tenant_id),
    _: User | None = Depends(current_admin_user),
 ) -> AnonymousUserPath:
+    tenant_id = get_current_tenant_id()
+
    if tenant_id is None:
        raise HTTPException(status_code=404, detail="Tenant not found")

-    with get_session_with_tenant(tenant_id=None) as db_session:
+    with get_session_with_shared_schema() as db_session:
        current_path = get_anonymous_user_path(tenant_id, db_session)

    return AnonymousUserPath(anonymous_user_path=current_path)
@@ -72,15 +74,15 @@ async def get_anonymous_user_path_api(
@router.post("/anonymous-user-path")
 async def set_anonymous_user_path_api(
    anonymous_user_path: str,
-    tenant_id: str = Depends(get_current_tenant_id),
    _: User | None = Depends(current_admin_user),
 ) -> None:
+    tenant_id = get_current_tenant_id()
    try:
        validate_anonymous_user_path(anonymous_user_path)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))

-    with get_session_with_tenant(tenant_id=None) as db_session:
+    with get_session_with_shared_schema() as db_session:
        try:
            modify_anonymous_user_path(tenant_id, anonymous_user_path, db_session)
        except IntegrityError:
@@ -101,7 +103,7 @@ async def login_as_anonymous_user(
    anonymous_user_path: str,
    _: User | None = Depends(optional_user),
 ) -> Response:
-    with get_session_with_tenant(tenant_id=None) as db_session:
+    with get_session_with_shared_schema() as db_session:
        tenant_id = get_tenant_id_for_anonymous_user_path(
            anonymous_user_path, db_session
        )
@@ -150,14 +152,17 @@ async def billing_information(
    _: User = Depends(current_admin_user),
 ) -> BillingInformation | SubscriptionStatusResponse:
    logger.info("Fetching billing information")
-    return fetch_billing_information(CURRENT_TENANT_ID_CONTEXTVAR.get())
+    tenant_id = get_current_tenant_id()
+    return fetch_billing_information(tenant_id)


@router.post("/create-customer-portal-session")
-async def create_customer_portal_session(_: User = Depends(current_admin_user)) -> dict:
+async def create_customer_portal_session(
+    _: User = Depends(current_admin_user),
+) -> dict:
+    tenant_id = get_current_tenant_id()
+
    try:
-        # Fetch tenant_id and current tenant's information
-        tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
        stripe_info = fetch_tenant_stripe_information(tenant_id)
        stripe_customer_id = stripe_info.get("stripe_customer_id")
        if not stripe_customer_id:
@@ -181,6 +186,8 @@ async def create_subscription_session(
 ) -> SubscriptionSessionResponse:
    try:
        tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
+        if not tenant_id:
+            raise HTTPException(status_code=400, detail="Tenant ID not found")
        session_id = fetch_stripe_checkout_session(tenant_id)
        return SubscriptionSessionResponse(sessionId=session_id)

@@ -197,7 +204,7 @@ async def impersonate_user(
    """Allows a cloud superuser to impersonate another user by generating an impersonation JWT token"""
    tenant_id = get_tenant_id_for_email(impersonate_request.email)

-    with get_session_with_tenant(tenant_id) as tenant_session:
+    with get_session_with_tenant(tenant_id=tenant_id) as tenant_session:
        user_to_impersonate = get_user_by_email(
            impersonate_request.email, tenant_session
        )
@@ -221,8 +228,9 @@ async def leave_organization(
    user_email: UserByEmail,
    current_user: User | None = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
-    tenant_id: str = Depends(get_current_tenant_id),
 ) -> None:
+    tenant_id = get_current_tenant_id()
+
    if current_user is None or current_user.email != user_email.user_email:
        raise HTTPException(
            status_code=403, detail="You can only leave the organization as yourself"
--- a/backend/ee/onyx/server/tenants/provisioning.py
+++ b/backend/ee/onyx/server/tenants/provisioning.py
@@ -118,7 +118,7 @@ async def provision_tenant(tenant_id: str, email: str) -> None:
        # Await the Alembic migrations
        await asyncio.to_thread(run_alembic_migrations, tenant_id)

-        with get_session_with_tenant(tenant_id) as db_session:
+        with get_session_with_tenant(tenant_id=tenant_id) as db_session:
            configure_default_api_keys(db_session)

            current_search_settings = (
@@ -134,7 +134,7 @@ async def provision_tenant(tenant_id: str, email: str) -> None:

        add_users_to_tenant([email], tenant_id)

-        with get_session_with_tenant(tenant_id) as db_session:
+        with get_session_with_tenant(tenant_id=tenant_id) as db_session:
            create_milestone_and_report(
                user=None,
                distinct_id=tenant_id,
--- a/backend/ee/onyx/server/tenants/user_mapping.py
+++ b/backend/ee/onyx/server/tenants/user_mapping.py
@@ -28,7 +28,7 @@ def get_tenant_id_for_email(email: str) -> str:


 def user_owns_a_tenant(email: str) -> bool:
-    with get_session_with_tenant(POSTGRES_DEFAULT_SCHEMA) as db_session:
+    with get_session_with_tenant(tenant_id=None) as db_session:
        result = (
            db_session.query(UserTenantMapping)
            .filter(UserTenantMapping.email == email)
@@ -38,7 +38,7 @@ def user_owns_a_tenant(email: str) -> bool:


 def add_users_to_tenant(emails: list[str], tenant_id: str) -> None:
-    with get_session_with_tenant(POSTGRES_DEFAULT_SCHEMA) as db_session:
+    with get_session_with_tenant(tenant_id=None) as db_session:
        try:
            for email in emails:
                db_session.add(UserTenantMapping(email=email, tenant_id=tenant_id))
@@ -48,7 +48,7 @@ def add_users_to_tenant(emails: list[str], tenant_id: str) -> None:


 def remove_users_from_tenant(emails: list[str], tenant_id: str) -> None:
-    with get_session_with_tenant(POSTGRES_DEFAULT_SCHEMA) as db_session:
+    with get_session_with_tenant(tenant_id=None) as db_session:
        try:
            mappings_to_delete = (
                db_session.query(UserTenantMapping)
@@ -71,7 +71,7 @@ def remove_users_from_tenant(emails: list[str], tenant_id: str) -> None:


 def remove_all_users_from_tenant(tenant_id: str) -> None:
-    with get_session_with_tenant(POSTGRES_DEFAULT_SCHEMA) as db_session:
+    with get_session_with_tenant(tenant_id=None) as db_session:
        db_session.query(UserTenantMapping).filter(
            UserTenantMapping.tenant_id == tenant_id
        ).delete()
--- a/backend/model_server/encoders.py
+++ b/backend/model_server/encoders.py
@@ -98,12 +98,17 @@ class CloudEmbedding:
            return final_embeddings
        except Exception as e:
            error_string = (
-                f"Error embedding text with OpenAI: {str(e)} \n"
-                f"Model: {model} \n"
-                f"Provider: {self.provider} \n"
-                f"Texts: {texts}"
+                f"Exception embedding text with OpenAI - {type(e)}: "
+                f"Model: {model} "
+                f"Provider: {self.provider} "
+                f"Exception: {e}"
            )
            logger.error(error_string)
+
+            # only log text when it's not an authentication error.
+            if not isinstance(e, openai.AuthenticationError):
+                logger.debug(f"Exception texts: {texts}")
+
            raise RuntimeError(error_string)

    async def _embed_cohere(
--- a/backend/onyx/agents/agent_search/basic/graph_builder.py
+++ b/backend/onyx/agents/agent_search/basic/graph_builder.py
@@ -5,14 +5,14 @@ from langgraph.graph import StateGraph
 from onyx.agents.agent_search.basic.states import BasicInput
 from onyx.agents.agent_search.basic.states import BasicOutput
 from onyx.agents.agent_search.basic.states import BasicState
-from onyx.agents.agent_search.orchestration.nodes.basic_use_tool_response import (
-    basic_use_tool_response,
-)
-from onyx.agents.agent_search.orchestration.nodes.llm_tool_choice import llm_tool_choice
+from onyx.agents.agent_search.orchestration.nodes.call_tool import call_tool
+from onyx.agents.agent_search.orchestration.nodes.choose_tool import choose_tool
 from onyx.agents.agent_search.orchestration.nodes.prepare_tool_input import (
    prepare_tool_input,
 )
-from onyx.agents.agent_search.orchestration.nodes.tool_call import tool_call
+from onyx.agents.agent_search.orchestration.nodes.use_tool_response import (
+    basic_use_tool_response,
+)
 from onyx.utils.logger import setup_logger

 logger = setup_logger()
@@ -33,13 +33,13 @@ def basic_graph_builder() -> StateGraph:
    )

    graph.add_node(
-        node="llm_tool_choice",
-        action=llm_tool_choice,
+        node="choose_tool",
+        action=choose_tool,
    )

    graph.add_node(
-        node="tool_call",
-        action=tool_call,
+        node="call_tool",
+        action=call_tool,
    )

    graph.add_node(
@@ -51,12 +51,12 @@ def basic_graph_builder() -> StateGraph:

    graph.add_edge(start_key=START, end_key="prepare_tool_input")

-    graph.add_edge(start_key="prepare_tool_input", end_key="llm_tool_choice")
+    graph.add_edge(start_key="prepare_tool_input", end_key="choose_tool")

-    graph.add_conditional_edges("llm_tool_choice", should_continue, ["tool_call", END])
+    graph.add_conditional_edges("choose_tool", should_continue, ["call_tool", END])

    graph.add_edge(
-        start_key="tool_call",
+        start_key="call_tool",
        end_key="basic_use_tool_response",
    )

@@ -73,7 +73,7 @@ def should_continue(state: BasicState) -> str:
        # If there are no tool calls, basic graph already streamed the answer
        END
        if state.tool_choice is None
-        else "tool_call"
+        else "call_tool"
    )


--- a/backend/onyx/agents/agent_search/deep_search/initial/generate_individual_sub_answer/nodes/check_sub_answer.py
+++ b/backend/onyx/agents/agent_search/deep_search/initial/generate_individual_sub_answer/nodes/check_sub_answer.py
@@ -31,12 +31,14 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id
-from onyx.configs.agent_configs import AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK
+from onyx.configs.agent_configs import AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_CHECK
+from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_SUBANSWER_CHECK
 from onyx.llm.chat_llm import LLMRateLimitError
 from onyx.llm.chat_llm import LLMTimeoutError
 from onyx.prompts.agent_search import SUB_ANSWER_CHECK_PROMPT
 from onyx.prompts.agent_search import UNKNOWN_ANSWER
 from onyx.utils.logger import setup_logger
+from onyx.utils.threadpool_concurrency import run_with_timeout
 from onyx.utils.timing import log_function_time

 logger = setup_logger()
@@ -85,9 +87,11 @@ def check_sub_answer(
    agent_error: AgentErrorLog | None = None
    response: BaseMessage | None = None
    try:
-        response = fast_llm.invoke(
+        response = run_with_timeout(
+            AGENT_TIMEOUT_LLM_SUBANSWER_CHECK,
+            fast_llm.invoke,
            prompt=msg,
-            timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK,
+            timeout_override=AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_CHECK,
        )

        quality_str: str = cast(str, response.content)
@@ -96,7 +100,7 @@ def check_sub_answer(
        )
        log_result = f"Answer quality: {quality_str}"

-    except LLMTimeoutError:
+    except (LLMTimeoutError, TimeoutError):
        agent_error = AgentErrorLog(
            error_type=AgentLLMErrorType.TIMEOUT,
            error_message=AGENT_LLM_TIMEOUT_MESSAGE,
--- a/backend/onyx/agents/agent_search/deep_search/initial/generate_individual_sub_answer/nodes/generate_sub_answer.py
+++ b/backend/onyx/agents/agent_search/deep_search/initial/generate_individual_sub_answer/nodes/generate_sub_answer.py
@@ -1,5 +1,4 @@
 from datetime import datetime
-from typing import Any
 from typing import cast

 from langchain_core.messages import merge_message_runs
@@ -47,11 +46,13 @@ from onyx.chat.models import StreamStopInfo
 from onyx.chat.models import StreamStopReason
 from onyx.chat.models import StreamType
 from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
-from onyx.configs.agent_configs import AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION
+from onyx.configs.agent_configs import AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION
+from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_SUBANSWER_GENERATION
 from onyx.llm.chat_llm import LLMRateLimitError
 from onyx.llm.chat_llm import LLMTimeoutError
 from onyx.prompts.agent_search import NO_RECOVERED_DOCS
 from onyx.utils.logger import setup_logger
+from onyx.utils.threadpool_concurrency import run_with_timeout
 from onyx.utils.timing import log_function_time

 logger = setup_logger()
@@ -110,15 +111,14 @@ def generate_sub_answer(
            config=fast_llm.config,
        )

-        response: list[str | list[str | dict[str, Any]]] = []
        dispatch_timings: list[float] = []
-
        agent_error: AgentErrorLog | None = None
+        response: list[str] = []

-        try:
+        def stream_sub_answer() -> list[str]:
            for message in fast_llm.stream(
                prompt=msg,
-                timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION,
+                timeout_override=AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION,
            ):
                # TODO: in principle, the answer here COULD contain images, but we don't support that yet
                content = message.content
@@ -142,8 +142,15 @@ def generate_sub_answer(
                    (end_stream_token - start_stream_token).microseconds
                )
                response.append(content)
+            return response

-        except LLMTimeoutError:
+        try:
+            response = run_with_timeout(
+                AGENT_TIMEOUT_LLM_SUBANSWER_GENERATION,
+                stream_sub_answer,
+            )
+
+        except (LLMTimeoutError, TimeoutError):
            agent_error = AgentErrorLog(
                error_type=AgentLLMErrorType.TIMEOUT,
                error_message=AGENT_LLM_TIMEOUT_MESSAGE,
--- a/backend/onyx/agents/agent_search/deep_search/initial/generate_initial_answer/nodes/generate_initial_answer.py
+++ b/backend/onyx/agents/agent_search/deep_search/initial/generate_initial_answer/nodes/generate_initial_answer.py
@@ -1,5 +1,4 @@
 from datetime import datetime
-from typing import Any
 from typing import cast

 from langchain_core.messages import HumanMessage
@@ -60,11 +59,15 @@ from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
 from onyx.chat.models import AgentAnswerPiece
 from onyx.chat.models import ExtendedToolResponse
 from onyx.chat.models import StreamingError
+from onyx.configs.agent_configs import AGENT_ANSWER_GENERATION_BY_FAST_LLM
 from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
 from onyx.configs.agent_configs import AGENT_MAX_STREAMED_DOCS_FOR_INITIAL_ANSWER
 from onyx.configs.agent_configs import AGENT_MIN_ORIG_QUESTION_DOCS
 from onyx.configs.agent_configs import (
-    AGENT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION,
+    AGENT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION,
+)
+from onyx.configs.agent_configs import (
+    AGENT_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION,
 )
 from onyx.llm.chat_llm import LLMRateLimitError
 from onyx.llm.chat_llm import LLMTimeoutError
@@ -77,6 +80,7 @@ from onyx.prompts.agent_search import (
 )
 from onyx.prompts.agent_search import UNKNOWN_ANSWER
 from onyx.tools.tool_implementations.search.search_tool import yield_search_responses
+from onyx.utils.threadpool_concurrency import run_with_timeout
 from onyx.utils.timing import log_function_time

 _llm_node_error_strings = LLMNodeErrorStrings(
@@ -230,7 +234,11 @@ def generate_initial_answer(

        sub_questions = all_sub_questions  # Replace the original assignment

-        model = graph_config.tooling.fast_llm
+        model = (
+            graph_config.tooling.fast_llm
+            if AGENT_ANSWER_GENERATION_BY_FAST_LLM
+            else graph_config.tooling.primary_llm
+        )

        doc_context = format_docs(answer_generation_documents.context_documents)
        doc_context = trim_prompt_piece(
@@ -260,15 +268,16 @@ def generate_initial_answer(
            )
        ]

-        streamed_tokens: list[str | list[str | dict[str, Any]]] = [""]
+        streamed_tokens: list[str] = [""]
        dispatch_timings: list[float] = []

        agent_error: AgentErrorLog | None = None

-        try:
+        def stream_initial_answer() -> list[str]:
+            response: list[str] = []
            for message in model.stream(
                msg,
-                timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION,
+                timeout_override=AGENT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION,
            ):
                # TODO: in principle, the answer here COULD contain images, but we don't support that yet
                content = message.content
@@ -292,9 +301,16 @@ def generate_initial_answer(
                dispatch_timings.append(
                    (end_stream_token - start_stream_token).microseconds
                )
-                streamed_tokens.append(content)
+                response.append(content)
+            return response

-        except LLMTimeoutError:
+        try:
+            streamed_tokens = run_with_timeout(
+                AGENT_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION,
+                stream_initial_answer,
+            )
+
+        except (LLMTimeoutError, TimeoutError):
            agent_error = AgentErrorLog(
                error_type=AgentLLMErrorType.TIMEOUT,
                error_message=AGENT_LLM_TIMEOUT_MESSAGE,
--- a/backend/onyx/agents/agent_search/deep_search/initial/generate_sub_answers/nodes/decompose_orig_question.py
+++ b/backend/onyx/agents/agent_search/deep_search/initial/generate_sub_answers/nodes/decompose_orig_question.py
@@ -36,7 +36,10 @@ from onyx.chat.models import StreamType
 from onyx.chat.models import SubQuestionPiece
 from onyx.configs.agent_configs import AGENT_NUM_DOCS_FOR_DECOMPOSITION
 from onyx.configs.agent_configs import (
-    AGENT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION,
+    AGENT_TIMEOUT_CONNECT_LLM_SUBQUESTION_GENERATION,
+)
+from onyx.configs.agent_configs import (
+    AGENT_TIMEOUT_LLM_SUBQUESTION_GENERATION,
 )
 from onyx.llm.chat_llm import LLMRateLimitError
 from onyx.llm.chat_llm import LLMTimeoutError
@@ -47,6 +50,7 @@ from onyx.prompts.agent_search import (
    INITIAL_QUESTION_DECOMPOSITION_PROMPT_ASSUMING_REFINEMENT,
 )
 from onyx.utils.logger import setup_logger
+from onyx.utils.threadpool_concurrency import run_with_timeout
 from onyx.utils.timing import log_function_time

 logger = setup_logger()
@@ -131,10 +135,12 @@ def decompose_orig_question(
    streamed_tokens: list[BaseMessage_Content] = []

    try:
-        streamed_tokens = dispatch_separated(
+        streamed_tokens = run_with_timeout(
+            AGENT_TIMEOUT_LLM_SUBQUESTION_GENERATION,
+            dispatch_separated,
            model.stream(
                msg,
-                timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION,
+                timeout_override=AGENT_TIMEOUT_CONNECT_LLM_SUBQUESTION_GENERATION,
            ),
            dispatch_subquestion(0, writer),
            sep_callback=dispatch_subquestion_sep(0, writer),
@@ -154,7 +160,7 @@ def decompose_orig_question(
        )
        write_custom_event("stream_finished", stop_event, writer)

-    except LLMTimeoutError as e:
+    except (LLMTimeoutError, TimeoutError) as e:
        logger.error("LLM Timeout Error - decompose orig question")
        raise e  # fail loudly on this critical step
    except LLMRateLimitError as e:
--- a/backend/onyx/agents/agent_search/deep_search/main/edges.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/edges.py
@@ -25,7 +25,7 @@ logger = setup_logger()

 def route_initial_tool_choice(
    state: MainState, config: RunnableConfig
-) -> Literal["tool_call", "start_agent_search", "logging_node"]:
+) -> Literal["call_tool", "start_agent_search", "logging_node"]:
    """
    LangGraph edge to route to agent search.
    """
@@ -38,7 +38,7 @@ def route_initial_tool_choice(
        ):
            return "start_agent_search"
        else:
-            return "tool_call"
+            return "call_tool"
    else:
        return "logging_node"

--- a/backend/onyx/agents/agent_search/deep_search/main/graph_builder.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/graph_builder.py
@@ -43,14 +43,14 @@ from onyx.agents.agent_search.deep_search.main.states import MainState
 from onyx.agents.agent_search.deep_search.refinement.consolidate_sub_answers.graph_builder import (
    answer_refined_query_graph_builder,
 )
-from onyx.agents.agent_search.orchestration.nodes.basic_use_tool_response import (
-    basic_use_tool_response,
-)
-from onyx.agents.agent_search.orchestration.nodes.llm_tool_choice import llm_tool_choice
+from onyx.agents.agent_search.orchestration.nodes.call_tool import call_tool
+from onyx.agents.agent_search.orchestration.nodes.choose_tool import choose_tool
 from onyx.agents.agent_search.orchestration.nodes.prepare_tool_input import (
    prepare_tool_input,
 )
-from onyx.agents.agent_search.orchestration.nodes.tool_call import tool_call
+from onyx.agents.agent_search.orchestration.nodes.use_tool_response import (
+    basic_use_tool_response,
+)
 from onyx.agents.agent_search.shared_graph_utils.utils import get_test_config
 from onyx.utils.logger import setup_logger

@@ -77,13 +77,13 @@ def main_graph_builder(test_mode: bool = False) -> StateGraph:
    # Choose the initial tool
    graph.add_node(
        node="initial_tool_choice",
-        action=llm_tool_choice,
+        action=choose_tool,
    )

    # Call the tool, if required
    graph.add_node(
-        node="tool_call",
-        action=tool_call,
+        node="call_tool",
+        action=call_tool,
    )

    # Use the tool response
@@ -168,11 +168,11 @@ def main_graph_builder(test_mode: bool = False) -> StateGraph:
    graph.add_conditional_edges(
        "initial_tool_choice",
        route_initial_tool_choice,
-        ["tool_call", "start_agent_search", "logging_node"],
+        ["call_tool", "start_agent_search", "logging_node"],
    )

    graph.add_edge(
-        start_key="tool_call",
+        start_key="call_tool",
        end_key="basic_use_tool_response",
    )
    graph.add_edge(
--- a/backend/onyx/agents/agent_search/deep_search/main/nodes/compare_answers.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/nodes/compare_answers.py
@@ -33,13 +33,15 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
 from onyx.chat.models import RefinedAnswerImprovement
-from onyx.configs.agent_configs import AGENT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS
+from onyx.configs.agent_configs import AGENT_TIMEOUT_CONNECT_LLM_COMPARE_ANSWERS
+from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_COMPARE_ANSWERS
 from onyx.llm.chat_llm import LLMRateLimitError
 from onyx.llm.chat_llm import LLMTimeoutError
 from onyx.prompts.agent_search import (
    INITIAL_REFINED_ANSWER_COMPARISON_PROMPT,
 )
 from onyx.utils.logger import setup_logger
+from onyx.utils.threadpool_concurrency import run_with_timeout
 from onyx.utils.timing import log_function_time

 logger = setup_logger()
@@ -105,11 +107,14 @@ def compare_answers(
    refined_answer_improvement: bool | None = None
    # no need to stream this
    try:
-        resp = model.invoke(
-            msg, timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS
+        resp = run_with_timeout(
+            AGENT_TIMEOUT_LLM_COMPARE_ANSWERS,
+            model.invoke,
+            prompt=msg,
+            timeout_override=AGENT_TIMEOUT_CONNECT_LLM_COMPARE_ANSWERS,
        )

-    except LLMTimeoutError:
+    except (LLMTimeoutError, TimeoutError):
        agent_error = AgentErrorLog(
            error_type=AgentLLMErrorType.TIMEOUT,
            error_message=AGENT_LLM_TIMEOUT_MESSAGE,
--- a/backend/onyx/agents/agent_search/deep_search/main/nodes/create_refined_sub_questions.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/nodes/create_refined_sub_questions.py
@@ -44,7 +44,10 @@ from onyx.agents.agent_search.shared_graph_utils.utils import make_question_id
 from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
 from onyx.chat.models import StreamingError
 from onyx.configs.agent_configs import (
-    AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION,
+    AGENT_TIMEOUT_CONNECT_LLM_REFINED_SUBQUESTION_GENERATION,
+)
+from onyx.configs.agent_configs import (
+    AGENT_TIMEOUT_LLM_REFINED_SUBQUESTION_GENERATION,
 )
 from onyx.llm.chat_llm import LLMRateLimitError
 from onyx.llm.chat_llm import LLMTimeoutError
@@ -53,6 +56,7 @@ from onyx.prompts.agent_search import (
 )
 from onyx.tools.models import ToolCallKickoff
 from onyx.utils.logger import setup_logger
+from onyx.utils.threadpool_concurrency import run_with_timeout
 from onyx.utils.timing import log_function_time

 logger = setup_logger()
@@ -134,15 +138,17 @@ def create_refined_sub_questions(
    agent_error: AgentErrorLog | None = None
    streamed_tokens: list[BaseMessage_Content] = []
    try:
-        streamed_tokens = dispatch_separated(
+        streamed_tokens = run_with_timeout(
+            AGENT_TIMEOUT_LLM_REFINED_SUBQUESTION_GENERATION,
+            dispatch_separated,
            model.stream(
                msg,
-                timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION,
+                timeout_override=AGENT_TIMEOUT_CONNECT_LLM_REFINED_SUBQUESTION_GENERATION,
            ),
            dispatch_subquestion(1, writer),
            sep_callback=dispatch_subquestion_sep(1, writer),
        )
-    except LLMTimeoutError:
+    except (LLMTimeoutError, TimeoutError):
        agent_error = AgentErrorLog(
            error_type=AgentLLMErrorType.TIMEOUT,
            error_message=AGENT_LLM_TIMEOUT_MESSAGE,
--- a/backend/onyx/agents/agent_search/deep_search/main/nodes/extract_entities_terms.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/nodes/extract_entities_terms.py
@@ -22,11 +22,17 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
 )
 from onyx.configs.agent_configs import (
-    AGENT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION,
+    AGENT_TIMEOUT_CONNECT_LLM_ENTITY_TERM_EXTRACTION,
+)
+from onyx.configs.agent_configs import (
+    AGENT_TIMEOUT_LLM_ENTITY_TERM_EXTRACTION,
 )
 from onyx.configs.constants import NUM_EXPLORATORY_DOCS
+from onyx.llm.chat_llm import LLMRateLimitError
+from onyx.llm.chat_llm import LLMTimeoutError
 from onyx.prompts.agent_search import ENTITY_TERM_EXTRACTION_PROMPT
 from onyx.prompts.agent_search import ENTITY_TERM_EXTRACTION_PROMPT_JSON_EXAMPLE
+from onyx.utils.threadpool_concurrency import run_with_timeout
 from onyx.utils.timing import log_function_time


@@ -84,30 +90,42 @@ def extract_entities_terms(
    ]
    fast_llm = graph_config.tooling.fast_llm
    # Grader
-    llm_response = fast_llm.invoke(
-        prompt=msg,
-        timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION,
-    )
-
-    cleaned_response = (
-        str(llm_response.content).replace("```json\n", "").replace("\n```", "")
-    )
-    first_bracket = cleaned_response.find("{")
-    last_bracket = cleaned_response.rfind("}")
-    cleaned_response = cleaned_response[first_bracket : last_bracket + 1]
-
    try:
-        entity_extraction_result = EntityExtractionResult.model_validate_json(
-            cleaned_response
+        llm_response = run_with_timeout(
+            AGENT_TIMEOUT_LLM_ENTITY_TERM_EXTRACTION,
+            fast_llm.invoke,
+            prompt=msg,
+            timeout_override=AGENT_TIMEOUT_CONNECT_LLM_ENTITY_TERM_EXTRACTION,
        )
-    except ValueError:
-        logger.error("Failed to parse LLM response as JSON in Entity-Term Extraction")
+
+        cleaned_response = (
+            str(llm_response.content).replace("```json\n", "").replace("\n```", "")
+        )
+        first_bracket = cleaned_response.find("{")
+        last_bracket = cleaned_response.rfind("}")
+        cleaned_response = cleaned_response[first_bracket : last_bracket + 1]
+
+        try:
+            entity_extraction_result = EntityExtractionResult.model_validate_json(
+                cleaned_response
+            )
+        except ValueError:
+            logger.error(
+                "Failed to parse LLM response as JSON in Entity-Term Extraction"
+            )
+            entity_extraction_result = EntityExtractionResult(
+                retrieved_entities_relationships=EntityRelationshipTermExtraction(),
+            )
+    except (LLMTimeoutError, TimeoutError):
+        logger.error("LLM Timeout Error - extract entities terms")
        entity_extraction_result = EntityExtractionResult(
-            retrieved_entities_relationships=EntityRelationshipTermExtraction(
-                entities=[],
-                relationships=[],
-                terms=[],
-            ),
+            retrieved_entities_relationships=EntityRelationshipTermExtraction(),
+        )
+
+    except LLMRateLimitError:
+        logger.error("LLM Rate Limit Error - extract entities terms")
+        entity_extraction_result = EntityExtractionResult(
+            retrieved_entities_relationships=EntityRelationshipTermExtraction(),
        )

    return EntityTermExtractionUpdate(
--- a/backend/onyx/agents/agent_search/deep_search/main/nodes/generate_validate_refined_answer.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/nodes/generate_validate_refined_answer.py
@@ -1,5 +1,4 @@
 from datetime import datetime
-from typing import Any
 from typing import cast

 from langchain_core.messages import HumanMessage
@@ -66,14 +65,21 @@ from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
 from onyx.chat.models import AgentAnswerPiece
 from onyx.chat.models import ExtendedToolResponse
 from onyx.chat.models import StreamingError
+from onyx.configs.agent_configs import AGENT_ANSWER_GENERATION_BY_FAST_LLM
 from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
 from onyx.configs.agent_configs import AGENT_MAX_STREAMED_DOCS_FOR_REFINED_ANSWER
 from onyx.configs.agent_configs import AGENT_MIN_ORIG_QUESTION_DOCS
 from onyx.configs.agent_configs import (
-    AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION,
+    AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION,
 )
 from onyx.configs.agent_configs import (
-    AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_VALIDATION,
+    AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_VALIDATION,
+)
+from onyx.configs.agent_configs import (
+    AGENT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION,
+)
+from onyx.configs.agent_configs import (
+    AGENT_TIMEOUT_LLM_REFINED_ANSWER_VALIDATION,
 )
 from onyx.llm.chat_llm import LLMRateLimitError
 from onyx.llm.chat_llm import LLMTimeoutError
@@ -92,6 +98,7 @@ from onyx.prompts.agent_search import (
 from onyx.prompts.agent_search import UNKNOWN_ANSWER
 from onyx.tools.tool_implementations.search.search_tool import yield_search_responses
 from onyx.utils.logger import setup_logger
+from onyx.utils.threadpool_concurrency import run_with_timeout
 from onyx.utils.timing import log_function_time

 logger = setup_logger()
@@ -253,7 +260,12 @@ def generate_validate_refined_answer(
        else REFINED_ANSWER_PROMPT_WO_SUB_QUESTIONS
    )

-    model = graph_config.tooling.fast_llm
+    model = (
+        graph_config.tooling.fast_llm
+        if AGENT_ANSWER_GENERATION_BY_FAST_LLM
+        else graph_config.tooling.primary_llm
+    )
+
    relevant_docs_str = format_docs(answer_generation_documents.context_documents)
    relevant_docs_str = trim_prompt_piece(
        model.config,
@@ -284,13 +296,13 @@ def generate_validate_refined_answer(
        )
    ]

-    streamed_tokens: list[str | list[str | dict[str, Any]]] = [""]
+    streamed_tokens: list[str] = [""]
    dispatch_timings: list[float] = []
    agent_error: AgentErrorLog | None = None

-    try:
+    def stream_refined_answer() -> list[str]:
        for message in model.stream(
-            msg, timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION
+            msg, timeout_override=AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION
        ):
            # TODO: in principle, the answer here COULD contain images, but we don't support that yet
            content = message.content
@@ -315,8 +327,15 @@ def generate_validate_refined_answer(
                (end_stream_token - start_stream_token).microseconds
            )
            streamed_tokens.append(content)
+        return streamed_tokens

-    except LLMTimeoutError:
+    try:
+        streamed_tokens = run_with_timeout(
+            AGENT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION,
+            stream_refined_answer,
+        )
+
+    except (LLMTimeoutError, TimeoutError):
        agent_error = AgentErrorLog(
            error_type=AgentLLMErrorType.TIMEOUT,
            error_message=AGENT_LLM_TIMEOUT_MESSAGE,
@@ -383,16 +402,20 @@ def generate_validate_refined_answer(
        )
    ]

+    validation_model = graph_config.tooling.fast_llm
    try:
-        validation_response = model.invoke(
-            msg, timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_VALIDATION
+        validation_response = run_with_timeout(
+            AGENT_TIMEOUT_LLM_REFINED_ANSWER_VALIDATION,
+            validation_model.invoke,
+            prompt=msg,
+            timeout_override=AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_VALIDATION,
        )
        refined_answer_quality = binary_string_test_after_answer_separator(
            text=cast(str, validation_response.content),
            positive_value=AGENT_POSITIVE_VALUE_STR,
            separator=AGENT_ANSWER_SEPARATOR,
        )
-    except LLMTimeoutError:
+    except (LLMTimeoutError, TimeoutError):
        refined_answer_quality = True
        logger.error("LLM Timeout Error - validate refined answer")

--- a/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/expand_queries.py
+++ b/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/expand_queries.py
@@ -34,14 +34,16 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id
 from onyx.configs.agent_configs import (
-    AGENT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION,
+    AGENT_TIMEOUT_CONNECT_LLM_QUERY_REWRITING_GENERATION,
 )
+from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_QUERY_REWRITING_GENERATION
 from onyx.llm.chat_llm import LLMRateLimitError
 from onyx.llm.chat_llm import LLMTimeoutError
 from onyx.prompts.agent_search import (
    QUERY_REWRITING_PROMPT,
 )
 from onyx.utils.logger import setup_logger
+from onyx.utils.threadpool_concurrency import run_with_timeout
 from onyx.utils.timing import log_function_time

 logger = setup_logger()
@@ -69,7 +71,7 @@ def expand_queries(
    node_start_time = datetime.now()
    question = state.question

-    llm = graph_config.tooling.fast_llm
+    model = graph_config.tooling.fast_llm
    sub_question_id = state.sub_question_id
    if sub_question_id is None:
        level, question_num = 0, 0
@@ -88,10 +90,12 @@ def expand_queries(
    rewritten_queries = []

    try:
-        llm_response_list = dispatch_separated(
-            llm.stream(
+        llm_response_list = run_with_timeout(
+            AGENT_TIMEOUT_LLM_QUERY_REWRITING_GENERATION,
+            dispatch_separated,
+            model.stream(
                prompt=msg,
-                timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION,
+                timeout_override=AGENT_TIMEOUT_CONNECT_LLM_QUERY_REWRITING_GENERATION,
            ),
            dispatch_subquery(level, question_num, writer),
        )
@@ -101,7 +105,7 @@ def expand_queries(
        rewritten_queries = llm_response.split("\n")
        log_result = f"Number of expanded queries: {len(rewritten_queries)}"

-    except LLMTimeoutError:
+    except (LLMTimeoutError, TimeoutError):
        agent_error = AgentErrorLog(
            error_type=AgentLLMErrorType.TIMEOUT,
            error_message=AGENT_LLM_TIMEOUT_MESSAGE,
--- a/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/rerank_documents.py
+++ b/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/rerank_documents.py
@@ -55,6 +55,7 @@ def rerank_documents(

    # Note that these are passed in values from the API and are overrides which are typically None
    rerank_settings = graph_config.inputs.search_request.rerank_settings
+    allow_agent_reranking = graph_config.behavior.allow_agent_reranking

    if rerank_settings is None:
        with get_session_context_manager() as db_session:
@@ -62,23 +63,31 @@ def rerank_documents(
            if not search_settings.disable_rerank_for_streaming:
                rerank_settings = RerankingDetails.from_db_model(search_settings)

+    # Initial default: no reranking. Will be overwritten below if reranking is warranted
+    reranked_documents = verified_documents
+
    if should_rerank(rerank_settings) and len(verified_documents) > 0:
        if len(verified_documents) > 1:
-            reranked_documents = rerank_sections(
-                query_str=question,
-                # if runnable, then rerank_settings is not None
-                rerank_settings=cast(RerankingDetails, rerank_settings),
-                sections_to_rerank=verified_documents,
-            )
+            if not allow_agent_reranking:
+                logger.info("Use of local rerank model without GPU, skipping reranking")
+                # No reranking, stay with verified_documents as default
+
+            else:
+                # Reranking is warranted, use the rerank_sections function
+                reranked_documents = rerank_sections(
+                    query_str=question,
+                    # if runnable, then rerank_settings is not None
+                    rerank_settings=cast(RerankingDetails, rerank_settings),
+                    sections_to_rerank=verified_documents,
+                )
        else:
            logger.warning(
                f"{len(verified_documents)} verified document(s) found, skipping reranking"
            )
-            reranked_documents = verified_documents
+            # No reranking, stay with verified_documents as default
    else:
        logger.warning("No reranking settings found, using unranked documents")
-        reranked_documents = verified_documents
-
+        # No reranking, stay with verified_documents as default
    if AGENT_RERANKING_STATS:
        fit_scores = get_fit_scores(verified_documents, reranked_documents)
    else:
--- a/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/verify_documents.py
+++ b/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/verify_documents.py
@@ -25,13 +25,15 @@ from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrin
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
 )
-from onyx.configs.agent_configs import AGENT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION
+from onyx.configs.agent_configs import AGENT_TIMEOUT_CONNECT_LLM_DOCUMENT_VERIFICATION
+from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_DOCUMENT_VERIFICATION
 from onyx.llm.chat_llm import LLMRateLimitError
 from onyx.llm.chat_llm import LLMTimeoutError
 from onyx.prompts.agent_search import (
    DOCUMENT_VERIFICATION_PROMPT,
 )
 from onyx.utils.logger import setup_logger
+from onyx.utils.threadpool_concurrency import run_with_timeout
 from onyx.utils.timing import log_function_time

 logger = setup_logger()
@@ -86,8 +88,11 @@ def verify_documents(
    ]  # default is to treat document as relevant

    try:
-        response = fast_llm.invoke(
-            msg, timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION
+        response = run_with_timeout(
+            AGENT_TIMEOUT_LLM_DOCUMENT_VERIFICATION,
+            fast_llm.invoke,
+            prompt=msg,
+            timeout_override=AGENT_TIMEOUT_CONNECT_LLM_DOCUMENT_VERIFICATION,
        )

        assert isinstance(response.content, str)
@@ -96,7 +101,7 @@ def verify_documents(
        ):
            verified_documents = []

-    except LLMTimeoutError:
+    except (LLMTimeoutError, TimeoutError):
        # In this case, we decide to continue and don't raise an error, as
        # little harm in letting some docs through that are less relevant.
        logger.error("LLM Timeout Error - verify documents")
--- a/backend/onyx/agents/agent_search/models.py
+++ b/backend/onyx/agents/agent_search/models.py
@@ -67,6 +67,7 @@ class GraphSearchConfig(BaseModel):
    # Whether to allow creation of refinement questions (and entity extraction, etc.)
    allow_refinement: bool = True
    skip_gen_ai_answer_generation: bool = False
+    allow_agent_reranking: bool = False


 class GraphConfig(BaseModel):
--- a/backend/onyx/agents/agent_search/orchestration/nodes/call_tool.py
+++ b/backend/onyx/agents/agent_search/orchestration/nodes/call_tool.py
@@ -28,7 +28,7 @@ def emit_packet(packet: AnswerPacket, writer: StreamWriter) -> None:
    write_custom_event("basic_response", packet, writer)


-def tool_call(
+def call_tool(
    state: ToolChoiceUpdate,
    config: RunnableConfig,
    writer: StreamWriter = lambda _: None,
--- a/backend/onyx/agents/agent_search/orchestration/nodes/llm_tool_choice.py
+++ b/backend/onyx/agents/agent_search/orchestration/nodes/llm_tool_choice.py
@@ -25,7 +25,7 @@ logger = setup_logger()
 # and a function that handles extracting the necessary fields
 # from the state and config
 # TODO: fan-out to multiple tool call nodes? Make this configurable?
-def llm_tool_choice(
+def choose_tool(
    state: ToolChoiceState,
    config: RunnableConfig,
    writer: StreamWriter = lambda _: None,
--- a/backend/onyx/agents/agent_search/orchestration/nodes/basic_use_tool_response.py
+++ b/backend/onyx/agents/agent_search/orchestration/nodes/basic_use_tool_response.py
--- a/backend/onyx/agents/agent_search/shared_graph_utils/utils.py
+++ b/backend/onyx/agents/agent_search/shared_graph_utils/utils.py
@@ -43,8 +43,9 @@ from onyx.chat.models import StreamStopReason
 from onyx.chat.models import StreamType
 from onyx.chat.prompt_builder.answer_prompt_builder import AnswerPromptBuilder
 from onyx.configs.agent_configs import (
-    AGENT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION,
+    AGENT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION,
 )
+from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_HISTORY_SUMMARY_GENERATION
 from onyx.configs.chat_configs import CHAT_TARGET_CHUNK_PERCENTAGE
 from onyx.configs.chat_configs import MAX_CHUNKS_FED_TO_CHAT
 from onyx.configs.constants import DEFAULT_PERSONA_ID
@@ -80,6 +81,7 @@ from onyx.tools.tool_implementations.search.search_tool import SearchResponseSum
 from onyx.tools.tool_implementations.search.search_tool import SearchTool
 from onyx.tools.utils import explicit_tool_calling_supported
 from onyx.utils.logger import setup_logger
+from onyx.utils.threadpool_concurrency import run_with_timeout

 logger = setup_logger()

@@ -395,11 +397,13 @@ def summarize_history(
    )

    try:
-        history_response = llm.invoke(
+        history_response = run_with_timeout(
+            AGENT_TIMEOUT_LLM_HISTORY_SUMMARY_GENERATION,
+            llm.invoke,
            history_context_prompt,
-            timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION,
+            timeout_override=AGENT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION,
        )
-    except LLMTimeoutError:
+    except (LLMTimeoutError, TimeoutError):
        logger.error("LLM Timeout Error - summarize history")
        return (
            history  # this is what is done at this point anyway, so we default to this
--- a/backend/onyx/auth/email_utils.py
+++ b/backend/onyx/auth/email_utils.py
@@ -10,6 +10,7 @@ from onyx.configs.app_configs import SMTP_PORT
 from onyx.configs.app_configs import SMTP_SERVER
 from onyx.configs.app_configs import SMTP_USER
 from onyx.configs.app_configs import WEB_DOMAIN
+from onyx.configs.constants import AuthType
 from onyx.configs.constants import TENANT_ID_COOKIE_NAME
 from onyx.db.models import User

@@ -187,23 +188,51 @@ def send_subscription_cancellation_email(user_email: str) -> None:
    send_email(user_email, subject, html_content, text_content)


-def send_user_email_invite(user_email: str, current_user: User) -> None:
+def send_user_email_invite(
+    user_email: str, current_user: User, auth_type: AuthType
+) -> None:
    subject = "Invitation to Join Onyx Organization"
    heading = "You've Been Invited!"
-    message = (
-        f"<p>You have been invited by {current_user.email} to join an organization on Onyx.</p>"
-        "<p>To join the organization, please click the button below to set a password "
-        "or login with Google and complete your registration.</p>"
-    )
+
+    # the exact action taken by the user, and thus the message, depends on the auth type
+    message = f"<p>You have been invited by {current_user.email} to join an organization on Onyx.</p>"
+    if auth_type == AuthType.CLOUD:
+        message += (
+            "<p>To join the organization, please click the button below to set a password "
+            "or login with Google and complete your registration.</p>"
+        )
+    elif auth_type == AuthType.BASIC:
+        message += (
+            "<p>To join the organization, please click the button below to set a password "
+            "and complete your registration.</p>"
+        )
+    elif auth_type == AuthType.GOOGLE_OAUTH:
+        message += (
+            "<p>To join the organization, please click the button below to login with Google "
+            "and complete your registration.</p>"
+        )
+    elif auth_type == AuthType.OIDC or auth_type == AuthType.SAML:
+        message += (
+            "<p>To join the organization, please click the button below to"
+            " complete your registration.</p>"
+        )
+    else:
+        raise ValueError(f"Invalid auth type: {auth_type}")
+
    cta_text = "Join Organization"
    cta_link = f"{WEB_DOMAIN}/auth/signup?email={user_email}"
    html_content = build_html_email(heading, message, cta_text, cta_link)
+
+    # text content is the fallback for clients that don't support HTML
+    # not as critical, so only the cloud auth type gets an extra hint below
    text_content = (
        f"You have been invited by {current_user.email} to join an organization on Onyx.\n"
        "To join the organization, please visit the following link:\n"
        f"{WEB_DOMAIN}/auth/signup?email={user_email}\n"
-        "You'll be asked to set a password or login with Google to complete your registration."
    )
+    if auth_type == AuthType.CLOUD:
+        text_content += "You'll be asked to set a password or login with Google to complete your registration."
+
    send_email(user_email, subject, html_content, text_content)


--- a/backend/onyx/auth/noauth_user.py
+++ b/backend/onyx/auth/noauth_user.py
@@ -42,4 +42,5 @@ def fetch_no_auth_user(
        role=UserRole.BASIC if anonymous_user_enabled else UserRole.ADMIN,
        preferences=load_no_auth_user_preferences(store),
        is_anonymous_user=anonymous_user_enabled,
+        password_configured=False,
    )
--- a/backend/onyx/auth/users.py
+++ b/backend/onyx/auth/users.py
@@ -1,5 +1,7 @@
 import json
+import random
 import secrets
+import string
 import uuid
 from collections.abc import AsyncGenerator
 from datetime import datetime
@@ -86,7 +88,6 @@ from onyx.db.auth import get_user_db
 from onyx.db.auth import SQLAlchemyUserAdminDB
 from onyx.db.engine import get_async_session
 from onyx.db.engine import get_async_session_with_tenant
-from onyx.db.engine import get_current_tenant_id
 from onyx.db.engine import get_session_with_tenant
 from onyx.db.models import AccessToken
 from onyx.db.models import OAuthAccount
@@ -94,6 +95,7 @@ from onyx.db.models import User
 from onyx.db.users import get_user_by_email
 from onyx.redis.redis_pool import get_async_redis_connection
 from onyx.redis.redis_pool import get_redis_client
+from onyx.server.utils import BasicAuthenticationError
 from onyx.utils.logger import setup_logger
 from onyx.utils.telemetry import create_milestone_and_report
 from onyx.utils.telemetry import optional_telemetry
@@ -103,15 +105,11 @@ from onyx.utils.variable_functionality import fetch_versioned_implementation
 from shared_configs.configs import async_return_default_schema
 from shared_configs.configs import MULTI_TENANT
 from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
+from shared_configs.contextvars import get_current_tenant_id

 logger = setup_logger()


-class BasicAuthenticationError(HTTPException):
-    def __init__(self, detail: str):
-        super().__init__(status_code=status.HTTP_403_FORBIDDEN, detail=detail)
-
-
 def is_user_admin(user: User | None) -> bool:
    if AUTH_TYPE == AuthType.DISABLED:
        return True
@@ -143,6 +141,30 @@ def get_display_email(email: str | None, space_less: bool = False) -> str:
    return email or ""


+def generate_password() -> str:
+    """Return a random 12-character password containing every character class."""
+    lowercase_letters = string.ascii_lowercase
+    uppercase_letters = string.ascii_uppercase
+    digits = string.digits
+    special_characters = string.punctuation
+    # Ensure at least one of each character type, including lowercase
+    password = [
+        secrets.choice(lowercase_letters),
+        secrets.choice(uppercase_letters),
+        secrets.choice(digits),
+        secrets.choice(special_characters),
+    ]
+
+    # Fill the rest with a mix of characters
+    remaining_length = 12 - len(password)
+    all_characters = lowercase_letters + uppercase_letters + digits + special_characters
+    password.extend(secrets.choice(all_characters) for _ in range(remaining_length))
+
+    # Shuffle with the OS CSPRNG; random.shuffle() uses the predictable Mersenne Twister
+    random.SystemRandom().shuffle(password)
+    return "".join(password)
+
+
 def user_needs_to_be_verified() -> bool:
    if AUTH_TYPE == AuthType.BASIC or AUTH_TYPE == AuthType.CLOUD:
        return REQUIRE_EMAIL_VERIFICATION
@@ -193,7 +215,7 @@ def verify_email_is_invited(email: str) -> None:


 def verify_email_in_whitelist(email: str, tenant_id: str | None = None) -> None:
-    with get_session_with_tenant(tenant_id) as db_session:
+    with get_session_with_tenant(tenant_id=tenant_id) as db_session:
        if not get_user_by_email(email, db_session):
            verify_email_is_invited(email)

@@ -595,6 +617,39 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):

            return user

+    async def reset_password_as_admin(self, user_id: uuid.UUID) -> str:
+        """Admin-only. Set a newly generated random password on the user and return it."""
+        user = await self.get(user_id)
+        new_password = generate_password()
+        await self._update(user, {"password": new_password})
+        return new_password
+
+    async def change_password_if_old_matches(
+        self, user: User, old_password: str, new_password: str
+    ) -> None:
+        """
+        Change the user's password after verifying the one they currently have.
+        Raises HTTPException (400) if old_password does not verify against the stored hash.
+        """
+        verified, updated_password_hash = self.password_helper.verify_and_update(
+            old_password, user.hashed_password
+        )
+        if not verified:
+            # Wrong current password: reject with a 400 and leave the user untouched
+            from fastapi import HTTPException, status
+
+            raise HTTPException(
+                status_code=status.HTTP_400_BAD_REQUEST,
+                detail="Invalid current password",
+            )
+
+        # verify_and_update may hand back a re-hashed value; keep it on the user object first
+        if updated_password_hash:
+            user.hashed_password = updated_password_hash
+
+        # Apply the new password via _update (presumably validates and hashes it - confirm)
+        await self._update(user, {"password": new_password})
+

 async def get_user_manager(
    user_db: SQLAlchemyUserDatabase = Depends(get_user_db),
@@ -819,8 +874,9 @@ async def current_limited_user(

 async def current_chat_accesssible_user(
    user: User | None = Depends(optional_user),
-    tenant_id: str | None = Depends(get_current_tenant_id),
 ) -> User | None:
+    tenant_id = get_current_tenant_id()
+
    return await double_check_user(
        user, allow_anonymous_access=anonymous_user_enabled(tenant_id=tenant_id)
    )
--- a/backend/onyx/background/celery/apps/app_base.py
+++ b/backend/onyx/background/celery/apps/app_base.py
@@ -33,6 +33,7 @@ from onyx.redis.redis_connector_ext_group_sync import RedisConnectorExternalGrou
 from onyx.redis.redis_connector_prune import RedisConnectorPrune
 from onyx.redis.redis_document_set import RedisDocumentSet
 from onyx.redis.redis_pool import get_redis_client
+from onyx.redis.redis_pool import get_shared_redis_client
 from onyx.redis.redis_usergroup import RedisUserGroup
 from onyx.utils.logger import ColoredFormatter
 from onyx.utils.logger import PlainFormatter
@@ -58,13 +59,35 @@ else:
    logger.debug("Sentry DSN not provided, skipping Sentry initialization")


+class TenantAwareTask(Task):
+    """A custom base Task that sets tenant_id in a contextvar before running."""
+
+    abstract = True  # So Celery knows not to register this as a real task.
+
+    def __call__(self, *args: Any, **kwargs: Any) -> Any:
+        # Grab tenant_id from the kwargs, or fallback to default if missing.
+        tenant_id = kwargs.get("tenant_id", None) or POSTGRES_DEFAULT_SCHEMA
+
+        # Set the context var, keeping the token so the previous value can be restored
+        token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
+
+        # Actually run the task now
+        try:
+            return super().__call__(*args, **kwargs)
+        finally:
+            # Restore the pre-call value: nothing leaks into later tasks on this worker,
+            # and a task invoked synchronously inside another keeps its caller's tenant
+            CURRENT_TENANT_ID_CONTEXTVAR.reset(token)
+
+
+@task_prerun.connect
 def on_task_prerun(
    sender: Any | None = None,
    task_id: str | None = None,
    task: Task | None = None,
    args: tuple[Any, ...] | None = None,
    kwargs: dict[str, Any] | None = None,
-    **kwds: Any,
+    **other_kwargs: Any,
 ) -> None:
    pass

@@ -201,7 +224,7 @@ def wait_for_redis(sender: Any, **kwargs: Any) -> None:
    Will raise WorkerShutdown to kill the celery worker if the timeout
    is reached."""

-    r = get_redis_client(tenant_id=None)
+    r = get_shared_redis_client()

    WAIT_INTERVAL = 5
    WAIT_LIMIT = 60
@@ -287,7 +310,7 @@ def on_secondary_worker_init(sender: Any, **kwargs: Any) -> None:
    # Set up variables for waiting on primary worker
    WAIT_INTERVAL = 5
    WAIT_LIMIT = 60
-    r = get_redis_client(tenant_id=None)
+    r = get_shared_redis_client()
    time_start = time.monotonic()

    logger.info("Waiting for primary worker to be ready...")
@@ -439,24 +462,6 @@ class TenantContextFilter(logging.Filter):
        return True


-@task_prerun.connect
-def set_tenant_id(
-    sender: Any | None = None,
-    task_id: str | None = None,
-    task: Task | None = None,
-    args: tuple[Any, ...] | None = None,
-    kwargs: dict[str, Any] | None = None,
-    **other_kwargs: Any,
-) -> None:
-    """Signal handler to set tenant ID in context var before task starts."""
-    tenant_id = (
-        kwargs.get("tenant_id", POSTGRES_DEFAULT_SCHEMA)
-        if kwargs
-        else POSTGRES_DEFAULT_SCHEMA
-    )
-    CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
-
-
@task_postrun.connect
 def reset_tenant_id(
    sender: Any | None = None,
--- a/backend/onyx/background/celery/apps/beat.py
+++ b/backend/onyx/background/celery/apps/beat.py
@@ -132,6 +132,7 @@ class DynamicTenantScheduler(PersistentScheduler):
                        f"Adding options to task {tenant_task_name}: {options}"
                    )
                    tenant_task["options"] = options
+
                new_schedule[tenant_task_name] = tenant_task

        return new_schedule
@@ -256,3 +257,4 @@ def on_setup_logging(


 celery_app.conf.beat_scheduler = DynamicTenantScheduler
+celery_app.Task = app_base.TenantAwareTask  # type: ignore [misc]
--- a/backend/onyx/background/celery/apps/heavy.py
+++ b/backend/onyx/background/celery/apps/heavy.py
@@ -20,6 +20,7 @@ logger = setup_logger()

 celery_app = Celery(__name__)
 celery_app.config_from_object("onyx.background.celery.configs.heavy")
+celery_app.Task = app_base.TenantAwareTask  # type: ignore [misc]


@signals.task_prerun.connect
--- a/backend/onyx/background/celery/apps/indexing.py
+++ b/backend/onyx/background/celery/apps/indexing.py
@@ -21,6 +21,7 @@ logger = setup_logger()

 celery_app = Celery(__name__)
 celery_app.config_from_object("onyx.background.celery.configs.indexing")
+celery_app.Task = app_base.TenantAwareTask  # type: ignore [misc]


@signals.task_prerun.connect
--- a/backend/onyx/background/celery/apps/light.py
+++ b/backend/onyx/background/celery/apps/light.py
@@ -23,6 +23,7 @@ logger = setup_logger()

 celery_app = Celery(__name__)
 celery_app.config_from_object("onyx.background.celery.configs.light")
+celery_app.Task = app_base.TenantAwareTask  # type: ignore [misc]


@signals.task_prerun.connect
--- a/backend/onyx/background/celery/apps/monitoring.py
+++ b/backend/onyx/background/celery/apps/monitoring.py
@@ -20,6 +20,7 @@ logger = setup_logger()

 celery_app = Celery(__name__)
 celery_app.config_from_object("onyx.background.celery.configs.monitoring")
+celery_app.Task = app_base.TenantAwareTask  # type: ignore [misc]


@signals.task_prerun.connect
--- a/backend/onyx/background/celery/apps/primary.py
+++ b/backend/onyx/background/celery/apps/primary.py
@@ -24,7 +24,7 @@ from onyx.configs.constants import CELERY_PRIMARY_WORKER_LOCK_TIMEOUT
 from onyx.configs.constants import OnyxRedisConstants
 from onyx.configs.constants import OnyxRedisLocks
 from onyx.configs.constants import POSTGRES_CELERY_WORKER_PRIMARY_APP_NAME
-from onyx.db.engine import get_session_with_default_tenant
+from onyx.db.engine import get_session_with_current_tenant
 from onyx.db.engine import SqlEngine
 from onyx.db.index_attempt import get_index_attempt
 from onyx.db.index_attempt import mark_attempt_canceled
@@ -38,7 +38,7 @@ from onyx.redis.redis_connector_index import RedisConnectorIndex
 from onyx.redis.redis_connector_prune import RedisConnectorPrune
 from onyx.redis.redis_connector_stop import RedisConnectorStop
 from onyx.redis.redis_document_set import RedisDocumentSet
-from onyx.redis.redis_pool import get_redis_client
+from onyx.redis.redis_pool import get_shared_redis_client
 from onyx.redis.redis_usergroup import RedisUserGroup
 from onyx.utils.logger import setup_logger
 from shared_configs.configs import MULTI_TENANT
@@ -47,6 +47,7 @@ logger = setup_logger()

 celery_app = Celery(__name__)
 celery_app.config_from_object("onyx.background.celery.configs.primary")
+celery_app.Task = app_base.TenantAwareTask  # type: ignore [misc]


@signals.task_prerun.connect
@@ -101,7 +102,7 @@ def on_worker_init(sender: Worker, **kwargs: Any) -> None:

    # This is singleton work that should be done on startup exactly once
    # by the primary worker. This is unnecessary in the multi tenant scenario
-    r = get_redis_client(tenant_id=None)
+    r = get_shared_redis_client()

    # Log the role and slave count - being connected to a slave or slave count > 0 could be problematic
    info: dict[str, Any] = cast(dict, r.info("replication"))
@@ -158,7 +159,7 @@ def on_worker_init(sender: Worker, **kwargs: Any) -> None:
    RedisConnectorExternalGroupSync.reset_all(r)

    # mark orphaned index attempts as failed
-    with get_session_with_default_tenant() as db_session:
+    with get_session_with_current_tenant() as db_session:
        unfenced_attempt_ids = get_unfenced_index_attempt_ids(db_session, r)
        for attempt_id in unfenced_attempt_ids:
            attempt = get_index_attempt(db_session, attempt_id)
@@ -234,7 +235,7 @@ class HubPeriodicTask(bootsteps.StartStopStep):

            lock: RedisLock = worker.primary_worker_lock

-            r = get_redis_client(tenant_id=None)
+            r = get_shared_redis_client()

            if lock.owned():
                task_logger.debug("Reacquiring primary worker lock.")
--- a/backend/onyx/background/celery/tasks/connector_deletion/tasks.py
+++ b/backend/onyx/background/celery/tasks/connector_deletion/tasks.py
@@ -27,7 +27,7 @@ from onyx.db.connector_credential_pair import get_connector_credential_pair_from
 from onyx.db.connector_credential_pair import get_connector_credential_pairs
 from onyx.db.document import get_document_ids_for_connector_credential_pair
 from onyx.db.document_set import delete_document_set_cc_pair_relationship__no_commit
-from onyx.db.engine import get_session_with_tenant
+from onyx.db.engine import get_session_with_current_tenant
 from onyx.db.enums import ConnectorCredentialPairStatus
 from onyx.db.enums import SyncStatus
 from onyx.db.enums import SyncType
@@ -62,8 +62,8 @@ class TaskDependencyError(RuntimeError):
 def check_for_connector_deletion_task(
    self: Task, *, tenant_id: str | None
 ) -> bool | None:
-    r = get_redis_client(tenant_id=tenant_id)
-    r_replica = get_redis_replica_client(tenant_id=tenant_id)
+    r = get_redis_client()
+    r_replica = get_redis_replica_client()

    lock_beat: RedisLock = r.lock(
        OnyxRedisLocks.CHECK_CONNECTOR_DELETION_BEAT_LOCK,
@@ -77,14 +77,14 @@ def check_for_connector_deletion_task(
    try:
        # collect cc_pair_ids
        cc_pair_ids: list[int] = []
-        with get_session_with_tenant(tenant_id) as db_session:
+        with get_session_with_current_tenant() as db_session:
            cc_pairs = get_connector_credential_pairs(db_session)
            for cc_pair in cc_pairs:
                cc_pair_ids.append(cc_pair.id)

        # try running cleanup on the cc_pair_ids
        for cc_pair_id in cc_pair_ids:
-            with get_session_with_tenant(tenant_id) as db_session:
+            with get_session_with_current_tenant() as db_session:
                redis_connector = RedisConnector(tenant_id, cc_pair_id)
                try:
                    try_generate_document_cc_pair_cleanup_tasks(
@@ -277,7 +277,7 @@ def monitor_connector_deletion_taskset(
        f"Connector deletion progress: cc_pair={cc_pair_id} remaining={remaining} initial={fence_data.num_tasks}"
    )
    if remaining > 0:
-        with get_session_with_tenant(tenant_id) as db_session:
+        with get_session_with_current_tenant() as db_session:
            update_sync_record_status(
                db_session=db_session,
                entity_id=cc_pair_id,
@@ -287,7 +287,7 @@ def monitor_connector_deletion_taskset(
            )
        return

-    with get_session_with_tenant(tenant_id) as db_session:
+    with get_session_with_current_tenant() as db_session:
        cc_pair = get_connector_credential_pair_from_id(
            db_session=db_session,
            cc_pair_id=cc_pair_id,
--- a/backend/onyx/background/celery/tasks/doc_permission_syncing/tasks.py
+++ b/backend/onyx/background/celery/tasks/doc_permission_syncing/tasks.py
@@ -45,7 +45,7 @@ from onyx.configs.constants import OnyxRedisSignals
 from onyx.db.connector import mark_cc_pair_as_permissions_synced
 from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
 from onyx.db.document import upsert_document_by_connector_credential_pair
-from onyx.db.engine import get_session_with_tenant
+from onyx.db.engine import get_session_with_current_tenant
 from onyx.db.enums import AccessType
 from onyx.db.enums import ConnectorCredentialPairStatus
 from onyx.db.enums import SyncStatus
@@ -119,13 +119,13 @@ def _is_external_doc_permissions_sync_due(cc_pair: ConnectorCredentialPair) -> b
    soft_time_limit=JOB_TIMEOUT,
    bind=True,
 )
-def check_for_doc_permissions_sync(self: Task, *, tenant_id: str | None) -> bool | None:
+def check_for_doc_permissions_sync(self: Task, *, tenant_id: str) -> bool | None:
    # TODO(rkuo): merge into check function after lookup table for fences is added

    # we need to use celery's redis client to access its redis data
    # (which lives on a different db number)
-    r = get_redis_client(tenant_id=tenant_id)
-    r_replica = get_redis_replica_client(tenant_id=tenant_id)
+    r = get_redis_client()
+    r_replica = get_redis_replica_client()
    r_celery: Redis = self.app.broker_connection().channel().client  # type: ignore

    lock_beat: RedisLock = r.lock(
@@ -140,7 +140,7 @@ def check_for_doc_permissions_sync(self: Task, *, tenant_id: str | None) -> bool
    try:
        # get all cc pairs that need to be synced
        cc_pair_ids_to_sync: list[int] = []
-        with get_session_with_tenant(tenant_id) as db_session:
+        with get_session_with_current_tenant() as db_session:
            cc_pairs = get_all_auto_sync_cc_pairs(db_session)

            for cc_pair in cc_pairs:
@@ -189,7 +189,7 @@ def check_for_doc_permissions_sync(self: Task, *, tenant_id: str | None) -> bool

            key_str = key_bytes.decode("utf-8")
            if key_str.startswith(RedisConnectorPermissionSync.FENCE_PREFIX):
-                with get_session_with_tenant(tenant_id) as db_session:
+                with get_session_with_current_tenant() as db_session:
                    monitor_ccpair_permissions_taskset(
                        tenant_id, key_bytes, r, db_session
                    )
@@ -247,7 +247,7 @@ def try_creating_permissions_sync_task(
        # create before setting fence to avoid race condition where the monitoring
        # task updates the sync record before it is created
        try:
-            with get_session_with_tenant(tenant_id) as db_session:
+            with get_session_with_current_tenant() as db_session:
                insert_sync_record(
                    db_session=db_session,
                    entity_id=cc_pair_id,
@@ -321,7 +321,7 @@ def connector_permission_sync_generator_task(

    redis_connector = RedisConnector(tenant_id, cc_pair_id)

-    r = get_redis_client(tenant_id=tenant_id)
+    r = get_redis_client()

    # this wait is needed to avoid a race condition where
    # the primary worker sends the task and it is immediately executed
@@ -378,7 +378,7 @@ def connector_permission_sync_generator_task(
        return None

    try:
-        with get_session_with_tenant(tenant_id) as db_session:
+        with get_session_with_current_tenant() as db_session:
            cc_pair = get_connector_credential_pair_from_id(
                db_session=db_session,
                cc_pair_id=cc_pair_id,
@@ -480,7 +480,8 @@ def update_external_document_permissions_task(
    external_access = document_external_access.external_access

    try:
-        with get_session_with_tenant(tenant_id) as db_session:
+        with get_session_with_current_tenant() as db_session:
+            # Add the users to the DB if they don't exist
            batch_add_ext_perm_user_if_not_exists(
                db_session=db_session,
                emails=list(external_access.external_user_emails),
--- a/backend/onyx/background/celery/tasks/external_group_syncing/tasks.py
+++ b/backend/onyx/background/celery/tasks/external_group_syncing/tasks.py
@@ -39,7 +39,7 @@ from onyx.configs.constants import OnyxRedisLocks
 from onyx.configs.constants import OnyxRedisSignals
 from onyx.db.connector import mark_cc_pair_as_external_group_synced
 from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
-from onyx.db.engine import get_session_with_tenant
+from onyx.db.engine import get_session_with_current_tenant
 from onyx.db.enums import AccessType
 from onyx.db.enums import ConnectorCredentialPairStatus
 from onyx.db.enums import SyncStatus
@@ -122,8 +122,8 @@ def _is_external_group_sync_due(cc_pair: ConnectorCredentialPair) -> bool:
 def check_for_external_group_sync(self: Task, *, tenant_id: str | None) -> bool | None:
    # we need to use celery's redis client to access its redis data
    # (which lives on a different db number)
-    r = get_redis_client(tenant_id=tenant_id)
-    r_replica = get_redis_replica_client(tenant_id=tenant_id)
+    r = get_redis_client()
+    r_replica = get_redis_replica_client()
    r_celery: Redis = self.app.broker_connection().channel().client  # type: ignore

    lock_beat: RedisLock = r.lock(
@@ -140,7 +140,7 @@ def check_for_external_group_sync(self: Task, *, tenant_id: str | None) -> bool

    try:
        cc_pair_ids_to_sync: list[int] = []
-        with get_session_with_tenant(tenant_id) as db_session:
+        with get_session_with_current_tenant() as db_session:
            cc_pairs = get_all_auto_sync_cc_pairs(db_session)

            # We only want to sync one cc_pair per source type in
@@ -230,7 +230,7 @@ def try_creating_external_group_sync_task(
        # create before setting fence to avoid race condition where the monitoring
        # task updates the sync record before it is created
        try:
-            with get_session_with_tenant(tenant_id) as db_session:
+            with get_session_with_current_tenant() as db_session:
                insert_sync_record(
                    db_session=db_session,
                    entity_id=cc_pair_id,
@@ -296,7 +296,7 @@ def connector_external_group_sync_generator_task(

    redis_connector = RedisConnector(tenant_id, cc_pair_id)

-    r = get_redis_client(tenant_id=tenant_id)
+    r = get_redis_client()

    # this wait is needed to avoid a race condition where
    # the primary worker sends the task and it is immediately executed
@@ -357,10 +357,11 @@ def connector_external_group_sync_generator_task(
        payload.started = datetime.now(timezone.utc)
        redis_connector.external_group_sync.set_fence(payload)

-        with get_session_with_tenant(tenant_id) as db_session:
+        with get_session_with_current_tenant() as db_session:
            cc_pair = get_connector_credential_pair_from_id(
                db_session=db_session,
                cc_pair_id=cc_pair_id,
+                eager_load_credential=True,
            )
            if cc_pair is None:
                raise ValueError(
@@ -384,6 +385,7 @@ def connector_external_group_sync_generator_task(
            logger.info(
                f"Syncing {len(external_user_groups)} external user groups for {source_type}"
            )
+            logger.debug(f"New external user groups: {external_user_groups}")

            replace_user__ext_group_for_cc_pair(
                db_session=db_session,
@@ -408,7 +410,7 @@ def connector_external_group_sync_generator_task(
        task_logger.exception(msg)
        emit_background_error(msg + f"\n\n{e}", cc_pair_id=cc_pair_id)

-        with get_session_with_tenant(tenant_id) as db_session:
+        with get_session_with_current_tenant() as db_session:
            update_sync_record_status(
                db_session=db_session,
                entity_id=cc_pair_id,
@@ -459,7 +461,6 @@ def validate_external_group_sync_fences(
        )

        lock_beat.reacquire()
-
    return


--- a/backend/onyx/background/celery/tasks/indexing/tasks.py
+++ b/backend/onyx/background/celery/tasks/indexing/tasks.py
@@ -41,16 +41,18 @@ from onyx.configs.app_configs import VESPA_CLOUD_CERT_PATH
 from onyx.configs.app_configs import VESPA_CLOUD_KEY_PATH
 from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
 from onyx.configs.constants import CELERY_INDEXING_LOCK_TIMEOUT
+from onyx.configs.constants import CELERY_INDEXING_WATCHDOG_CONNECTOR_TIMEOUT
 from onyx.configs.constants import CELERY_TASK_WAIT_FOR_FENCE_TIMEOUT
 from onyx.configs.constants import OnyxCeleryQueues
 from onyx.configs.constants import OnyxCeleryTask
 from onyx.configs.constants import OnyxRedisConstants
 from onyx.configs.constants import OnyxRedisLocks
 from onyx.configs.constants import OnyxRedisSignals
+from onyx.connectors.interfaces import ConnectorValidationError
 from onyx.db.connector import mark_ccpair_with_indexing_trigger
 from onyx.db.connector_credential_pair import fetch_connector_credential_pairs
 from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
-from onyx.db.engine import get_session_with_tenant
+from onyx.db.engine import get_session_with_current_tenant
 from onyx.db.enums import IndexingMode
 from onyx.db.enums import IndexingStatus
 from onyx.db.index_attempt import get_index_attempt
@@ -90,6 +92,9 @@ class IndexingWatchdogTerminalStatus(str, Enum):
    SUCCEEDED = "succeeded"

    SPAWN_FAILED = "spawn_failed"  # connector spawn failed
+    SPAWN_NOT_ALIVE = (
+        "spawn_not_alive"  # spawn succeeded but process did not come alive
+    )

    BLOCKED_BY_DELETION = "blocked_by_deletion"
    BLOCKED_BY_STOP_SIGNAL = "blocked_by_stop_signal"
@@ -103,6 +108,9 @@ class IndexingWatchdogTerminalStatus(str, Enum):
        "index_attempt_mismatch"  # expected index attempt metadata not found in db
    )

+    CONNECTOR_VALIDATION_ERROR = (
+        "connector_validation_error"  # the connector validation failed
+    )
    CONNECTOR_EXCEPTIONED = "connector_exceptioned"  # the connector itself exceptioned
    WATCHDOG_EXCEPTIONED = "watchdog_exceptioned"  # the watchdog exceptioned

@@ -112,6 +120,8 @@ class IndexingWatchdogTerminalStatus(str, Enum):
    # the watchdog terminated the task due to no activity
    TERMINATED_BY_ACTIVITY_TIMEOUT = "terminated_by_activity_timeout"

+    # NOTE: this may actually be the same as SIGKILL, but parsed differently by python
+    # consolidate once we know more
    OUT_OF_MEMORY = "out_of_memory"

    PROCESS_SIGNAL_SIGKILL = "process_signal_sigkill"
@@ -121,6 +131,7 @@ class IndexingWatchdogTerminalStatus(str, Enum):
        _ENUM_TO_CODE: dict[IndexingWatchdogTerminalStatus, int] = {
            IndexingWatchdogTerminalStatus.PROCESS_SIGNAL_SIGKILL: -9,
            IndexingWatchdogTerminalStatus.OUT_OF_MEMORY: 137,
+            IndexingWatchdogTerminalStatus.CONNECTOR_VALIDATION_ERROR: 247,
            IndexingWatchdogTerminalStatus.BLOCKED_BY_DELETION: 248,
            IndexingWatchdogTerminalStatus.BLOCKED_BY_STOP_SIGNAL: 249,
            IndexingWatchdogTerminalStatus.FENCE_NOT_FOUND: 250,
@@ -137,6 +148,8 @@ class IndexingWatchdogTerminalStatus(str, Enum):
    def from_code(cls, code: int) -> "IndexingWatchdogTerminalStatus":
        _CODE_TO_ENUM: dict[int, IndexingWatchdogTerminalStatus] = {
            -9: IndexingWatchdogTerminalStatus.PROCESS_SIGNAL_SIGKILL,
+            137: IndexingWatchdogTerminalStatus.OUT_OF_MEMORY,
+            247: IndexingWatchdogTerminalStatus.CONNECTOR_VALIDATION_ERROR,
            248: IndexingWatchdogTerminalStatus.BLOCKED_BY_DELETION,
            249: IndexingWatchdogTerminalStatus.BLOCKED_BY_STOP_SIGNAL,
            250: IndexingWatchdogTerminalStatus.FENCE_NOT_FOUND,
@@ -348,12 +361,13 @@ def monitor_ccpair_indexing_taskset(
 def check_for_indexing(self: Task, *, tenant_id: str | None) -> int | None:
    """a lightweight task used to kick off indexing tasks.
    Occcasionally does some validation of existing state to clear up error conditions"""
+
    time_start = time.monotonic()

    tasks_created = 0
    locked = False
-    redis_client = get_redis_client(tenant_id=tenant_id)
-    redis_client_replica = get_redis_replica_client(tenant_id=tenant_id)
+    redis_client = get_redis_client()
+    redis_client_replica = get_redis_replica_client()

    # we need to use celery's redis client to access its redis data
    # (which lives on a different db number)
@@ -391,7 +405,7 @@ def check_for_indexing(self: Task, *, tenant_id: str | None) -> int | None:
        # 1/3: KICKOFF

        # check for search settings swap
-        with get_session_with_tenant(tenant_id=tenant_id) as db_session:
+        with get_session_with_current_tenant() as db_session:
            old_search_settings = check_index_swap(db_session=db_session)
            current_search_settings = get_current_search_settings(db_session)
            # So that the first time users aren't surprised by really slow speed of first
@@ -412,7 +426,7 @@ def check_for_indexing(self: Task, *, tenant_id: str | None) -> int | None:
        # gather cc_pair_ids
        lock_beat.reacquire()
        cc_pair_ids: list[int] = []
-        with get_session_with_tenant(tenant_id) as db_session:
+        with get_session_with_current_tenant() as db_session:
            cc_pairs = fetch_connector_credential_pairs(db_session)
            for cc_pair_entry in cc_pairs:
                cc_pair_ids.append(cc_pair_entry.id)
@@ -422,7 +436,7 @@ def check_for_indexing(self: Task, *, tenant_id: str | None) -> int | None:
            lock_beat.reacquire()

            redis_connector = RedisConnector(tenant_id, cc_pair_id)
-            with get_session_with_tenant(tenant_id) as db_session:
+            with get_session_with_current_tenant() as db_session:
                search_settings_list = get_active_search_settings_list(db_session)
                for search_settings_instance in search_settings_list:
                    redis_connector_index = redis_connector.new_index(
@@ -500,7 +514,7 @@ def check_for_indexing(self: Task, *, tenant_id: str | None) -> int | None:

        # Fail any index attempts in the DB that don't have fences
        # This shouldn't ever happen!
-        with get_session_with_tenant(tenant_id) as db_session:
+        with get_session_with_current_tenant() as db_session:
            unfenced_attempt_ids = get_unfenced_index_attempt_ids(
                db_session, redis_client
            )
@@ -552,7 +566,7 @@ def check_for_indexing(self: Task, *, tenant_id: str | None) -> int | None:

            key_str = key_bytes.decode("utf-8")
            if key_str.startswith(RedisConnectorIndex.FENCE_PREFIX):
-                with get_session_with_tenant(tenant_id) as db_session:
+                with get_session_with_current_tenant() as db_session:
                    monitor_ccpair_indexing_taskset(
                        tenant_id, key_bytes, redis_client_replica, db_session
                    )
@@ -583,8 +597,8 @@ def connector_indexing_task(
    index_attempt_id: int,
    cc_pair_id: int,
    search_settings_id: int,
-    tenant_id: str | None,
    is_ee: bool,
+    tenant_id: str | None,
 ) -> int | None:
    """Indexing task. For a cc pair, this task pulls all document IDs from the source
    and compares those IDs to locally stored documents and deletes all locally stored IDs missing
@@ -635,7 +649,7 @@ def connector_indexing_task(
    redis_connector = RedisConnector(tenant_id, cc_pair_id)
    redis_connector_index = redis_connector.new_index(search_settings_id)

-    r = get_redis_client(tenant_id=tenant_id)
+    r = get_redis_client()

    if redis_connector.delete.fenced:
        raise SimpleJobException(
@@ -729,7 +743,7 @@ def connector_indexing_task(
    redis_connector_index.set_fence(payload)

    try:
-        with get_session_with_tenant(tenant_id) as db_session:
+        with get_session_with_current_tenant() as db_session:
            attempt = get_index_attempt(db_session, index_attempt_id)
            if not attempt:
                raise SimpleJobException(
@@ -764,9 +778,9 @@ def connector_indexing_task(
        callback = IndexingCallback(
            os.getppid(),
            redis_connector,
-            redis_connector_index,
            lock,
            r,
+            redis_connector_index,
        )

        logger.info(
@@ -788,6 +802,15 @@ def connector_indexing_task(
        # get back the total number of indexed docs and return it
        n_final_progress = redis_connector_index.get_progress()
        redis_connector_index.set_generator_complete(HTTPStatus.OK.value)
+    except ConnectorValidationError:
+        raise SimpleJobException(
+            f"Indexing task failed: attempt={index_attempt_id} "
+            f"tenant={tenant_id} "
+            f"cc_pair={cc_pair_id} "
+            f"search_settings={search_settings_id}",
+            code=IndexingWatchdogTerminalStatus.CONNECTOR_VALIDATION_ERROR.code,
+        )
+
    except Exception as e:
        logger.exception(
            f"Indexing spawned task failed: attempt={index_attempt_id} "
@@ -795,8 +818,8 @@ def connector_indexing_task(
            f"cc_pair={cc_pair_id} "
            f"search_settings={search_settings_id}"
        )
-
        raise e
+
    finally:
        if lock.owned():
            lock.release()
@@ -907,12 +930,11 @@ def connector_indexing_proxy_task(
        index_attempt_id,
        cc_pair_id,
        search_settings_id,
-        tenant_id,
        global_version.is_ee_version(),
-        pure=False,
+        tenant_id,
    )

-    if not job:
+    if not job or not job.process:
        result.status = IndexingWatchdogTerminalStatus.SPAWN_FAILED
        task_logger.info(
            log_builder.build(
@@ -923,13 +945,39 @@ def connector_indexing_proxy_task(
        )
        return

-    task_logger.info(log_builder.build("Indexing watchdog - spawn succeeded"))
+    # Ensure the process has moved out of the starting state
+    num_waits = 0
+    while True:
+        if num_waits > 15:
+            result.status = IndexingWatchdogTerminalStatus.SPAWN_NOT_ALIVE
+            task_logger.info(
+                log_builder.build(
+                    "Indexing watchdog - finished",
+                    status=str(result.status.value),
+                    exit_code=str(result.exit_code),
+                )
+            )
+            job.release()
+            return
+
+        if job.process.is_alive() or job.process.exitcode is not None:
+            break
+
+        sleep(1)
+        num_waits += 1
+
+    task_logger.info(
+        log_builder.build(
+            "Indexing watchdog - spawn succeeded",
+            pid=str(job.process.pid),
+        )
+    )

    redis_connector = RedisConnector(tenant_id, cc_pair_id)
    redis_connector_index = redis_connector.new_index(search_settings_id)

    try:
-        with get_session_with_tenant(tenant_id) as db_session:
+        with get_session_with_current_tenant() as db_session:
            index_attempt = get_index_attempt(
                db_session=db_session, index_attempt_id=index_attempt_id
            )
@@ -940,6 +988,9 @@ def connector_indexing_proxy_task(
                index_attempt.connector_credential_pair.connector.source.value
            )

+        redis_connector_index.set_active()  # renew active signal
+        redis_connector_index.set_connector_active()  # prime the connector active signal
+
        while True:
            sleep(5)

@@ -974,10 +1025,42 @@ def connector_indexing_proxy_task(
                result.status = IndexingWatchdogTerminalStatus.TERMINATED_BY_SIGNAL
                break

+            if not redis_connector_index.connector_active():
+                task_logger.warning(
+                    log_builder.build(
+                        "Indexing watchdog - activity timeout exceeded",
+                        timeout=f"{CELERY_INDEXING_WATCHDOG_CONNECTOR_TIMEOUT}s",
+                    )
+                )
+
+                try:
+                    with get_session_with_current_tenant() as db_session:
+                        mark_attempt_failed(
+                            index_attempt_id,
+                            db_session,
+                            "Indexing watchdog - activity timeout exceeded: "
+                            f"attempt={index_attempt_id} "
+                            f"timeout={CELERY_INDEXING_WATCHDOG_CONNECTOR_TIMEOUT}s",
+                        )
+                except Exception:
+                    # if the DB exceptions, we'll just get an unfriendly failure message
+                    # in the UI instead of the cancellation message
+                    logger.exception(
+                        log_builder.build(
+                            "Indexing watchdog - transient exception marking index attempt as failed"
+                        )
+                    )
+
+                job.cancel()
+                result.status = (
+                    IndexingWatchdogTerminalStatus.TERMINATED_BY_ACTIVITY_TIMEOUT
+                )
+                break
+
            # if the spawned task is still running, restart the check once again
            # if the index attempt is not in a finished status
            try:
-                with get_session_with_tenant(tenant_id) as db_session:
+                with get_session_with_current_tenant() as db_session:
                    index_attempt = get_index_attempt(
                        db_session=db_session, index_attempt_id=index_attempt_id
                    )
@@ -996,16 +1079,20 @@ def connector_indexing_proxy_task(
                    )
                )
                continue
-    except Exception:
+    except Exception as e:
        result.status = IndexingWatchdogTerminalStatus.WATCHDOG_EXCEPTIONED
-        result.exception_str = traceback.format_exc()
+        if isinstance(e, ConnectorValidationError):
+            # No need to expose full stack trace for validation errors
+            result.exception_str = str(e)
+        else:
+            result.exception_str = traceback.format_exc()

    # handle exit and reporting
    elapsed = time.monotonic() - start
    if result.exception_str is not None:
        # print with exception
        try:
-            with get_session_with_tenant(tenant_id) as db_session:
+            with get_session_with_current_tenant() as db_session:
                failure_reason = (
                    f"Spawned task exceptioned: exit_code={result.exit_code}"
                )
@@ -1045,7 +1132,7 @@ def connector_indexing_proxy_task(
    # print without exception
    if result.status == IndexingWatchdogTerminalStatus.TERMINATED_BY_SIGNAL:
        try:
-            with get_session_with_tenant(tenant_id) as db_session:
+            with get_session_with_current_tenant() as db_session:
                mark_attempt_canceled(
                    index_attempt_id,
                    db_session,
@@ -1095,7 +1182,7 @@ def check_for_checkpoint_cleanup(*, tenant_id: str | None) -> None:

    try:
        locked = True
-        with get_session_with_tenant(tenant_id=tenant_id) as db_session:
+        with get_session_with_current_tenant() as db_session:
            old_attempts = get_index_attempts_with_old_checkpoints(db_session)
            for attempt in old_attempts:
                task_logger.info(
@@ -1131,5 +1218,5 @@ def cleanup_checkpoint_task(
    self: Task, *, index_attempt_id: int, tenant_id: str | None
 ) -> None:
    """Clean up a checkpoint for a given index attempt"""
-    with get_session_with_tenant(tenant_id=tenant_id) as db_session:
+    with get_session_with_current_tenant() as db_session:
        cleanup_checkpoint(db_session, index_attempt_id)
--- a/backend/onyx/background/celery/tasks/indexing/utils.py
+++ b/backend/onyx/background/celery/tasks/indexing/utils.py
@@ -23,7 +23,7 @@ from onyx.configs.constants import OnyxCeleryQueues
 from onyx.configs.constants import OnyxCeleryTask
 from onyx.configs.constants import OnyxRedisConstants
 from onyx.db.engine import get_db_current_time
-from onyx.db.engine import get_session_with_tenant
+from onyx.db.engine import get_session_with_current_tenant
 from onyx.db.enums import ConnectorCredentialPairStatus
 from onyx.db.enums import IndexingStatus
 from onyx.db.enums import IndexModelStatus
@@ -93,27 +93,25 @@ def get_unfenced_index_attempt_ids(db_session: Session, r: redis.Redis) -> list[
    return unfenced_attempts


-class IndexingCallback(IndexingHeartbeatInterface):
+class IndexingCallbackBase(IndexingHeartbeatInterface):
    PARENT_CHECK_INTERVAL = 60

    def __init__(
        self,
        parent_pid: int,
        redis_connector: RedisConnector,
-        redis_connector_index: RedisConnectorIndex,
        redis_lock: RedisLock,
        redis_client: Redis,
    ):
        super().__init__()
        self.parent_pid = parent_pid
        self.redis_connector: RedisConnector = redis_connector
-        self.redis_connector_index: RedisConnectorIndex = redis_connector_index
        self.redis_lock: RedisLock = redis_lock
        self.redis_client = redis_client
        self.started: datetime = datetime.now(timezone.utc)
        self.redis_lock.reacquire()

-        self.last_tag: str = "IndexingCallback.__init__"
+        self.last_tag: str = f"{self.__class__.__name__}.__init__"
        self.last_lock_reacquire: datetime = datetime.now(timezone.utc)
        self.last_lock_monotonic = time.monotonic()

@@ -127,8 +125,8 @@ class IndexingCallback(IndexingHeartbeatInterface):

    def progress(self, tag: str, amount: int) -> None:
        # rkuo: this shouldn't be necessary yet because we spawn the process this runs inside
-        # with daemon = True. It seems likely some indexing tasks will need to spawn other processes eventually
-        # so leave this code in until we're ready to test it.
+        # with daemon=True. It seems likely some indexing tasks will need to spawn other processes
+        # eventually, which daemon=True prevents, so leave this code in until we're ready to test it.

        # if self.parent_pid:
        #     # check if the parent pid is alive so we aren't running as a zombie
@@ -143,8 +141,6 @@ class IndexingCallback(IndexingHeartbeatInterface):
        #         self.last_parent_check = now

        try:
-            self.redis_connector.prune.set_active()
-
            current_time = time.monotonic()
            if current_time - self.last_lock_monotonic >= (
                CELERY_GENERIC_BEAT_LOCK_TIMEOUT / 4
@@ -156,7 +152,7 @@ class IndexingCallback(IndexingHeartbeatInterface):
            self.last_tag = tag
        except LockError:
            logger.exception(
-                f"IndexingCallback - lock.reacquire exceptioned: "
+                f"{self.__class__.__name__} - lock.reacquire exceptioned: "
                f"lock_timeout={self.redis_lock.timeout} "
                f"start={self.started} "
                f"last_tag={self.last_tag} "
@@ -167,6 +163,24 @@ class IndexingCallback(IndexingHeartbeatInterface):
            redis_lock_dump(self.redis_lock, self.redis_client)
            raise

+
+class IndexingCallback(IndexingCallbackBase):
+    def __init__(
+        self,
+        parent_pid: int,
+        redis_connector: RedisConnector,
+        redis_lock: RedisLock,
+        redis_client: Redis,
+        redis_connector_index: RedisConnectorIndex,
+    ):
+        super().__init__(parent_pid, redis_connector, redis_lock, redis_client)
+
+        self.redis_connector_index: RedisConnectorIndex = redis_connector_index
+
+    def progress(self, tag: str, amount: int) -> None:
+        self.redis_connector_index.set_active()
+        self.redis_connector_index.set_connector_active()
+        super().progress(tag, amount)
        self.redis_client.incrby(
            self.redis_connector_index.generator_progress_key, amount
        )
@@ -318,7 +332,7 @@ def validate_indexing_fences(
        if not key_str.startswith(RedisConnectorIndex.FENCE_PREFIX):
            continue

-        with get_session_with_tenant(tenant_id) as db_session:
+        with get_session_with_current_tenant() as db_session:
            validate_indexing_fence(
                tenant_id,
                key_bytes,
--- a/backend/onyx/background/celery/tasks/llm_model_update/tasks.py
+++ b/backend/onyx/background/celery/tasks/llm_model_update/tasks.py
@@ -8,7 +8,7 @@ from onyx.background.celery.apps.app_base import task_logger
 from onyx.configs.app_configs import JOB_TIMEOUT
 from onyx.configs.app_configs import LLM_MODEL_UPDATE_API_URL
 from onyx.configs.constants import OnyxCeleryTask
-from onyx.db.engine import get_session_with_tenant
+from onyx.db.engine import get_session_with_current_tenant
 from onyx.db.models import LLMProvider


@@ -75,7 +75,7 @@ def check_for_llm_model_update(self: Task, *, tenant_id: str | None) -> bool | N
        return None

    # Then update the database with the fetched models
-    with get_session_with_tenant(tenant_id) as db_session:
+    with get_session_with_current_tenant() as db_session:
        # Get the default LLM provider
        default_provider = (
            db_session.query(LLMProvider)
--- a/backend/onyx/background/celery/tasks/monitoring/tasks.py
+++ b/backend/onyx/background/celery/tasks/monitoring/tasks.py
@@ -26,7 +26,8 @@ from onyx.configs.constants import OnyxCeleryTask
 from onyx.configs.constants import OnyxRedisLocks
 from onyx.db.engine import get_all_tenant_ids
 from onyx.db.engine import get_db_current_time
-from onyx.db.engine import get_session_with_tenant
+from onyx.db.engine import get_session_with_current_tenant
+from onyx.db.engine import get_session_with_shared_schema
 from onyx.db.enums import IndexingStatus
 from onyx.db.enums import SyncStatus
 from onyx.db.enums import SyncType
@@ -42,7 +43,6 @@ from onyx.utils.telemetry import optional_telemetry
 from onyx.utils.telemetry import RecordType
 from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR

-
 _MONITORING_SOFT_TIME_LIMIT = 60 * 5  # 5 minutes
 _MONITORING_TIME_LIMIT = _MONITORING_SOFT_TIME_LIMIT + 60  # 6 minutes

@@ -668,7 +668,7 @@ def monitor_background_processes(self: Task, *, tenant_id: str | None) -> None:
        CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)

    task_logger.info("Starting background monitoring")
-    r = get_redis_client(tenant_id=tenant_id)
+    r = get_redis_client()

    lock_monitoring: RedisLock = r.lock(
        OnyxRedisLocks.MONITOR_BACKGROUND_PROCESSES_LOCK,
@@ -683,7 +683,7 @@ def monitor_background_processes(self: Task, *, tenant_id: str | None) -> None:
    try:
        # Get Redis client for Celery broker
        redis_celery = self.app.broker_connection().channel().client  # type: ignore
-        redis_std = get_redis_client(tenant_id=tenant_id)
+        redis_std = get_redis_client()

        # Define metric collection functions and their dependencies
        metric_functions: list[Callable[[], list[Metric]]] = [
@@ -693,7 +693,7 @@ def monitor_background_processes(self: Task, *, tenant_id: str | None) -> None:
        ]

        # Collect and log each metric
-        with get_session_with_tenant(tenant_id) as db_session:
+        with get_session_with_current_tenant() as db_session:
            for metric_fn in metric_functions:
                metrics = metric_fn()
                for metric in metrics:
@@ -771,12 +771,11 @@ def cloud_check_alembic() -> bool | None:
            if tenant_id is None:
                continue

-            with get_session_with_tenant(tenant_id=None) as session:
+            with get_session_with_shared_schema() as session:
                try:
                    result = session.execute(
                        text(f'SELECT * FROM "{tenant_id}".alembic_version LIMIT 1')
                    )
-
                    result_scalar: str | None = result.scalar_one_or_none()
                    if result_scalar is None:
                        raise ValueError("Alembic version should not be None.")
--- a/backend/onyx/background/celery/tasks/periodic/tasks.py
+++ b/backend/onyx/background/celery/tasks/periodic/tasks.py
@@ -15,7 +15,7 @@ from onyx.background.celery.apps.app_base import task_logger
 from onyx.configs.app_configs import JOB_TIMEOUT
 from onyx.configs.constants import OnyxCeleryTask
 from onyx.configs.constants import PostgresAdvisoryLocks
-from onyx.db.engine import get_session_with_tenant
+from onyx.db.engine import get_session_with_current_tenant


@shared_task(
@@ -36,7 +36,7 @@ def kombu_message_cleanup_task(self: Any, tenant_id: str | None) -> int:
    ctx["deleted"] = 0
    ctx["cleanup_age"] = KOMBU_MESSAGE_CLEANUP_AGE
    ctx["page_limit"] = KOMBU_MESSAGE_CLEANUP_PAGE_LIMIT
-    with get_session_with_tenant(tenant_id) as db_session:
+    with get_session_with_current_tenant() as db_session:
        # Exit the task if we can't take the advisory lock
        result = db_session.execute(
            text("SELECT pg_try_advisory_lock(:id)"),
--- a/backend/onyx/background/celery/tasks/pruning/tasks.py
+++ b/backend/onyx/background/celery/tasks/pruning/tasks.py
@@ -21,7 +21,7 @@ from onyx.background.celery.celery_redis import celery_get_queue_length
 from onyx.background.celery.celery_redis import celery_get_queued_task_ids
 from onyx.background.celery.celery_redis import celery_get_unacked_task_ids
 from onyx.background.celery.celery_utils import extract_ids_from_runnable_connector
-from onyx.background.celery.tasks.indexing.utils import IndexingCallback
+from onyx.background.celery.tasks.indexing.utils import IndexingCallbackBase
 from onyx.configs.app_configs import ALLOW_SIMULTANEOUS_PRUNING
 from onyx.configs.app_configs import JOB_TIMEOUT
 from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
@@ -41,7 +41,7 @@ from onyx.db.connector_credential_pair import get_connector_credential_pair
 from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
 from onyx.db.connector_credential_pair import get_connector_credential_pairs
 from onyx.db.document import get_documents_for_connector_credential_pair
-from onyx.db.engine import get_session_with_tenant
+from onyx.db.engine import get_session_with_current_tenant
 from onyx.db.enums import ConnectorCredentialPairStatus
 from onyx.db.enums import SyncStatus
 from onyx.db.enums import SyncType
@@ -62,6 +62,12 @@ from onyx.utils.logger import setup_logger
 logger = setup_logger()


+class PruneCallback(IndexingCallbackBase):
+    """Heartbeat callback used by the connector pruning task.
+
+    Extends IndexingCallbackBase so that every progress report first signals
+    prune activity through ``redis_connector.prune`` and then runs the
+    base-class progress handling.
+    """
+
+    def progress(self, tag: str, amount: int) -> None:
+        """Signal prune activity, then delegate to the base implementation.
+
+        Args:
+            tag: label forwarded unchanged to IndexingCallbackBase.progress.
+            amount: value forwarded unchanged to IndexingCallbackBase.progress.
+        """
+        # NOTE(review): set_active() presumably refreshes a Redis liveness
+        # signal for the prune operation -- confirm against RedisConnectorPrune.
+        self.redis_connector.prune.set_active()
+        super().progress(tag, amount)
+
+
 """Jobs / utils for kicking off pruning tasks."""


@@ -108,8 +114,8 @@ def _is_pruning_due(cc_pair: ConnectorCredentialPair) -> bool:
    bind=True,
 )
 def check_for_pruning(self: Task, *, tenant_id: str | None) -> bool | None:
-    r = get_redis_client(tenant_id=tenant_id)
-    r_replica = get_redis_replica_client(tenant_id=tenant_id)
+    r = get_redis_client()
+    r_replica = get_redis_replica_client()
    r_celery: Redis = self.app.broker_connection().channel().client  # type: ignore

    lock_beat: RedisLock = r.lock(
@@ -127,14 +133,14 @@ def check_for_pruning(self: Task, *, tenant_id: str | None) -> bool | None:
        # but pruning only kicks off once per hour
        if not r.exists(OnyxRedisSignals.BLOCK_PRUNING):
            cc_pair_ids: list[int] = []
-            with get_session_with_tenant(tenant_id) as db_session:
+            with get_session_with_current_tenant() as db_session:
                cc_pairs = get_connector_credential_pairs(db_session)
                for cc_pair_entry in cc_pairs:
                    cc_pair_ids.append(cc_pair_entry.id)

            for cc_pair_id in cc_pair_ids:
                lock_beat.reacquire()
-                with get_session_with_tenant(tenant_id) as db_session:
+                with get_session_with_current_tenant() as db_session:
                    cc_pair = get_connector_credential_pair_from_id(
                        db_session=db_session,
                        cc_pair_id=cc_pair_id,
@@ -182,7 +188,7 @@ def check_for_pruning(self: Task, *, tenant_id: str | None) -> bool | None:

            key_str = key_bytes.decode("utf-8")
            if key_str.startswith(RedisConnectorPrune.FENCE_PREFIX):
-                with get_session_with_tenant(tenant_id) as db_session:
+                with get_session_with_current_tenant() as db_session:
                    monitor_ccpair_pruning_taskset(tenant_id, key_bytes, r, db_session)
    except SoftTimeLimitExceeded:
        task_logger.info(
@@ -337,7 +343,7 @@ def connector_pruning_generator_task(

    redis_connector = RedisConnector(tenant_id, cc_pair_id)

-    r = get_redis_client(tenant_id=tenant_id)
+    r = get_redis_client()

    # this wait is needed to avoid a race condition where
    # the primary worker sends the task and it is immediately executed
@@ -395,7 +401,7 @@ def connector_pruning_generator_task(
        return None

    try:
-        with get_session_with_tenant(tenant_id) as db_session:
+        with get_session_with_current_tenant() as db_session:
            cc_pair = get_connector_credential_pair(
                db_session=db_session,
                connector_id=connector_id,
@@ -425,6 +431,7 @@ def connector_pruning_generator_task(
                f"cc_pair={cc_pair_id} "
                f"connector_source={cc_pair.connector.source}"
            )
+
            runnable_connector = instantiate_connector(
                db_session,
                cc_pair.connector.source,
@@ -434,12 +441,11 @@ def connector_pruning_generator_task(
            )

            search_settings = get_current_search_settings(db_session)
-            redis_connector_index = redis_connector.new_index(search_settings.id)
+            redis_connector.new_index(search_settings.id)

-            callback = IndexingCallback(
+            callback = PruneCallback(
                0,
                redis_connector,
-                redis_connector_index,
                lock,
                r,
            )
--- a/backend/onyx/background/celery/tasks/shared/tasks.py
+++ b/backend/onyx/background/celery/tasks/shared/tasks.py
@@ -27,7 +27,7 @@ from onyx.db.document import mark_document_as_modified
 from onyx.db.document import mark_document_as_synced
 from onyx.db.document_set import fetch_document_sets_for_document
 from onyx.db.engine import get_all_tenant_ids
-from onyx.db.engine import get_session_with_tenant
+from onyx.db.engine import get_session_with_current_tenant
 from onyx.db.search_settings import get_active_search_settings
 from onyx.document_index.factory import get_default_document_index
 from onyx.document_index.interfaces import VespaDocumentFields
@@ -79,7 +79,7 @@ def document_by_cc_pair_cleanup_task(
    start = time.monotonic()

    try:
-        with get_session_with_tenant(tenant_id) as db_session:
+        with get_session_with_current_tenant() as db_session:
            action = "skip"
            chunks_affected = 0

@@ -205,7 +205,7 @@ def document_by_cc_pair_cleanup_task(
                f"Max celery task retries reached. Marking doc as dirty for reconciliation: "
                f"doc={document_id}"
            )
-            with get_session_with_tenant(tenant_id) as db_session:
+            with get_session_with_current_tenant() as db_session:
                # delete the cc pair relationship now and let reconciliation clean it up
                # in vespa
                delete_document_by_connector_credential_pair__no_commit(
--- a/backend/onyx/background/celery/tasks/vespa/tasks.py
+++ b/backend/onyx/background/celery/tasks/vespa/tasks.py
@@ -34,7 +34,7 @@ from onyx.db.document_set import fetch_document_sets
 from onyx.db.document_set import fetch_document_sets_for_document
 from onyx.db.document_set import get_document_set_by_id
 from onyx.db.document_set import mark_document_set_as_synced
-from onyx.db.engine import get_session_with_tenant
+from onyx.db.engine import get_session_with_current_tenant
 from onyx.db.enums import SyncStatus
 from onyx.db.enums import SyncType
 from onyx.db.models import DocumentSet
@@ -84,8 +84,8 @@ def check_for_vespa_sync_task(self: Task, *, tenant_id: str | None) -> bool | No

    time_start = time.monotonic()

-    r = get_redis_client(tenant_id=tenant_id)
-    r_replica = get_redis_replica_client(tenant_id=tenant_id)
+    r = get_redis_client()
+    r_replica = get_redis_replica_client()

    lock_beat: RedisLock = r.lock(
        OnyxRedisLocks.CHECK_VESPA_SYNC_BEAT_LOCK,
@@ -98,7 +98,7 @@ def check_for_vespa_sync_task(self: Task, *, tenant_id: str | None) -> bool | No

    try:
        # 1/3: KICKOFF
-        with get_session_with_tenant(tenant_id) as db_session:
+        with get_session_with_current_tenant() as db_session:
            try_generate_stale_document_sync_tasks(
                self.app, VESPA_SYNC_MAX_TASKS, db_session, r, lock_beat, tenant_id
            )
@@ -106,7 +106,7 @@ def check_for_vespa_sync_task(self: Task, *, tenant_id: str | None) -> bool | No
        # region document set scan
        lock_beat.reacquire()
        document_set_ids: list[int] = []
-        with get_session_with_tenant(tenant_id) as db_session:
+        with get_session_with_current_tenant() as db_session:
            # check if any document sets are not synced
            document_set_info = fetch_document_sets(
                user_id=None, db_session=db_session, include_outdated=True
@@ -117,7 +117,7 @@ def check_for_vespa_sync_task(self: Task, *, tenant_id: str | None) -> bool | No

        for document_set_id in document_set_ids:
            lock_beat.reacquire()
-            with get_session_with_tenant(tenant_id) as db_session:
+            with get_session_with_current_tenant() as db_session:
                try_generate_document_set_sync_tasks(
                    self.app, document_set_id, db_session, r, lock_beat, tenant_id
                )
@@ -136,7 +136,7 @@ def check_for_vespa_sync_task(self: Task, *, tenant_id: str | None) -> bool | No
                pass
            else:
                usergroup_ids: list[int] = []
-                with get_session_with_tenant(tenant_id) as db_session:
+                with get_session_with_current_tenant() as db_session:
                    user_groups = fetch_user_groups(
                        db_session=db_session, only_up_to_date=False
                    )
@@ -146,7 +146,7 @@ def check_for_vespa_sync_task(self: Task, *, tenant_id: str | None) -> bool | No

                for usergroup_id in usergroup_ids:
                    lock_beat.reacquire()
-                    with get_session_with_tenant(tenant_id) as db_session:
+                    with get_session_with_current_tenant() as db_session:
                        try_generate_user_group_sync_tasks(
                            self.app, usergroup_id, db_session, r, lock_beat, tenant_id
                        )
@@ -167,7 +167,7 @@ def check_for_vespa_sync_task(self: Task, *, tenant_id: str | None) -> bool | No
            if key_str == RedisGlobalConnectorCredentialPair.FENCE_KEY:
                monitor_connector_taskset(r)
            elif key_str.startswith(RedisDocumentSet.FENCE_PREFIX):
-                with get_session_with_tenant(tenant_id) as db_session:
+                with get_session_with_current_tenant() as db_session:
                    monitor_document_set_taskset(tenant_id, key_bytes, r, db_session)
            elif key_str.startswith(RedisUserGroup.FENCE_PREFIX):
                monitor_usergroup_taskset = (
@@ -177,7 +177,7 @@ def check_for_vespa_sync_task(self: Task, *, tenant_id: str | None) -> bool | No
                        noop_fallback,
                    )
                )
-                with get_session_with_tenant(tenant_id) as db_session:
+                with get_session_with_current_tenant() as db_session:
                    monitor_usergroup_taskset(tenant_id, key_bytes, r, db_session)

    except SoftTimeLimitExceeded:
@@ -523,12 +523,12 @@ def monitor_document_set_taskset(
    max_retries=3,
 )
 def vespa_metadata_sync_task(
-    self: Task, document_id: str, tenant_id: str | None
+    self: Task, document_id: str, *, tenant_id: str | None
 ) -> bool:
    start = time.monotonic()

    try:
-        with get_session_with_tenant(tenant_id) as db_session:
+        with get_session_with_current_tenant() as db_session:
            active_search_settings = get_active_search_settings(db_session)
            doc_index = get_default_document_index(
                search_settings=active_search_settings.primary,
--- a/backend/onyx/background/error_logging.py
+++ b/backend/onyx/background/error_logging.py
@@ -1,5 +1,5 @@
 from onyx.db.background_error import create_background_error
-from onyx.db.engine import get_session_with_tenant
+from onyx.db.engine import get_session_with_current_tenant


 def emit_background_error(
@@ -9,5 +9,5 @@ def emit_background_error(
    """Currently just saves a row in the background_errors table.

    In the future, could create notifications based on the severity."""
-    with get_session_with_tenant() as db_session:
+    with get_session_with_current_tenant() as db_session:
        create_background_error(db_session, message, cc_pair_id)
--- a/backend/onyx/background/indexing/run_indexing.py
+++ b/backend/onyx/background/indexing/run_indexing.py
@@ -15,12 +15,14 @@ from onyx.background.indexing.memory_tracer import MemoryTracer
 from onyx.configs.app_configs import INDEX_BATCH_SIZE
 from onyx.configs.app_configs import INDEXING_SIZE_WARNING_THRESHOLD
 from onyx.configs.app_configs import INDEXING_TRACER_INTERVAL
+from onyx.configs.app_configs import INTEGRATION_TESTS_MODE
 from onyx.configs.app_configs import LEAVE_CONNECTOR_ACTIVE_ON_INITIALIZATION_FAILURE
 from onyx.configs.app_configs import POLL_CONNECTOR_OFFSET
 from onyx.configs.constants import DocumentSource
 from onyx.configs.constants import MilestoneRecordType
 from onyx.connectors.connector_runner import ConnectorRunner
 from onyx.connectors.factory import instantiate_connector
+from onyx.connectors.interfaces import ConnectorValidationError
 from onyx.connectors.models import ConnectorCheckpoint
 from onyx.connectors.models import ConnectorFailure
 from onyx.connectors.models import Document
@@ -28,7 +30,7 @@ from onyx.connectors.models import IndexAttemptMetadata
 from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
 from onyx.db.connector_credential_pair import get_last_successful_attempt_time
 from onyx.db.connector_credential_pair import update_connector_credential_pair
-from onyx.db.engine import get_session_with_tenant
+from onyx.db.engine import get_session_with_current_tenant
 from onyx.db.enums import ConnectorCredentialPairStatus
 from onyx.db.index_attempt import create_index_attempt_error
 from onyx.db.index_attempt import get_index_attempt
@@ -86,6 +88,11 @@ def _get_connector_runner(
            credential=attempt.connector_credential_pair.credential,
            tenant_id=tenant_id,
        )
+
+        # validate the connector settings
+        if not INTEGRATION_TESTS_MODE:
+            runnable_connector.validate_connector_settings()
+
    except Exception as e:
        logger.exception(f"Unable to instantiate connector due to {e}")

@@ -244,7 +251,7 @@ def _run_indexing(
    """
    start_time = time.monotonic()  # just used for logging

-    with get_session_with_tenant(tenant_id) as db_session_temp:
+    with get_session_with_current_tenant() as db_session_temp:
        index_attempt_start = get_index_attempt(db_session_temp, index_attempt_id)
        if not index_attempt_start:
            raise ValueError(
@@ -370,7 +377,7 @@ def _run_indexing(
    document_count = 0
    chunk_count = 0
    try:
-        with get_session_with_tenant(tenant_id) as db_session_temp:
+        with get_session_with_current_tenant() as db_session_temp:
            index_attempt = get_index_attempt(db_session_temp, index_attempt_id)
            if not index_attempt:
                raise RuntimeError(f"Index attempt {index_attempt_id} not found in DB.")
@@ -430,7 +437,7 @@ def _run_indexing(
                        raise ConnectorStopSignal("Connector stop signal detected")

                # TODO: should we move this into the above callback instead?
-                with get_session_with_tenant(tenant_id) as db_session_temp:
+                with get_session_with_current_tenant() as db_session_temp:
                    # will exception if the connector/index attempt is marked as paused/failed
                    _check_connector_and_attempt_status(
                        db_session_temp, ctx, index_attempt_id
@@ -439,7 +446,7 @@ def _run_indexing(
                # save record of any failures at the connector level
                if failure is not None:
                    total_failures += 1
-                    with get_session_with_tenant(tenant_id) as db_session_temp:
+                    with get_session_with_current_tenant() as db_session_temp:
                        create_index_attempt_error(
                            index_attempt_id,
                            ctx.cc_pair_id,
@@ -503,7 +510,7 @@ def _run_indexing(
                    if document.id not in failed_document_ids
                ]
                for document_id in successful_document_ids:
-                    with get_session_with_tenant(tenant_id) as db_session_temp:
+                    with get_session_with_current_tenant() as db_session_temp:
                        if document_id in doc_id_to_unresolved_errors:
                            logger.info(
                                f"Resolving IndexAttemptError for document '{document_id}'"
@@ -516,7 +523,7 @@ def _run_indexing(
                # add brand new failures
                if index_pipeline_result.failures:
                    total_failures += len(index_pipeline_result.failures)
-                    with get_session_with_tenant(tenant_id) as db_session_temp:
+                    with get_session_with_current_tenant() as db_session_temp:
                        for failure in index_pipeline_result.failures:
                            create_index_attempt_error(
                                index_attempt_id,
@@ -533,7 +540,7 @@ def _run_indexing(
                    )

                # This new value is updated every batch, so UI can refresh per batch update
-                with get_session_with_tenant(tenant_id) as db_session_temp:
+                with get_session_with_current_tenant() as db_session_temp:
                    # NOTE: Postgres uses the start of the transactions when computing `NOW()`
                    # so we need either to commit() or to use a new session
                    update_docs_indexed(
@@ -555,7 +562,7 @@ def _run_indexing(
                check_checkpoint_size(checkpoint)

            # save latest checkpoint
-            with get_session_with_tenant(tenant_id) as db_session_temp:
+            with get_session_with_current_tenant() as db_session_temp:
                save_checkpoint(
                    db_session=db_session_temp,
                    index_attempt_id=index_attempt_id,
@@ -567,9 +574,29 @@ def _run_indexing(
            "Connector run exceptioned after elapsed time: "
            f"{time.monotonic() - start_time} seconds"
        )
+        if isinstance(e, ConnectorValidationError):
+            # On validation errors during indexing, we want to cancel the indexing attempt
+            # and mark the CCPair as invalid. This prevents the connector from being
+            # used in the future until the credentials are updated.
+            with get_session_with_current_tenant() as db_session_temp:
+                mark_attempt_canceled(
+                    index_attempt_id,
+                    db_session_temp,
+                    reason=str(e),
+                )

-        if isinstance(e, ConnectorStopSignal):
-            with get_session_with_tenant(tenant_id) as db_session_temp:
+                if ctx.is_primary:
+                    update_connector_credential_pair(
+                        db_session=db_session_temp,
+                        connector_id=ctx.connector_id,
+                        credential_id=ctx.credential_id,
+                        status=ConnectorCredentialPairStatus.INVALID,
+                    )
+            memory_tracer.stop()
+            raise e
+
+        elif isinstance(e, ConnectorStopSignal):
+            with get_session_with_current_tenant() as db_session_temp:
                mark_attempt_canceled(
                    index_attempt_id,
                    db_session_temp,
@@ -587,7 +614,7 @@ def _run_indexing(
            memory_tracer.stop()
            raise e
        else:
-            with get_session_with_tenant(tenant_id) as db_session_temp:
+            with get_session_with_current_tenant() as db_session_temp:
                mark_attempt_failed(
                    index_attempt_id,
                    db_session_temp,
@@ -609,7 +636,7 @@ def _run_indexing(
    memory_tracer.stop()

    elapsed_time = time.monotonic() - start_time
-    with get_session_with_tenant(tenant_id) as db_session_temp:
+    with get_session_with_current_tenant() as db_session_temp:
        # resolve entity-based errors
        for error in entity_based_unresolved_errors:
            logger.info(f"Resolving IndexAttemptError for entity '{error.entity_id}'")
@@ -669,7 +696,7 @@ def run_indexing_entrypoint(
    TaskAttemptSingleton.set_cc_and_index_id(
        index_attempt_id, connector_credential_pair_id
    )
-    with get_session_with_tenant(tenant_id) as db_session:
+    with get_session_with_current_tenant() as db_session:
        # TODO: remove long running session entirely
        attempt = transition_attempt_to_in_progress(index_attempt_id, db_session)

@@ -690,7 +717,7 @@ def run_indexing_entrypoint(
        f"credentials='{credential_id}'"
    )

-    with get_session_with_tenant(tenant_id) as db_session:
+    with get_session_with_current_tenant() as db_session:
        _run_indexing(db_session, index_attempt_id, tenant_id, callback)

    logger.info(
--- a/backend/onyx/chat/answer.py
+++ b/backend/onyx/chat/answer.py
@@ -27,8 +27,10 @@ from onyx.file_store.utils import InMemoryChatFile
 from onyx.llm.interfaces import LLM
 from onyx.tools.force import ForceUseTool
 from onyx.tools.tool import Tool
+from onyx.tools.tool_implementations.search.search_tool import QUERY_FIELD
 from onyx.tools.tool_implementations.search.search_tool import SearchTool
 from onyx.tools.utils import explicit_tool_calling_supported
+from onyx.utils.gpu_utils import gpu_status_request
 from onyx.utils.logger import setup_logger

 logger = setup_logger()
@@ -80,6 +82,26 @@ class Answer:
            and not skip_explicit_tool_calling
        )

+        rerank_settings = search_request.rerank_settings
+
+        using_cloud_reranking = (
+            rerank_settings is not None
+            and rerank_settings.rerank_provider_type is not None
+        )
+        allow_agent_reranking = gpu_status_request() or using_cloud_reranking
+
+        # TODO: this is a hack to force the query to be used for the search tool
+        #       this should be removed once we fully unify graph inputs (i.e.
+        #       remove SearchQuery entirely)
+        if (
+            force_use_tool.force_use
+            and search_tool
+            and force_use_tool.args
+            and force_use_tool.tool_name == search_tool.name
+            and QUERY_FIELD in force_use_tool.args
+        ):
+            search_request.query = force_use_tool.args[QUERY_FIELD]
+
        self.graph_inputs = GraphInputs(
            search_request=search_request,
            prompt_builder=prompt_builder,
@@ -94,7 +116,6 @@ class Answer:
            force_use_tool=force_use_tool,
            using_tool_calling_llm=using_tool_calling_llm,
        )
-        assert db_session, "db_session must be provided for agentic persistence"
        self.graph_persistence = GraphPersistence(
            db_session=db_session,
            chat_session_id=chat_session_id,
@@ -104,6 +125,7 @@ class Answer:
            use_agentic_search=use_agentic_search,
            skip_gen_ai_answer_generation=skip_gen_ai_answer_generation,
            allow_refinement=True,
+            allow_agent_reranking=allow_agent_reranking,
        )
        self.graph_config = GraphConfig(
            inputs=self.graph_inputs,
--- a/backend/onyx/chat/chat_utils.py
+++ b/backend/onyx/chat/chat_utils.py
@@ -190,7 +190,8 @@ def create_chat_chain(
            and previous_message.message_type == MessageType.ASSISTANT
            and mainline_messages
        ):
-            mainline_messages[-1] = current_message
+            if current_message.refined_answer_improvement:
+                mainline_messages[-1] = current_message
        else:
            mainline_messages.append(current_message)

--- a/backend/onyx/chat/models.py
+++ b/backend/onyx/chat/models.py
@@ -142,6 +142,15 @@ class MessageResponseIDInfo(BaseModel):
    reserved_assistant_message_id: int


+class AgentMessageIDInfo(BaseModel):
+    level: int  # agent answer level (stream_chat_message_objects starts at 1)
+    message_id: int  # id of the chat message created for that level's answer
+
+
+class AgenticMessageResponseIDInfo(BaseModel):
+    agentic_message_ids: list[AgentMessageIDInfo]  # one entry per agent level
+
+
 class StreamingError(BaseModel):
    error: str
    stack_trace: str | None = None
--- a/backend/onyx/chat/process_message.py
+++ b/backend/onyx/chat/process_message.py
@@ -7,10 +7,12 @@ from typing import cast

 from sqlalchemy.orm import Session

-from onyx.agents.agent_search.orchestration.nodes.tool_call import ToolCallException
+from onyx.agents.agent_search.orchestration.nodes.call_tool import ToolCallException
 from onyx.chat.answer import Answer
 from onyx.chat.chat_utils import create_chat_chain
 from onyx.chat.chat_utils import create_temporary_persona
+from onyx.chat.models import AgenticMessageResponseIDInfo
+from onyx.chat.models import AgentMessageIDInfo
 from onyx.chat.models import AgentSearchPacket
 from onyx.chat.models import AllCitations
 from onyx.chat.models import AnswerPostInfo
@@ -143,9 +145,10 @@ from onyx.utils.long_term_log import LongTermLogger
 from onyx.utils.telemetry import mt_cloud_telemetry
 from onyx.utils.timing import log_function_time
 from onyx.utils.timing import log_generator_function_time
-from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
+from shared_configs.contextvars import get_current_tenant_id

 logger = setup_logger()
+ERROR_TYPE_CANCELLED = "cancelled"


 def _translate_citations(
@@ -307,6 +310,7 @@ ChatPacket = (
    | CustomToolResponse
    | MessageSpecificCitations
    | MessageResponseIDInfo
+    | AgenticMessageResponseIDInfo
    | StreamStopInfo
    | AgentSearchPacket
 )
@@ -342,7 +346,7 @@ def stream_chat_message_objects(
    3. [always] A set of streamed LLM tokens or an error anywhere along the line if something fails
    4. [always] Details on the final AI response message that is created
    """
-    tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
+    tenant_id = get_current_tenant_id()
    use_existing_user_message = new_msg_req.use_existing_user_message
    existing_assistant_message_id = new_msg_req.existing_assistant_message_id

@@ -631,6 +635,7 @@ def stream_chat_message_objects(
            db_session=db_session,
            commit=False,
            reserved_message_id=reserved_message_id,
+            is_agentic=new_msg_req.use_agentic_search,
        )

        prompt_override = new_msg_req.prompt_override or chat_session.prompt_override
@@ -1015,7 +1020,7 @@ def stream_chat_message_objects(
                if info.message_specific_citations
                else None
            ),
-            error=None,
+            error=ERROR_TYPE_CANCELLED if answer.is_cancelled() else None,
            tool_call=(
                ToolCall(
                    tool_id=tool_name_to_tool_id[info.tool_result.tool_name],
@@ -1033,6 +1038,7 @@ def stream_chat_message_objects(
        next_level = 1
        prev_message = gen_ai_response_message
        agent_answers = answer.llm_answer_by_level()
+        agentic_message_ids = []
        while next_level in agent_answers:
            next_answer = agent_answers[next_level]
            info = info_by_subq[
@@ -1053,7 +1059,12 @@ def stream_chat_message_objects(
                citations=info.message_specific_citations.citation_map
                if info.message_specific_citations
                else None,
+                error=ERROR_TYPE_CANCELLED if answer.is_cancelled() else None,
                refined_answer_improvement=refined_answer_improvement,
+                is_agentic=True,
+            )
+            agentic_message_ids.append(
+                AgentMessageIDInfo(level=next_level, message_id=next_answer_message.id)
            )
            next_level += 1
            prev_message = next_answer_message
@@ -1061,11 +1072,9 @@ def stream_chat_message_objects(
        logger.debug("Committing messages")
        db_session.commit()  # actually save user / assistant message

-        msg_detail_response = translate_db_message_to_chat_message_detail(
-            gen_ai_response_message
-        )
+        yield AgenticMessageResponseIDInfo(agentic_message_ids=agentic_message_ids)

-        yield msg_detail_response
+        yield translate_db_message_to_chat_message_detail(gen_ai_response_message)
    except Exception as e:
        error_msg = str(e)
        logger.exception(error_msg)
--- a/backend/onyx/configs/agent_configs.py
+++ b/backend/onyx/configs/agent_configs.py
@@ -31,22 +31,9 @@ AGENT_DEFAULT_MIN_ORIG_QUESTION_DOCS = 3
 AGENT_DEFAULT_MAX_ANSWER_CONTEXT_DOCS = 10
 AGENT_DEFAULT_MAX_STATIC_HISTORY_WORD_LENGTH = 2000

-AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_GENERAL_GENERATION = 30  # in seconds
-
-AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION = 10  # in seconds
-AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION = 25  # in seconds
-AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION = 4  # in seconds
-AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION = 1  # in seconds
-AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION = 3  # in seconds
-AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION = 12  # in seconds
-AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK = 8  # in seconds
-AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION = 25  # in seconds
-
-AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION = 6  # in seconds
-AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION = 25  # in seconds
-AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_VALIDATION = 8  # in seconds
-AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS = 8  # in seconds
-
+AGENT_ANSWER_GENERATION_BY_FAST_LLM = (
+    os.environ.get("AGENT_ANSWER_GENERATION_BY_FAST_LLM", "").lower() == "true"
+)

 AGENT_RETRIEVAL_STATS = (
    not os.environ.get("AGENT_RETRIEVAL_STATS") == "False"
@@ -178,80 +165,172 @@ AGENT_MAX_STATIC_HISTORY_WORD_LENGTH = int(
 )  # 2000


-AGENT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION = int(
-    os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION")
-    or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION
-)  # 25
+AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_ENTITY_TERM_EXTRACTION = 10  # in seconds
+AGENT_TIMEOUT_CONNECT_LLM_ENTITY_TERM_EXTRACTION = int(
+    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_ENTITY_TERM_EXTRACTION")
+    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_ENTITY_TERM_EXTRACTION
+)
+
+AGENT_DEFAULT_TIMEOUT_LLM_ENTITY_TERM_EXTRACTION = 30  # in seconds
+AGENT_TIMEOUT_LLM_ENTITY_TERM_EXTRACTION = int(
+    os.environ.get("AGENT_TIMEOUT_LLM_ENTITY_TERM_EXTRACTION")
+    or AGENT_DEFAULT_TIMEOUT_LLM_ENTITY_TERM_EXTRACTION
+)


-AGENT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION = int(
-    os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION")
-    or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION
-)  # 3
+AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_DOCUMENT_VERIFICATION = 3  # in seconds
+AGENT_TIMEOUT_CONNECT_LLM_DOCUMENT_VERIFICATION = int(
+    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_DOCUMENT_VERIFICATION")
+    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_DOCUMENT_VERIFICATION
+)

-AGENT_TIMEOUT_OVERRIDE_LLM_GENERAL_GENERATION = int(
-    os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_GENERAL_GENERATION")
-    or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_GENERAL_GENERATION
-)  # 30
+AGENT_DEFAULT_TIMEOUT_LLM_DOCUMENT_VERIFICATION = 5  # in seconds
+AGENT_TIMEOUT_LLM_DOCUMENT_VERIFICATION = int(
+    os.environ.get("AGENT_TIMEOUT_LLM_DOCUMENT_VERIFICATION")
+    or AGENT_DEFAULT_TIMEOUT_LLM_DOCUMENT_VERIFICATION
+)


-AGENT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION = int(
-    os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION")
-    or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION
-)  # 8
+AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_GENERAL_GENERATION = 5  # in seconds
+AGENT_TIMEOUT_CONNECT_LLM_GENERAL_GENERATION = int(
+    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_GENERAL_GENERATION")
+    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_GENERAL_GENERATION
+)
+
+AGENT_DEFAULT_TIMEOUT_LLM_GENERAL_GENERATION = 30  # in seconds
+AGENT_TIMEOUT_LLM_GENERAL_GENERATION = int(
+    os.environ.get("AGENT_TIMEOUT_LLM_GENERAL_GENERATION")
+    or AGENT_DEFAULT_TIMEOUT_LLM_GENERAL_GENERATION
+)


-AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION = int(
-    os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION")
-    or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION
-)  # 12
+AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_SUBQUESTION_GENERATION = 4  # in seconds
+AGENT_TIMEOUT_CONNECT_LLM_SUBQUESTION_GENERATION = int(
+    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_SUBQUESTION_GENERATION")
+    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_SUBQUESTION_GENERATION
+)
+
+AGENT_DEFAULT_TIMEOUT_LLM_SUBQUESTION_GENERATION = 5  # in seconds
+AGENT_TIMEOUT_LLM_SUBQUESTION_GENERATION = int(
+    os.environ.get("AGENT_TIMEOUT_LLM_SUBQUESTION_GENERATION")
+    or AGENT_DEFAULT_TIMEOUT_LLM_SUBQUESTION_GENERATION
+)


-AGENT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION = int(
-    os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION")
-    or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION
-)  # 25
+AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION = 4  # in seconds
+AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION = int(
+    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION")
+    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION
+)
+
+AGENT_DEFAULT_TIMEOUT_LLM_SUBANSWER_GENERATION = 30  # in seconds
+AGENT_TIMEOUT_LLM_SUBANSWER_GENERATION = int(
+    os.environ.get("AGENT_TIMEOUT_LLM_SUBANSWER_GENERATION")
+    or AGENT_DEFAULT_TIMEOUT_LLM_SUBANSWER_GENERATION
+)


-AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION = int(
-    os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION")
-    or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION
-)  # 25
+AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION = 5  # in seconds
+AGENT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION = int(
+    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION")
+    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION
+)
+
+AGENT_DEFAULT_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION = 25  # in seconds
+AGENT_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION = int(
+    os.environ.get("AGENT_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION")
+    or AGENT_DEFAULT_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION
+)


-AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK = int(
-    os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK")
-    or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK
-)  # 8
+AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION = 5  # in seconds
+AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION = int(
+    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION")
+    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION
+)
+
+AGENT_DEFAULT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION = 30  # in seconds
+AGENT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION = int(
+    os.environ.get("AGENT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION")
+    or AGENT_DEFAULT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION
+)


-AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION = int(
-    os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION")
-    or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION
-)  # 6
+AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_SUBANSWER_CHECK = 4  # in seconds
+AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_CHECK = int(
+    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_CHECK")
+    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_SUBANSWER_CHECK
+)
+
+AGENT_DEFAULT_TIMEOUT_LLM_SUBANSWER_CHECK = 8  # in seconds
+AGENT_TIMEOUT_LLM_SUBANSWER_CHECK = int(
+    os.environ.get("AGENT_TIMEOUT_LLM_SUBANSWER_CHECK")
+    or AGENT_DEFAULT_TIMEOUT_LLM_SUBANSWER_CHECK
+)


-AGENT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION = int(
-    os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION")
-    or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION
-)  # 1
+AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_REFINED_SUBQUESTION_GENERATION = 4  # in seconds
+AGENT_TIMEOUT_CONNECT_LLM_REFINED_SUBQUESTION_GENERATION = int(
+    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_REFINED_SUBQUESTION_GENERATION")
+    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_REFINED_SUBQUESTION_GENERATION
+)
+
+AGENT_DEFAULT_TIMEOUT_LLM_REFINED_SUBQUESTION_GENERATION = 8  # in seconds
+AGENT_TIMEOUT_LLM_REFINED_SUBQUESTION_GENERATION = int(
+    os.environ.get("AGENT_TIMEOUT_LLM_REFINED_SUBQUESTION_GENERATION")
+    or AGENT_DEFAULT_TIMEOUT_LLM_REFINED_SUBQUESTION_GENERATION
+)


-AGENT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION = int(
-    os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION")
-    or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION
-)  # 4
+AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_QUERY_REWRITING_GENERATION = 2  # in seconds
+AGENT_TIMEOUT_CONNECT_LLM_QUERY_REWRITING_GENERATION = int(
+    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_QUERY_REWRITING_GENERATION")
+    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_QUERY_REWRITING_GENERATION
+)
+
+AGENT_DEFAULT_TIMEOUT_LLM_QUERY_REWRITING_GENERATION = 3  # in seconds
+AGENT_TIMEOUT_LLM_QUERY_REWRITING_GENERATION = int(
+    os.environ.get("AGENT_TIMEOUT_LLM_QUERY_REWRITING_GENERATION")
+    or AGENT_DEFAULT_TIMEOUT_LLM_QUERY_REWRITING_GENERATION
+)


-AGENT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS = int(
-    os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS")
-    or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS
-)  # 8
+AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION = 4  # in seconds
+AGENT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION = int(
+    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION")
+    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION
+)
+
+AGENT_DEFAULT_TIMEOUT_LLM_HISTORY_SUMMARY_GENERATION = 5  # in seconds
+AGENT_TIMEOUT_LLM_HISTORY_SUMMARY_GENERATION = int(
+    os.environ.get("AGENT_TIMEOUT_LLM_HISTORY_SUMMARY_GENERATION")
+    or AGENT_DEFAULT_TIMEOUT_LLM_HISTORY_SUMMARY_GENERATION
+)


-AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_VALIDATION = int(
-    os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_VALIDATION")
-    or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_VALIDATION
-)  # 8
+AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_COMPARE_ANSWERS = 4  # in seconds
+AGENT_TIMEOUT_CONNECT_LLM_COMPARE_ANSWERS = int(
+    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_COMPARE_ANSWERS")
+    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_COMPARE_ANSWERS
+)
+
+AGENT_DEFAULT_TIMEOUT_LLM_COMPARE_ANSWERS = 8  # in seconds
+AGENT_TIMEOUT_LLM_COMPARE_ANSWERS = int(
+    os.environ.get("AGENT_TIMEOUT_LLM_COMPARE_ANSWERS")
+    or AGENT_DEFAULT_TIMEOUT_LLM_COMPARE_ANSWERS
+)
+
+
+AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_VALIDATION = 4  # in seconds
+AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_VALIDATION = int(
+    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_VALIDATION")
+    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_VALIDATION
+)
+
+AGENT_DEFAULT_TIMEOUT_LLM_REFINED_ANSWER_VALIDATION = 8  # in seconds
+AGENT_TIMEOUT_LLM_REFINED_ANSWER_VALIDATION = int(
+    os.environ.get("AGENT_TIMEOUT_LLM_REFINED_ANSWER_VALIDATION")
+    or AGENT_DEFAULT_TIMEOUT_LLM_REFINED_ANSWER_VALIDATION
+)

 GRAPH_VERSION_NAME: str = "a"
--- a/backend/onyx/configs/app_configs.py
+++ b/backend/onyx/configs/app_configs.py
@@ -158,7 +158,7 @@ POSTGRES_USER = os.environ.get("POSTGRES_USER") or "postgres"
 POSTGRES_PASSWORD = urllib.parse.quote_plus(
    os.environ.get("POSTGRES_PASSWORD") or "password"
 )
-POSTGRES_HOST = os.environ.get("POSTGRES_HOST") or "localhost"
+POSTGRES_HOST = os.environ.get("POSTGRES_HOST") or "127.0.0.1"
 POSTGRES_PORT = os.environ.get("POSTGRES_PORT") or "5432"
 POSTGRES_DB = os.environ.get("POSTGRES_DB") or "postgres"
 AWS_REGION_NAME = os.environ.get("AWS_REGION_NAME") or "us-east-2"
@@ -626,6 +626,8 @@ POD_NAMESPACE = os.environ.get("POD_NAMESPACE")

 DEV_MODE = os.environ.get("DEV_MODE", "").lower() == "true"

+INTEGRATION_TESTS_MODE = os.environ.get("INTEGRATION_TESTS_MODE", "").lower() == "true"
+
 MOCK_CONNECTOR_FILE_PATH = os.environ.get("MOCK_CONNECTOR_FILE_PATH")

 TEST_ENV = os.environ.get("TEST_ENV", "").lower() == "true"
--- a/backend/onyx/configs/constants.py
+++ b/backend/onyx/configs/constants.py
@@ -98,9 +98,18 @@ CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT = 120

 CELERY_PRIMARY_WORKER_LOCK_TIMEOUT = 120

-# needs to be long enough to cover the maximum time it takes to download an object
+
+# hard timeout applied by the watchdog to the indexing connector run
+# to handle hung connectors
+CELERY_INDEXING_WATCHDOG_CONNECTOR_TIMEOUT = 3 * 60 * 60  # 3 hours (in seconds)
+
+# soft timeout for the lock taken by the indexing connector run
+# allows the lock to eventually expire if the managing code around it dies
 # if we can get callbacks as object bytes download, we could lower this a lot.
-CELERY_INDEXING_LOCK_TIMEOUT = 3 * 60 * 60  # 60 min
+# lock timeout = CELERY_INDEXING_WATCHDOG_CONNECTOR_TIMEOUT + 15 minutes, so that
+# the hard termination should always fire first if the connector is hung
+CELERY_INDEXING_LOCK_TIMEOUT = CELERY_INDEXING_WATCHDOG_CONNECTOR_TIMEOUT + 15 * 60
+

 # how long a task should wait for associated fence to be ready
 CELERY_TASK_WAIT_FOR_FENCE_TIMEOUT = 5 * 60  # 5 min
--- a/backend/onyx/configs/integration_test_configs.py
+++ b/backend/onyx/configs/integration_test_configs.py
@@ -1,6 +0,0 @@
-import os
-
-
-SKIP_CONNECTION_POOL_WARM_UP = (
-    os.environ.get("SKIP_CONNECTION_POOL_WARM_UP", "").lower() == "true"
-)
--- a/backend/onyx/connectors/bookstack/client.py
+++ b/backend/onyx/connectors/bookstack/client.py
@@ -5,6 +5,8 @@ import requests

 class BookStackClientRequestFailedError(ConnectionError):
    def __init__(self, status: int, error: str) -> None:
+        self.status_code = status
+        self.error = error
        super().__init__(
            "BookStack Client request failed with status {status}: {error}".format(
                status=status, error=error
--- a/backend/onyx/connectors/bookstack/connector.py
+++ b/backend/onyx/connectors/bookstack/connector.py
@@ -7,8 +7,12 @@ from typing import Any
 from onyx.configs.app_configs import INDEX_BATCH_SIZE
 from onyx.configs.constants import DocumentSource
 from onyx.connectors.bookstack.client import BookStackApiClient
+from onyx.connectors.bookstack.client import BookStackClientRequestFailedError
 from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
+from onyx.connectors.interfaces import ConnectorValidationError
+from onyx.connectors.interfaces import CredentialExpiredError
 from onyx.connectors.interfaces import GenerateDocumentsOutput
+from onyx.connectors.interfaces import InsufficientPermissionsError
 from onyx.connectors.interfaces import LoadConnector
 from onyx.connectors.interfaces import PollConnector
 from onyx.connectors.interfaces import SecondsSinceUnixEpoch
@@ -214,3 +218,39 @@ class BookstackConnector(LoadConnector, PollConnector):
                    break
                else:
                    time.sleep(0.2)
+
+    def validate_connector_settings(self) -> None:
+        """
+        Probe BookStack with one small authenticated request to confirm the credentials work.
+        Failures are re-raised as connector errors: 401 -> expired, 403 -> permissions, else validation.
+        """
+        if not self.bookstack_client:
+            raise ConnectorMissingCredentialError(
+                "BookStack credentials have not been loaded."
+            )
+
+        try:
+            # Attempt to fetch a small batch of books (arbitrary endpoint) to verify credentials
+            _ = self.bookstack_client.get(
+                "/books", params={"count": "1", "offset": "0"}
+            )
+
+        except BookStackClientRequestFailedError as e:
+            # status_code is the HTTP status stored on BookStackClientRequestFailedError
+            if e.status_code == 401:
+                raise CredentialExpiredError(
+                    "Your BookStack credentials appear to be invalid or expired (HTTP 401)."
+                ) from e
+            elif e.status_code == 403:
+                raise InsufficientPermissionsError(
+                    "The configured BookStack token does not have sufficient permissions (HTTP 403)."
+                ) from e
+            else:
+                raise ConnectorValidationError(
+                    f"Unexpected BookStack error (status={e.status_code}): {e}"
+                ) from e
+
+        except Exception as exc:
+            raise ConnectorValidationError(
+                f"Unexpected error while validating BookStack connector settings: {exc}"
+            ) from exc
--- a/backend/onyx/connectors/confluence/onyx_confluence.py
+++ b/backend/onyx/connectors/confluence/onyx_confluence.py
@@ -8,6 +8,7 @@ from typing import TypeVar
 from urllib.parse import quote

 from atlassian import Confluence  # type:ignore
+from pydantic import BaseModel
 from requests import HTTPError

 from onyx.utils.logger import setup_logger
@@ -29,6 +30,16 @@ class ConfluenceRateLimitError(Exception):
    pass


+class ConfluenceUser(BaseModel):
+    user_id: str  # accountId in Cloud, userKey in Server
+    username: str | None  # Confluence Cloud doesn't give usernames
+    display_name: str  # the API's displayName field (Cloud and Server)
+    # Confluence Data Center doesn't give email back by default,
+    # have to fetch it with a different endpoint
+    email: str | None
+    type: str  # accountType in Cloud; Server's "type", defaulting to "user"
+
+
 def _handle_http_error(e: HTTPError, attempt: int) -> int:
    MIN_DELAY = 2
    MAX_DELAY = 60
@@ -275,21 +286,95 @@ class OnyxConfluence(Confluence):
        self,
        expand: str | None = None,
        limit: int | None = None,
-    ) -> Iterator[dict[str, Any]]:
+    ) -> Iterator[ConfluenceUser]:
        """
        The search/user endpoint can be used to fetch users.
        It's a seperate endpoint from the content/search endpoint used only for users.
        Otherwise it's very similar to the content/search endpoint.
        """
-        cql = "type=user"
-        url = "rest/api/search/user" if self.cloud else "rest/api/search"
-        expand_string = f"&expand={expand}" if expand else ""
-        url += f"?cql={cql}{expand_string}"
-        yield from self._paginate_url(url, limit)
+        if self.cloud:
+            cql = "type=user"
+            url = "rest/api/search/user"
+            expand_string = f"&expand={expand}" if expand else ""
+            url += f"?cql={cql}{expand_string}"
+            for user_result in self._paginate_url(url, limit):
+                # Example response:
+                # {
+                #     'user': {
+                #         'type': 'known',
+                #         'accountId': '712020:35e60fbb-d0f3-4c91-b8c1-f2dd1d69462d',
+                #         'accountType': 'atlassian',
+                #         'email': 'chris@danswer.ai',
+                #         'publicName': 'Chris Weaver',
+                #         'profilePicture': {
+                #             'path': '/wiki/aa-avatar/712020:35e60fbb-d0f3-4c91-b8c1-f2dd1d69462d',
+                #             'width': 48,
+                #             'height': 48,
+                #             'isDefault': False
+                #         },
+                #         'displayName': 'Chris Weaver',
+                #         'isExternalCollaborator': False,
+                #         '_expandable': {
+                #             'operations': '',
+                #             'personalSpace': ''
+                #         },
+                #         '_links': {
+                #             'self': 'https://danswerai.atlassian.net/wiki/rest/api/user?accountId=712020:35e60fbb-d0f3-4c91-b8c1-f2dd1d69462d'
+                #         }
+                #     },
+                #     'title': 'Chris Weaver',
+                #     'excerpt': '',
+                #     'url': '/people/712020:35e60fbb-d0f3-4c91-b8c1-f2dd1d69462d',
+                #     'breadcrumbs': [],
+                #     'entityType': 'user',
+                #     'iconCssClass': 'aui-icon content-type-profile',
+                #     'lastModified': '2025-02-18T04:08:03.579Z',
+                #     'score': 0.0
+                # }
+                user = user_result["user"]
+                yield ConfluenceUser(
+                    user_id=user["accountId"],
+                    username=None,
+                    display_name=user["displayName"],
+                    email=user.get("email"),
+                    type=user["accountType"],
+                )
+        else:
+            # https://developer.atlassian.com/server/confluence/rest/v900/api-group-user/#api-rest-api-user-list-get
+            # ^ is only available on data center deployments
+            # Example response:
+            # [
+            #     {
+            #         'type': 'known',
+            #         'username': 'admin',
+            #         'userKey': '40281082950c5fe901950c61c55d0000',
+            #         'profilePicture': {
+            #             'path': '/images/icons/profilepics/default.svg',
+            #             'width': 48,
+            #             'height': 48,
+            #             'isDefault': True
+            #         },
+            #         'displayName': 'Admin Test',
+            #         '_links': {
+            #             'self': 'http://localhost:8090/rest/api/user?key=40281082950c5fe901950c61c55d0000'
+            #         },
+            #         '_expandable': {
+            #             'status': ''
+            #         }
+            #     }
+            # ]
+            for user in self._paginate_url("rest/api/user/list", limit):
+                yield ConfluenceUser(
+                    user_id=user["userKey"],
+                    username=user["username"],
+                    display_name=user["displayName"],
+                    email=None,
+                    type=user.get("type", "user"),
+                )

    def paginated_groups_by_user_retrieval(
        self,
-        user: dict[str, Any],
+        user_id: str,  # accountId in Cloud, userKey in Server
        limit: int | None = None,
    ) -> Iterator[dict[str, Any]]:
        """
@@ -297,7 +382,7 @@ class OnyxConfluence(Confluence):
        It's a confluence specific endpoint that can be used to fetch groups.
        """
        user_field = "accountId" if self.cloud else "key"
-        user_value = user["accountId"] if self.cloud else user["userKey"]
+        user_value = user_id
        # Server uses userKey (but calls it key during the API call), Cloud uses accountId
        user_query = f"{user_field}={quote(user_value)}"

--- a/backend/onyx/connectors/cross_connector_utils/miscellaneous_utils.py
+++ b/backend/onyx/connectors/cross_connector_utils/miscellaneous_utils.py
@@ -1,3 +1,4 @@
+import re
 from collections.abc import Callable
 from collections.abc import Iterator
 from datetime import datetime
@@ -24,16 +25,22 @@ def datetime_to_utc(dt: datetime) -> datetime:


 def time_str_to_utc(datetime_str: str) -> datetime:
+    # Remove all timezone abbreviations in parentheses
+    datetime_str = re.sub(r"\([A-Z]+\)", "", datetime_str).strip()
+
+    # Remove any remaining parentheses and their contents
+    datetime_str = re.sub(r"\(.*?\)", "", datetime_str).strip()
+
    try:
        dt = parse(datetime_str)
    except ValueError:
-        # Handle malformed timezone by attempting to fix common format issues
+        # Fix common format issues (e.g. "0000" => "+0000")
        if "0000" in datetime_str:
-            # Convert "0000" to "+0000" for proper timezone parsing
-            fixed_dt_str = datetime_str.replace(" 0000", " +0000")
-            dt = parse(fixed_dt_str)
+            datetime_str = datetime_str.replace(" 0000", " +0000")
+            dt = parse(datetime_str)
        else:
            raise
+
    return datetime_to_utc(dt)


--- a/backend/onyx/connectors/dropbox/connector.py
+++ b/backend/onyx/connectors/dropbox/connector.py
@@ -4,12 +4,16 @@ from typing import Any

 from dropbox import Dropbox  # type: ignore
 from dropbox.exceptions import ApiError  # type:ignore
+from dropbox.exceptions import AuthError  # type:ignore
 from dropbox.files import FileMetadata  # type:ignore
 from dropbox.files import FolderMetadata  # type:ignore

 from onyx.configs.app_configs import INDEX_BATCH_SIZE
 from onyx.configs.constants import DocumentSource
+from onyx.connectors.interfaces import ConnectorValidationError
+from onyx.connectors.interfaces import CredentialInvalidError
 from onyx.connectors.interfaces import GenerateDocumentsOutput
+from onyx.connectors.interfaces import InsufficientPermissionsError
 from onyx.connectors.interfaces import LoadConnector
 from onyx.connectors.interfaces import PollConnector
 from onyx.connectors.interfaces import SecondsSinceUnixEpoch
@@ -141,6 +145,29 @@ class DropboxConnector(LoadConnector, PollConnector):

        return None

+    def validate_connector_settings(self) -> None:
+        """Validate the loaded Dropbox credentials with a one-entry folder listing."""
+        if self.dropbox_client is None:
+            raise ConnectorMissingCredentialError("Dropbox credentials not loaded.")
+
+        try:
+            self.dropbox_client.files_list_folder(path="", limit=1)
+        except AuthError as e:
+            logger.exception("Failed to validate Dropbox credentials")
+            raise CredentialInvalidError(f"Dropbox credential is invalid: {e.error}") from e
+        except ApiError as e:
+            if "insufficient_permissions" in str(e.error).lower():
+                raise InsufficientPermissionsError(
+                    "Your Dropbox token does not have sufficient permissions."
+                ) from e
+            raise ConnectorValidationError(
+                f"Unexpected Dropbox error during validation: {e.user_message_text or e}"
+            ) from e
+        except Exception as e:
+            raise Exception(
+                f"Unexpected error during Dropbox settings validation: {e}"
+            ) from e
+

 if __name__ == "__main__":
    import os
--- a/backend/onyx/connectors/factory.py
+++ b/backend/onyx/connectors/factory.py
@@ -3,6 +3,7 @@ from typing import Type

 from sqlalchemy.orm import Session

+from onyx.configs.app_configs import INTEGRATION_TESTS_MODE
 from onyx.configs.constants import DocumentSource
 from onyx.configs.constants import DocumentSourceRequiringTenantContext
 from onyx.connectors.airtable.airtable_connector import AirtableConnector
@@ -31,6 +32,7 @@ from onyx.connectors.guru.connector import GuruConnector
 from onyx.connectors.hubspot.connector import HubSpotConnector
 from onyx.connectors.interfaces import BaseConnector
 from onyx.connectors.interfaces import CheckpointConnector
+from onyx.connectors.interfaces import ConnectorValidationError
 from onyx.connectors.interfaces import EventConnector
 from onyx.connectors.interfaces import LoadConnector
 from onyx.connectors.interfaces import PollConnector
@@ -52,8 +54,11 @@ from onyx.connectors.wikipedia.connector import WikipediaConnector
 from onyx.connectors.xenforo.connector import XenforoConnector
 from onyx.connectors.zendesk.connector import ZendeskConnector
 from onyx.connectors.zulip.connector import ZulipConnector
+from onyx.db.connector import fetch_connector_by_id
 from onyx.db.credentials import backend_update_credential_json
+from onyx.db.credentials import fetch_credential_by_id_for_user
 from onyx.db.models import Credential
+from onyx.db.models import User


 class ConnectorMissingException(Exception):
@@ -174,3 +179,49 @@ def instantiate_connector(
        backend_update_credential_json(credential, new_credentials, db_session)

    return connector
+
+
+def validate_ccpair_for_user(
+    connector_id: int,
+    credential_id: int,
+    db_session: Session,
+    user: User | None,
+    tenant_id: str | None,
+) -> None:
+    """Instantiate the connector for this pair and run its validate_connector_settings().
+
+    Raises ValueError (row missing) or ConnectorValidationError (instantiation failed).
+    """
+    if INTEGRATION_TESTS_MODE:
+        return
+
+    connector = fetch_connector_by_id(connector_id, db_session)
+    credential = fetch_credential_by_id_for_user(
+        credential_id,
+        user,
+        db_session,
+        get_editable=False,
+    )
+    if not connector:
+        raise ValueError("Connector not found")
+    # These two sources return before the credential check, so they pass without a credential.
+    if (
+        connector.source == DocumentSource.INGESTION_API
+        or connector.source == DocumentSource.MOCK_CONNECTOR
+    ):
+        return
+    if not credential:
+        raise ValueError("Credential not found")
+    try:
+        runnable_connector = instantiate_connector(
+            db_session=db_session,
+            source=connector.source,
+            input_type=connector.input_type,
+            connector_specific_config=connector.connector_specific_config,
+            credential=credential,
+            tenant_id=tenant_id,
+        )
+    except Exception as e:
+        raise ConnectorValidationError(str(e))
+    # Called outside the try: errors raised by the connector's own validation propagate unwrapped.
+    runnable_connector.validate_connector_settings()
--- a/backend/onyx/connectors/file/connector.py
+++ b/backend/onyx/connectors/file/connector.py
@@ -181,7 +181,7 @@ class LocalFileConnector(LoadConnector):
        documents: list[Document] = []
        token = CURRENT_TENANT_ID_CONTEXTVAR.set(self.tenant_id)

-        with get_session_with_tenant(self.tenant_id) as db_session:
+        with get_session_with_tenant(tenant_id=self.tenant_id) as db_session:
            for file_path in self.file_locations:
                current_datetime = datetime.now(timezone.utc)
                files = _read_files_and_metadata(
--- a/backend/onyx/connectors/fireflies/connector.py
+++ b/backend/onyx/connectors/fireflies/connector.py
@@ -187,12 +187,12 @@ class FirefliesConnector(PollConnector, LoadConnector):
        return self._process_transcripts()

    def poll_source(
-        self, start_unixtime: SecondsSinceUnixEpoch, end_unixtime: SecondsSinceUnixEpoch
+        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
-        start_datetime = datetime.fromtimestamp(
-            start_unixtime, tz=timezone.utc
-        ).strftime("%Y-%m-%dT%H:%M:%S.000Z")
-        end_datetime = datetime.fromtimestamp(end_unixtime, tz=timezone.utc).strftime(
+        start_datetime = datetime.fromtimestamp(start, tz=timezone.utc).strftime(
+            "%Y-%m-%dT%H:%M:%S.000Z"
+        )
+        end_datetime = datetime.fromtimestamp(end, tz=timezone.utc).strftime(
            "%Y-%m-%dT%H:%M:%S.000Z"
        )

--- a/backend/onyx/connectors/gitbook/connector.py
+++ b/backend/onyx/connectors/gitbook/connector.py
@@ -229,16 +229,20 @@ class GitbookConnector(LoadConnector, PollConnector):

        try:
            content = self.client.get(f"/spaces/{self.space_id}/content")
-            pages = content.get("pages", [])
-
+            pages: list[dict[str, Any]] = content.get("pages", [])
            current_batch: list[Document] = []
-            for page in pages:
-                updated_at = datetime.fromisoformat(page["updatedAt"])

+            while pages:
+                page = pages.pop(0)
+
+                updated_at_raw = page.get("updatedAt")
+                if updated_at_raw is None:
+                    # if updatedAt is not present, that means the page has never been edited
+                    continue
+
+                updated_at = datetime.fromisoformat(updated_at_raw)
                if start and updated_at < start:
-                    if current_batch:
-                        yield current_batch
-                    return
+                    continue
                if end and updated_at > end:
                    continue

@@ -250,6 +254,8 @@ class GitbookConnector(LoadConnector, PollConnector):
                    yield current_batch
                    current_batch = []

+                pages.extend(page.get("pages", []))
+
            if current_batch:
                yield current_batch

--- a/backend/onyx/connectors/github/connector.py
+++ b/backend/onyx/connectors/github/connector.py
@@ -9,6 +9,7 @@ from typing import cast
 from github import Github
 from github import RateLimitExceededException
 from github import Repository
+from github.GithubException import GithubException
 from github.Issue import Issue
 from github.PaginatedList import PaginatedList
 from github.PullRequest import PullRequest
@@ -16,17 +17,20 @@ from github.PullRequest import PullRequest
 from onyx.configs.app_configs import GITHUB_CONNECTOR_BASE_URL
 from onyx.configs.app_configs import INDEX_BATCH_SIZE
 from onyx.configs.constants import DocumentSource
+from onyx.connectors.interfaces import ConnectorValidationError
+from onyx.connectors.interfaces import CredentialExpiredError
 from onyx.connectors.interfaces import GenerateDocumentsOutput
+from onyx.connectors.interfaces import InsufficientPermissionsError
 from onyx.connectors.interfaces import LoadConnector
 from onyx.connectors.interfaces import PollConnector
 from onyx.connectors.interfaces import SecondsSinceUnixEpoch
+from onyx.connectors.interfaces import UnexpectedError
 from onyx.connectors.models import ConnectorMissingCredentialError
 from onyx.connectors.models import Document
 from onyx.connectors.models import Section
 from onyx.utils.batching import batch_generator
 from onyx.utils.logger import setup_logger

-
 logger = setup_logger()


@@ -226,6 +230,48 @@ class GithubConnector(LoadConnector, PollConnector):

        return self._fetch_from_github(adjusted_start_datetime, end_datetime)

+    def validate_connector_settings(self) -> None:
+        """Check that the GitHub client can reach the configured repository; raise if not."""
+        if self.github_client is None:
+            raise ConnectorMissingCredentialError("GitHub credentials not loaded.")
+        if not self.repo_owner or not self.repo_name:
+            raise ConnectorValidationError(
+                "Invalid connector settings: 'repo_owner' and 'repo_name' must be provided."
+            )
+
+        try:
+            test_repo = self.github_client.get_repo(
+                f"{self.repo_owner}/{self.repo_name}"
+            )
+            # Second probe: also request the contents at path "" of that repository.
+            test_repo.get_contents("")
+        # NOTE(review): presumably a GithubException subclass, so this handler must stay first - confirm.
+        except RateLimitExceededException:
+            raise UnexpectedError(
+                "Validation failed due to GitHub rate-limits being exceeded. Please try again later."
+            )
+        except GithubException as e:
+            if e.status == 401:
+                raise CredentialExpiredError(
+                    "GitHub credential appears to be invalid or expired (HTTP 401)."
+                )
+            elif e.status == 403:
+                raise InsufficientPermissionsError(
+                    "Your GitHub token does not have sufficient permissions for this repository (HTTP 403)."
+                )
+            elif e.status == 404:
+                raise ConnectorValidationError(
+                    f"GitHub repository not found with name: {self.repo_owner}/{self.repo_name}"
+                )
+            else:
+                raise ConnectorValidationError(
+                    f"Unexpected GitHub error (status={e.status}): {e.data}"
+                )
+        except Exception as exc:
+            raise Exception(
+                f"Unexpected error during GitHub settings validation: {exc}"
+            )
+

 if __name__ == "__main__":
    import os
--- a/backend/onyx/connectors/gmail/connector.py
+++ b/backend/onyx/connectors/gmail/connector.py
@@ -297,6 +297,7 @@ class GmailConnector(LoadConnector, PollConnector, SlimConnector):
                userId=user_email,
                fields=THREAD_LIST_FIELDS,
                q=query,
+                continue_on_404_or_403=True,
            ):
                full_threads = execute_paginated_retrieval(
                    retrieval_function=gmail_service.users().threads().get,
--- a/backend/onyx/connectors/google_drive/connector.py
+++ b/backend/onyx/connectors/google_drive/connector.py
@@ -220,7 +220,14 @@ class GoogleDriveConnector(LoadConnector, PollConnector, SlimConnector):
        return self._creds

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, str] | None:
-        self._primary_admin_email = credentials[DB_CREDENTIALS_PRIMARY_ADMIN_KEY]
+        try:
+            self._primary_admin_email = credentials[DB_CREDENTIALS_PRIMARY_ADMIN_KEY]
+        except KeyError:
+            raise ValueError(
+                "Primary admin email missing, "
+                "should not call this property "
+                "before calling load_credentials"
+            )

        self._creds, new_creds_dict = get_google_creds(
            credentials=credentials,
--- a/backend/onyx/connectors/interfaces.py
+++ b/backend/onyx/connectors/interfaces.py
@@ -12,7 +12,6 @@ from onyx.connectors.models import Document
 from onyx.connectors.models import SlimDocument
 from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface

-
 SecondsSinceUnixEpoch = float

 GenerateDocumentsOutput = Iterator[list[Document]]
@@ -45,6 +44,14 @@ class BaseConnector(abc.ABC):
                raise RuntimeError(custom_parser_req_msg)
        return metadata_lines

+    def validate_connector_settings(self) -> None:
+        """
+        Hook for validating a connector's credentials and settings.
+        Overrides signal a problem by raising (for example ConnectorValidationError
+        or one of its subclasses) and simply return when everything checks out.
+        The base implementation is a no-op, so only connectors that override it validate.
+        """
+

 # Large set update or reindex, generally pulling a complete state or from a savestate file
 class LoadConnector(BaseConnector):
@@ -139,3 +146,46 @@ class CheckpointConnector(BaseConnector):
        ```
        """
        raise NotImplementedError
+
+
+class ConnectorValidationError(Exception):
+    """General connector validation failure and base class for the more specific errors below."""
+
+    def __init__(self, message: str):
+        self.message = message  # kept as an attribute in addition to the Exception args
+        super().__init__(self.message)
+
+
+class UnexpectedError(Exception):
+    """Raised when an unexpected error occurs during connector validation.
+
+    It does not necessarily mean the credential is invalid: validation itself
+    failed, or we hit a currently unhandled error case. Derives from Exception,
+    so `except ConnectorValidationError` handlers do not catch it.
+    """
+
+    def __init__(self, message: str = "Unexpected error during connector validation"):
+        super().__init__(message)
+
+
+class CredentialInvalidError(ConnectorValidationError):
+    """Raised when a connector's credential is invalid (rejected, as opposed to expired)."""
+
+    def __init__(self, message: str = "Credential is invalid"):  # default used when no message is given
+        super().__init__(message)
+
+
+class CredentialExpiredError(ConnectorValidationError):
+    """Raised when a credential is expired; connectors also use it for HTTP 401 ("invalid or expired")."""
+
+    def __init__(self, message: str = "Credential has expired"):  # default used when no message is given
+        super().__init__(message)
+
+
+class InsufficientPermissionsError(ConnectorValidationError):
+    """Raised when the credential lacks the API permissions needed (connectors map HTTP 403 to this)."""
+
+    def __init__(
+        self, message: str = "Insufficient permissions for the requested operation"
+    ):
+        super().__init__(message)
--- a/backend/onyx/connectors/notion/connector.py
+++ b/backend/onyx/connectors/notion/connector.py
@@ -7,6 +7,7 @@ from datetime import timezone
 from typing import Any
 from typing import Optional

+import requests
 from retry import retry

 from onyx.configs.app_configs import INDEX_BATCH_SIZE
@@ -15,10 +16,14 @@ from onyx.configs.constants import DocumentSource
 from onyx.connectors.cross_connector_utils.rate_limit_wrapper import (
    rl_requests,
 )
+from onyx.connectors.interfaces import ConnectorValidationError
+from onyx.connectors.interfaces import CredentialExpiredError
 from onyx.connectors.interfaces import GenerateDocumentsOutput
+from onyx.connectors.interfaces import InsufficientPermissionsError
 from onyx.connectors.interfaces import LoadConnector
 from onyx.connectors.interfaces import PollConnector
 from onyx.connectors.interfaces import SecondsSinceUnixEpoch
+from onyx.connectors.models import ConnectorMissingCredentialError
 from onyx.connectors.models import Document
 from onyx.connectors.models import Section
 from onyx.utils.batching import batch_generator
@@ -616,6 +621,64 @@ class NotionConnector(LoadConnector, PollConnector):
            else:
                break

+    def validate_connector_settings(self) -> None:
+        """Confirm the Notion token works by making one small API request.
+
+        Fetches root_page_id when set, otherwise runs a page_size=1 page search.
+        Raises ConnectorMissingCredentialError without an Authorization header, typed
+        validation errors for HTTP 401/403/404/429, and a plain Exception otherwise.
+        """
+        if not self.headers.get("Authorization"):
+            raise ConnectorMissingCredentialError("Notion credentials not loaded.")
+        try:
+            if self.root_page_id:
+                res = rl_requests.get(
+                    f"https://api.notion.com/v1/pages/{self.root_page_id}",
+                    headers=self.headers,
+                    timeout=_NOTION_CALL_TIMEOUT,
+                )
+            else:
+                test_query = {
+                    "filter": {"property": "object", "value": "page"},
+                    "page_size": 1,
+                }
+                res = rl_requests.post(
+                    "https://api.notion.com/v1/search",
+                    headers=self.headers,
+                    json=test_query,
+                    timeout=_NOTION_CALL_TIMEOUT,
+                )
+            res.raise_for_status()
+        except requests.exceptions.HTTPError as http_err:
+            # A 4xx/5xx Response is falsy (bool(resp) is resp.ok), so never gate on its truthiness.
+            status_code = getattr(http_err.response, "status_code", None)
+            if status_code == 401:
+                raise CredentialExpiredError(
+                    "Notion credential appears to be invalid or expired (HTTP 401)."
+                )
+            elif status_code == 403:
+                raise InsufficientPermissionsError(
+                    "Your Notion token does not have sufficient permissions (HTTP 403)."
+                )
+            elif status_code == 404:
+                # Typically means resource not found or not shared. Could be root_page_id is invalid.
+                raise ConnectorValidationError(
+                    "Notion resource not found or not shared with the integration (HTTP 404)."
+                )
+            elif status_code == 429:
+                raise ConnectorValidationError(
+                    "Validation failed due to Notion rate-limits being exceeded (HTTP 429). "
+                    "Please try again later."
+                )
+            else:
+                raise Exception(
+                    f"Unexpected Notion HTTP error (status={status_code}): {http_err}"
+                ) from http_err
+        except Exception as exc:
+            raise Exception(
+                f"Unexpected error during Notion settings validation: {exc}"
+            ) from exc
+

 if __name__ == "__main__":
    import os
--- a/backend/onyx/connectors/onyx_jira/connector.py
+++ b/backend/onyx/connectors/onyx_jira/connector.py
@@ -12,8 +12,11 @@ from onyx.configs.app_configs import JIRA_CONNECTOR_LABELS_TO_SKIP
 from onyx.configs.app_configs import JIRA_CONNECTOR_MAX_TICKET_SIZE
 from onyx.configs.constants import DocumentSource
 from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
+from onyx.connectors.interfaces import ConnectorValidationError
+from onyx.connectors.interfaces import CredentialExpiredError
 from onyx.connectors.interfaces import GenerateDocumentsOutput
 from onyx.connectors.interfaces import GenerateSlimDocumentOutput
+from onyx.connectors.interfaces import InsufficientPermissionsError
 from onyx.connectors.interfaces import LoadConnector
 from onyx.connectors.interfaces import PollConnector
 from onyx.connectors.interfaces import SecondsSinceUnixEpoch
@@ -272,6 +275,40 @@ class JiraConnector(LoadConnector, PollConnector, SlimConnector):

        yield slim_doc_batch

+    def validate_connector_settings(self) -> None:
+        """Check that the Jira client can fetch the configured project; raise if not."""
+        if self._jira_client is None:
+            raise ConnectorMissingCredentialError("Jira")
+        if not self._jira_project:
+            raise ConnectorValidationError(
+                "Invalid connector settings: 'jira_project' must be provided."
+            )
+
+        try:
+            self.jira_client.project(self._jira_project)
+        except Exception as e:
+            # Exceptions without a status_code attribute yield None here and land in the final else.
+            status_code = getattr(e, "status_code", None)
+
+            if status_code == 401:
+                raise CredentialExpiredError(
+                    "Jira credential appears to be expired or invalid (HTTP 401)."
+                )
+            elif status_code == 403:
+                raise InsufficientPermissionsError(
+                    "Your Jira token does not have sufficient permissions for this project (HTTP 403)."
+                )
+            elif status_code == 404:
+                raise ConnectorValidationError(
+                    f"Jira project not found with key: {self._jira_project}"
+                )
+            elif status_code == 429:
+                raise ConnectorValidationError(
+                    "Validation failed due to Jira rate-limits being exceeded. Please try again later."
+                )
+            else:
+                raise Exception(f"Unexpected Jira error during validation: {e}")
+

 if __name__ == "__main__":
    import os
--- a/backend/onyx/connectors/web/connector.py
+++ b/backend/onyx/connectors/web/connector.py
@@ -25,8 +25,12 @@ from onyx.configs.app_configs import WEB_CONNECTOR_OAUTH_CLIENT_SECRET
 from onyx.configs.app_configs import WEB_CONNECTOR_OAUTH_TOKEN_URL
 from onyx.configs.app_configs import WEB_CONNECTOR_VALIDATE_URLS
 from onyx.configs.constants import DocumentSource
+from onyx.connectors.interfaces import ConnectorValidationError
+from onyx.connectors.interfaces import CredentialExpiredError
 from onyx.connectors.interfaces import GenerateDocumentsOutput
+from onyx.connectors.interfaces import InsufficientPermissionsError
 from onyx.connectors.interfaces import LoadConnector
+from onyx.connectors.interfaces import UnexpectedError
 from onyx.connectors.models import Document
 from onyx.connectors.models import Section
 from onyx.file_processing.extract_file_text import read_pdf_file
@@ -37,6 +41,8 @@ from shared_configs.configs import MULTI_TENANT

 logger = setup_logger()

+WEB_CONNECTOR_MAX_SCROLL_ATTEMPTS = 20
+

 class WEB_CONNECTOR_VALID_SETTINGS(str, Enum):
    # Given a base site, index everything under that path
@@ -170,26 +176,35 @@ def start_playwright() -> Tuple[Playwright, BrowserContext]:


 def extract_urls_from_sitemap(sitemap_url: str) -> list[str]:
-    response = requests.get(sitemap_url)
-    response.raise_for_status()
+    """Return absolute URLs from the sitemap's <loc> tags; any failure is re-raised as RuntimeError."""
+    try:
+        response = requests.get(sitemap_url)  # NOTE(review): no timeout argument is passed - confirm acceptable
+        response.raise_for_status()

-    soup = BeautifulSoup(response.content, "html.parser")
-    urls = [
-        _ensure_absolute_url(sitemap_url, loc_tag.text)
-        for loc_tag in soup.find_all("loc")
-    ]
+        soup = BeautifulSoup(response.content, "html.parser")
+        urls = [
+            _ensure_absolute_url(sitemap_url, loc_tag.text)
+            for loc_tag in soup.find_all("loc")
+        ]

-    if len(urls) == 0 and len(soup.find_all("urlset")) == 0:
-        # the given url doesn't look like a sitemap, let's try to find one
-        urls = list_pages_for_site(sitemap_url)
+        if len(urls) == 0 and len(soup.find_all("urlset")) == 0:
+            # the given url doesn't look like a sitemap, let's try to find one
+            urls = list_pages_for_site(sitemap_url)

-    if len(urls) == 0:
-        raise ValueError(
-            f"No URLs found in sitemap {sitemap_url}. Try using the 'single' or 'recursive' scraping options instead."
+        if len(urls) == 0:
+            raise ValueError(
+                f"No URLs found in sitemap {sitemap_url}. Try using the 'single' or 'recursive' scraping options instead."
+            )
+        return urls
+    except requests.RequestException as e:
+        raise RuntimeError(f"Failed to fetch sitemap from {sitemap_url}: {e}")
+    except ValueError as e:  # also catches the "No URLs found" ValueError raised inside the try
+        raise RuntimeError(f"Error processing sitemap {sitemap_url}: {e}")
+    except Exception as e:
+        raise RuntimeError(
+            f"Unexpected error while processing sitemap {sitemap_url}: {e}"
         )

-    return urls
-

 def _ensure_absolute_url(source_url: str, maybe_relative_url: str) -> str:
    if not urlparse(maybe_relative_url).netloc:
@@ -225,10 +240,14 @@ class WebConnector(LoadConnector):
        web_connector_type: str = WEB_CONNECTOR_VALID_SETTINGS.RECURSIVE.value,
        mintlify_cleanup: bool = True,  # Mostly ok to apply to other websites as well
        batch_size: int = INDEX_BATCH_SIZE,
+        scroll_before_scraping: bool = False,
+        **kwargs: Any,
    ) -> None:
        self.mintlify_cleanup = mintlify_cleanup
        self.batch_size = batch_size
        self.recursive = False
+        self.scroll_before_scraping = scroll_before_scraping
+        self.web_connector_type = web_connector_type

        if web_connector_type == WEB_CONNECTOR_VALID_SETTINGS.RECURSIVE.value:
            self.recursive = True
@@ -344,6 +363,18 @@ class WebConnector(LoadConnector):
                        continue
                    visited_links.add(current_url)

+                if self.scroll_before_scraping:
+                    scroll_attempts = 0
+                    previous_height = page.evaluate("document.body.scrollHeight")
+                    while scroll_attempts < WEB_CONNECTOR_MAX_SCROLL_ATTEMPTS:
+                        page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
+                        page.wait_for_load_state("networkidle", timeout=30000)
+                        new_height = page.evaluate("document.body.scrollHeight")
+                        if new_height == previous_height:
+                            break  # Stop scrolling when no more content is loaded
+                        previous_height = new_height
+                        scroll_attempts += 1
+
                content = page.content()
                soup = BeautifulSoup(content, "html.parser")

@@ -402,6 +433,53 @@ class WebConnector(LoadConnector):
                raise RuntimeError(last_error)
            raise RuntimeError("No valid pages found.")

+    def validate_connector_settings(self) -> None:
+        """Validate the first configured URL: protected-URL check, then a connectivity probe."""
+        if not self.to_visit_list:
+            raise ConnectorValidationError(
+                "No URL configured. Please provide at least one valid URL."
+            )
+        # Sitemap-type connectors return here, so neither URL probe below runs for them.
+        if self.web_connector_type == WEB_CONNECTOR_VALID_SETTINGS.SITEMAP.value:
+            return None
+
+        # We'll just test the first URL for connectivity and correctness
+        test_url = self.to_visit_list[0]
+        # Check that the URL is allowed and well-formed
+        try:
+            protected_url_check(test_url)
+        except ValueError as e:
+            raise ConnectorValidationError(
+                f"Protected URL check failed for '{test_url}': {e}"
+            )
+        except ConnectionError as e:
+            # Typically DNS or other network issues
+            raise ConnectorValidationError(str(e))
+
+        # Make a quick request to see if we get a valid response
+        try:
+            check_internet_connection(test_url)
+        except Exception as e:
+            # NOTE(review): codes are matched as substrings of the message; digits in the URL could match too - confirm.
+            err_str = str(e)
+            if "401" in err_str:
+                raise CredentialExpiredError(
+                    f"Unauthorized access to '{test_url}': {e}"
+                )
+            elif "403" in err_str:
+                raise InsufficientPermissionsError(
+                    f"Forbidden access to '{test_url}': {e}"
+                )
+            elif "404" in err_str:
+                raise ConnectorValidationError(f"Page not found for '{test_url}': {e}")
+            elif "Max retries exceeded" in err_str and "NameResolutionError" in err_str:
+                raise ConnectorValidationError(
+                    f"Unable to resolve hostname for '{test_url}'. Please check the URL and your internet connection."
+                )
+            else:
+                # Could be a 5xx or another error, treat as unexpected
+                raise UnexpectedError(f"Unexpected error validating '{test_url}': {e}")
+

 if __name__ == "__main__":
    connector = WebConnector("https://docs.onyx.app/")
--- a/backend/onyx/context/search/preprocessing/preprocessing.py
+++ b/backend/onyx/context/search/preprocessing/preprocessing.py
@@ -23,7 +23,6 @@ from onyx.context.search.preprocessing.access_filters import (
 from onyx.context.search.retrieval.search_runner import (
    remove_stop_words_and_punctuation,
 )
-from onyx.db.engine import CURRENT_TENANT_ID_CONTEXTVAR
 from onyx.db.models import User
 from onyx.db.search_settings import get_current_search_settings
 from onyx.llm.interfaces import LLM
@@ -35,6 +34,7 @@ from onyx.utils.threadpool_concurrency import FunctionCall
 from onyx.utils.threadpool_concurrency import run_functions_in_parallel
 from onyx.utils.timing import log_function_time
 from shared_configs.configs import MULTI_TENANT
+from shared_configs.contextvars import get_current_tenant_id


 logger = setup_logger()
@@ -166,7 +166,7 @@ def retrieval_preprocessing(
        time_cutoff=time_filter or predicted_time_cutoff,
        tags=preset_filters.tags,  # Tags are never auto-extracted
        access_control_list=user_acl_filters,
-        tenant_id=CURRENT_TENANT_ID_CONTEXTVAR.get() if MULTI_TENANT else None,
+        tenant_id=get_current_tenant_id() if MULTI_TENANT else None,
    )

    llm_evaluation_type = LLMEvaluationType.BASIC
--- a/backend/onyx/db/api_key.py
+++ b/backend/onyx/db/api_key.py
@@ -17,7 +17,7 @@ from onyx.db.models import ApiKey
 from onyx.db.models import User
 from onyx.server.api_key.models import APIKeyArgs
 from shared_configs.configs import MULTI_TENANT
-from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
+from shared_configs.contextvars import get_current_tenant_id


 def get_api_key_email_pattern() -> str:
@@ -71,7 +71,7 @@ def insert_api_key(
    std_password_helper = PasswordHelper()

    # Get tenant_id from context var (will be default schema for single tenant)
-    tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
+    tenant_id = get_current_tenant_id()

    api_key = generate_api_key(tenant_id if MULTI_TENANT else None)
    api_key_user_id = uuid.uuid4()
--- a/backend/onyx/db/chat.py
+++ b/backend/onyx/db/chat.py
@@ -628,7 +628,8 @@ def create_new_chat_message(
    commit: bool = True,
    reserved_message_id: int | None = None,
    overridden_model: str | None = None,
-    refined_answer_improvement: bool = True,
+    refined_answer_improvement: bool | None = None,
+    is_agentic: bool = False,
 ) -> ChatMessage:
    if reserved_message_id is not None:
        # Edit existing message
@@ -650,7 +651,7 @@ def create_new_chat_message(
        existing_message.alternate_assistant_id = alternate_assistant_id
        existing_message.overridden_model = overridden_model
        existing_message.refined_answer_improvement = refined_answer_improvement
-
+        existing_message.is_agentic = is_agentic
        new_chat_message = existing_message
    else:
        # Create new message
@@ -670,6 +671,7 @@ def create_new_chat_message(
            alternate_assistant_id=alternate_assistant_id,
            overridden_model=overridden_model,
            refined_answer_improvement=refined_answer_improvement,
+            is_agentic=is_agentic,
        )
        db_session.add(new_chat_message)

@@ -960,6 +962,7 @@ def translate_db_message_to_chat_message_detail(
            chat_message.sub_questions
        ),
        refined_answer_improvement=chat_message.refined_answer_improvement,
+        error=chat_message.error,
    )

    return chat_msg_detail
--- a/backend/onyx/db/connector_credential_pair.py
+++ b/backend/onyx/db/connector_credential_pair.py
@@ -194,9 +194,14 @@ def get_connector_credential_pair_from_id_for_user(
 def get_connector_credential_pair_from_id(
    db_session: Session,
    cc_pair_id: int,
+    eager_load_credential: bool = False,
 ) -> ConnectorCredentialPair | None:
    stmt = select(ConnectorCredentialPair).distinct()
    stmt = stmt.where(ConnectorCredentialPair.id == cc_pair_id)
+
+    if eager_load_credential:
+        stmt = stmt.options(joinedload(ConnectorCredentialPair.credential))
+
    result = db_session.execute(stmt)
    return result.scalar_one_or_none()

--- a/backend/onyx/db/credentials.py
+++ b/backend/onyx/db/credentials.py
@@ -14,6 +14,7 @@ from onyx.configs.constants import DocumentSource
 from onyx.connectors.google_utils.shared_constants import (
    DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY,
 )
+from onyx.db.enums import ConnectorCredentialPairStatus
 from onyx.db.models import ConnectorCredentialPair
 from onyx.db.models import Credential
 from onyx.db.models import Credential__UserGroup
@@ -245,6 +246,10 @@ def swap_credentials_connector(
    existing_pair.credential_id = new_credential_id
    existing_pair.credential = new_credential

+    # Update ccpair status if it's in INVALID state
+    if existing_pair.status == ConnectorCredentialPairStatus.INVALID:
+        existing_pair.status = ConnectorCredentialPairStatus.ACTIVE
+
    # Commit the changes
    db_session.commit()

--- a/backend/onyx/db/document.py
+++ b/backend/onyx/db/document.py
@@ -60,9 +60,8 @@ def count_documents_by_needs_sync(session: Session) -> int:
    This function executes the query and returns the count of
    documents matching the criteria."""

-    count = (
-        session.query(func.count(DbDocument.id.distinct()))
-        .select_from(DbDocument)
+    return (
+        session.query(DbDocument.id)
        .join(
            DocumentByConnectorCredentialPair,
            DbDocument.id == DocumentByConnectorCredentialPair.id,
@@ -73,63 +72,53 @@ def count_documents_by_needs_sync(session: Session) -> int:
                DbDocument.last_synced.is_(None),
            )
        )
-        .scalar()
+        .count()
    )

-    return count
-

 def construct_document_select_for_connector_credential_pair_by_needs_sync(
    connector_id: int, credential_id: int
 ) -> Select:
+    """Build (without executing) a SELECT of the documents attached to the
+    given connector/credential pair that still need a sync.
+
+    A document needs a sync when `last_modified` is newer than `last_synced`
+    or when `last_synced` is NULL.
+
+    NOTE(review): the previous version applied `.distinct()`; the join can
+    only repeat a document if several DocumentByConnectorCredentialPair rows
+    share the same (id, connector_id, credential_id) - presumably that triple
+    is unique, confirm against the model.
+    """
-    initial_doc_ids_stmt = select(DocumentByConnectorCredentialPair.id).where(
-        and_(
-            DocumentByConnectorCredentialPair.connector_id == connector_id,
-            DocumentByConnectorCredentialPair.credential_id == credential_id,
-        )
-    )
-
-    stmt = (
+    return (
        select(DbDocument)
-        .where(
-            DbDocument.id.in_(initial_doc_ids_stmt),
-            or_(
-                DbDocument.last_modified
-                > DbDocument.last_synced,  # last_modified is newer than last_synced
-                DbDocument.last_synced.is_(None),  # never synced
-            ),
+        .join(
+            DocumentByConnectorCredentialPair,
+            DbDocument.id == DocumentByConnectorCredentialPair.id,
+        )
+        .where(
+            and_(
+                DocumentByConnectorCredentialPair.connector_id == connector_id,
+                DocumentByConnectorCredentialPair.credential_id == credential_id,
+                or_(
+                    # last_modified is newer than last_synced
+                    DbDocument.last_modified > DbDocument.last_synced,
+                    # never synced
+                    DbDocument.last_synced.is_(None),
+                ),
+            )
        )
-        .distinct()
    )

-    return stmt
-

 def construct_document_id_select_for_connector_credential_pair_by_needs_sync(
    connector_id: int, credential_id: int
 ) -> Select:
+    """Build (without executing) a SELECT of the ids of documents attached to
+    the given connector/credential pair that still need a sync.
+
+    A document needs a sync when `last_modified` is newer than `last_synced`
+    or when `last_synced` is NULL.
+
+    NOTE(review): the previous version applied `.distinct()`; the join can
+    only repeat an id if several DocumentByConnectorCredentialPair rows share
+    the same (id, connector_id, credential_id) - presumably that triple is
+    unique, confirm against the model.
+    """
-    initial_doc_ids_stmt = select(DocumentByConnectorCredentialPair.id).where(
-        and_(
-            DocumentByConnectorCredentialPair.connector_id == connector_id,
-            DocumentByConnectorCredentialPair.credential_id == credential_id,
-        )
-    )
-
-    stmt = (
+    return (
        select(DbDocument.id)
-        .where(
-            DbDocument.id.in_(initial_doc_ids_stmt),
-            or_(
-                DbDocument.last_modified
-                > DbDocument.last_synced,  # last_modified is newer than last_synced
-                DbDocument.last_synced.is_(None),  # never synced
-            ),
+        .join(
+            DocumentByConnectorCredentialPair,
+            DbDocument.id == DocumentByConnectorCredentialPair.id,
+        )
+        .where(
+            and_(
+                DocumentByConnectorCredentialPair.connector_id == connector_id,
+                DocumentByConnectorCredentialPair.credential_id == credential_id,
+                or_(
+                    # last_modified is newer than last_synced
+                    DbDocument.last_modified > DbDocument.last_synced,
+                    # never synced
+                    DbDocument.last_synced.is_(None),
+                ),
+            )
        )
-        .distinct()
    )

-    return stmt
-

 def get_all_documents_needing_vespa_sync_for_cc_pair(
    db_session: Session, cc_pair_id: int
--- a/backend/onyx/db/engine.py
+++ b/backend/onyx/db/engine.py
@@ -1,5 +1,4 @@
 import contextlib
-import json
 import os
 import re
 import ssl
@@ -16,7 +15,6 @@ from typing import ContextManager
 import asyncpg  # type: ignore
 import boto3
 from fastapi import HTTPException
-from fastapi import Request
 from sqlalchemy import event
 from sqlalchemy import pool
 from sqlalchemy import text
@@ -44,13 +42,13 @@ from onyx.configs.app_configs import POSTGRES_USE_NULL_POOL
 from onyx.configs.app_configs import POSTGRES_USER
 from onyx.configs.constants import POSTGRES_UNKNOWN_APP_NAME
 from onyx.configs.constants import SSL_CERT_FILE
-from onyx.redis.redis_pool import retrieve_auth_token_data_from_redis
 from onyx.server.utils import BasicAuthenticationError
 from onyx.utils.logger import setup_logger
 from shared_configs.configs import MULTI_TENANT
 from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
 from shared_configs.configs import TENANT_ID_PREFIX
 from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
+from shared_configs.contextvars import get_current_tenant_id

 logger = setup_logger()

@@ -191,16 +189,9 @@ class SqlEngine:
    _app_name: str = POSTGRES_UNKNOWN_APP_NAME

    @classmethod
-    def _init_engine(
-        cls, host: str, port: str, db: str, **engine_kwargs: Any
-    ) -> Engine:
+    def _init_engine(cls, **engine_kwargs: Any) -> Engine:
        connection_string = build_connection_string(
-            db_api=SYNC_DB_API,
-            host=host,
-            port=port,
-            db=db,
-            app_name=cls._app_name + "_sync",
-            use_iam=USE_IAM_AUTH,
+            db_api=SYNC_DB_API, app_name=cls._app_name + "_sync", use_iam=USE_IAM_AUTH
        )

        # Start with base kwargs that are valid for all pool types
@@ -238,19 +229,15 @@ class SqlEngine:
    def init_engine(cls, **engine_kwargs: Any) -> None:
        with cls._lock:
            if not cls._engine:
-                cls._engine = cls._init_engine(
-                    host=engine_kwargs.get("host", POSTGRES_HOST),
-                    port=engine_kwargs.get("port", POSTGRES_PORT),
-                    db=engine_kwargs.get("db", POSTGRES_DB),
-                    **engine_kwargs,
-                )
+                cls._engine = cls._init_engine(**engine_kwargs)

    @classmethod
    def get_engine(cls) -> Engine:
+        """Return the class-level engine, building it via `_init_engine()`
+        (no extra kwargs) if it has not been set yet."""
        if not cls._engine:
-            cls.init_engine()
-
-        return cls._engine  # type: ignore
+            with cls._lock:
+                # Re-check under the lock: another caller may have built the
+                # engine between the first check and acquiring the lock.
+                if not cls._engine:
+                    cls._engine = cls._init_engine()
+        return cls._engine

    @classmethod
    def set_app_name(cls, app_name: str) -> None:
@@ -276,7 +263,7 @@ def get_all_tenant_ids() -> list[str] | list[None]:
    if not MULTI_TENANT:
        return [None]

-    with get_session_with_tenant(tenant_id=POSTGRES_DEFAULT_SCHEMA) as session:
+    with get_session_with_shared_schema() as session:
        result = session.execute(
            text(
                f"""
@@ -364,38 +351,6 @@ def get_sqlalchemy_async_engine() -> AsyncEngine:
    return _ASYNC_ENGINE


-async def get_current_tenant_id(request: Request) -> str:
-    if not MULTI_TENANT:
-        tenant_id = POSTGRES_DEFAULT_SCHEMA
-        CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
-        return tenant_id
-
-    try:
-        # Look up token data in Redis
-        token_data = await retrieve_auth_token_data_from_redis(request)
-
-        if not token_data:
-            current_value = CURRENT_TENANT_ID_CONTEXTVAR.get()
-            logger.debug(
-                f"Token data not found or expired in Redis, defaulting to {current_value}"
-            )
-            return current_value
-
-        tenant_id = token_data.get("tenant_id", POSTGRES_DEFAULT_SCHEMA)
-
-        if not is_valid_schema_name(tenant_id):
-            raise HTTPException(status_code=400, detail="Invalid tenant ID format")
-
-        CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
-        return tenant_id
-    except json.JSONDecodeError:
-        logger.error("Error decoding token data from Redis")
-        return POSTGRES_DEFAULT_SCHEMA
-    except Exception as e:
-        logger.error(f"Unexpected error in get_current_tenant_id: {str(e)}")
-        raise HTTPException(status_code=500, detail="Internal server error")
-
-
 # Listen for events on the synchronous Session class
@event.listens_for(Session, "after_begin")
 def _set_search_path(
@@ -421,7 +376,7 @@ async def get_async_session_with_tenant(
    tenant_id: str | None = None,
 ) -> AsyncGenerator[AsyncSession, None]:
    if tenant_id is None:
-        tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
+        tenant_id = get_current_tenant_id()

    if not is_valid_schema_name(tenant_id):
        logger.error(f"Invalid tenant ID: {tenant_id}")
@@ -444,82 +399,80 @@ async def get_async_session_with_tenant(


@contextmanager
-def get_session_with_default_tenant() -> Generator[Session, None, None]:
-    tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
-    with get_session_with_tenant(tenant_id) as session:
+def get_session_with_current_tenant() -> Generator[Session, None, None]:
+    """Yield a session for the tenant returned by `get_current_tenant_id()`."""
+    tenant_id = get_current_tenant_id()
+
+    with get_session_with_tenant(tenant_id=tenant_id) as session:
        yield session


+# Used in multi-tenant mode when code needs to refer to the shared `public` schema
@contextmanager
-def get_session_with_tenant(
-    tenant_id: str | None = None,
-) -> Generator[Session, None, None]:
+def get_session_with_shared_schema() -> Generator[Session, None, None]:
+    """Yield a session on POSTGRES_DEFAULT_SCHEMA.
+
+    CURRENT_TENANT_ID_CONTEXTVAR is set to POSTGRES_DEFAULT_SCHEMA while the
+    session is open and restored to its previous value afterwards.
+    """
+    token = CURRENT_TENANT_ID_CONTEXTVAR.set(POSTGRES_DEFAULT_SCHEMA)
+    try:
+        with get_session_with_tenant(tenant_id=POSTGRES_DEFAULT_SCHEMA) as session:
+            yield session
+    finally:
+        # An exception raised inside the caller's `with` body is re-raised at
+        # the `yield`; without `finally` the reset would be skipped and the
+        # context var would stay on the shared schema.
+        CURRENT_TENANT_ID_CONTEXTVAR.reset(token)
+
+
+@contextmanager
+def get_session_with_tenant(*, tenant_id: str | None) -> Generator[Session, None, None]:
    """
    Generate a database session for a specific tenant.
-    This function:
-    1. Sets the database schema to the specified tenant's schema.
-    2. Preserves the tenant ID across the session.
-    3. Reverts to the previous tenant ID after the session is closed.
-    4. Uses the default schema if no tenant ID is provided.
+
+    `tenant_id` is keyword-only; None falls back to POSTGRES_DEFAULT_SCHEMA.
+    The connection's search_path is pointed at the tenant schema before the
+    session is yielded and, when MULTI_TENANT is set, reset to
+    `"$user", public` once the caller is done with the session.
+
+    This function does not itself set CURRENT_TENANT_ID_CONTEXTVAR.
+
+    Raises HTTPException (400) if the tenant ID is not a valid schema name.
    """
-    engine = get_sqlalchemy_engine()
-    previous_tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get() or POSTGRES_DEFAULT_SCHEMA
-
    if tenant_id is None:
        tenant_id = POSTGRES_DEFAULT_SCHEMA

-    CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
+    engine = get_sqlalchemy_engine()
+
+    # On pool checkout, set_search_path_on_checkout applies the search_path of
+    # the tenant reported by get_current_tenant_id().
    event.listen(engine, "checkout", set_search_path_on_checkout)

    if not is_valid_schema_name(tenant_id):
        raise HTTPException(status_code=400, detail="Invalid tenant ID")

-    try:
-        with engine.connect() as connection:
-            dbapi_connection = connection.connection
-            cursor = dbapi_connection.cursor()
-            try:
-                cursor.execute(f'SET search_path = "{tenant_id}"')
-                if POSTGRES_IDLE_SESSIONS_TIMEOUT:
-                    cursor.execute(
-                        text(
-                            f"SET SESSION idle_in_transaction_session_timeout = {POSTGRES_IDLE_SESSIONS_TIMEOUT}"
-                        )
+    with engine.connect() as connection:
+        dbapi_connection = connection.connection
+        cursor = dbapi_connection.cursor()
+        try:
+            # tenant_id was validated by is_valid_schema_name above before
+            # being interpolated into SQL.
+            cursor.execute(f'SET search_path = "{tenant_id}"')
+            if POSTGRES_IDLE_SESSIONS_TIMEOUT:
+                # NOTE(review): `cursor` appears to be a raw DBAPI cursor, yet it
+                # is handed a SQLAlchemy text() construct here - confirm the
+                # driver accepts it (a plain string may be required).
+                cursor.execute(
+                    text(
+                        f"SET SESSION idle_in_transaction_session_timeout = {POSTGRES_IDLE_SESSIONS_TIMEOUT}"
                    )
-            finally:
-                cursor.close()
+                )
+        finally:
+            cursor.close()

-            with Session(bind=connection, expire_on_commit=False) as session:
-                try:
-                    yield session
-                finally:
-                    if MULTI_TENANT:
-                        cursor = dbapi_connection.cursor()
-                        try:
-                            cursor.execute('SET search_path TO "$user", public')
-                        finally:
-                            cursor.close()
-    finally:
-        CURRENT_TENANT_ID_CONTEXTVAR.set(previous_tenant_id)
+        with Session(bind=connection, expire_on_commit=False) as session:
+            try:
+                yield session
+            finally:
+                # Undo the tenant-specific search_path on this connection once
+                # the caller is done with the session.
+                if MULTI_TENANT:
+                    cursor = dbapi_connection.cursor()
+                    try:
+                        cursor.execute('SET search_path TO "$user", public')
+                    finally:
+                        cursor.close()


 def set_search_path_on_checkout(
    dbapi_conn: Any, connection_record: Any, connection_proxy: Any
 ) -> None:
+    """Engine "checkout" listener: point the connection's search_path at the
+    tenant returned by `get_current_tenant_id()`.
+
+    Does nothing when that tenant ID is empty or not a valid schema name.
+    """
-    tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
+    tenant_id = get_current_tenant_id()
    if tenant_id and is_valid_schema_name(tenant_id):
        with dbapi_conn.cursor() as cursor:
            cursor.execute(f'SET search_path TO "{tenant_id}"')


 def get_session_generator_with_tenant() -> Generator[Session, None, None]:
+    """Plain generator (not a context manager) yielding a session for the
+    tenant returned by `get_current_tenant_id()`."""
-    tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
-    with get_session_with_tenant(tenant_id) as session:
+    tenant_id = get_current_tenant_id()
+    with get_session_with_tenant(tenant_id=tenant_id) as session:
        yield session


 def get_session() -> Generator[Session, None, None]:
-    tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
+    tenant_id = get_current_tenant_id()
    if tenant_id == POSTGRES_DEFAULT_SCHEMA and MULTI_TENANT:
        raise BasicAuthenticationError(detail="User must authenticate")

@@ -534,7 +487,7 @@ def get_session() -> Generator[Session, None, None]:


 async def get_async_session() -> AsyncGenerator[AsyncSession, None]:
-    tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
+    tenant_id = get_current_tenant_id()
    engine = get_sqlalchemy_async_engine()
    async with AsyncSession(engine, expire_on_commit=False) as async_session:
        if MULTI_TENANT:
--- a/backend/onyx/db/enums.py
+++ b/backend/onyx/db/enums.py
@@ -73,6 +73,7 @@ class ConnectorCredentialPairStatus(str, PyEnum):
    ACTIVE = "ACTIVE"
    PAUSED = "PAUSED"
    DELETING = "DELETING"
+    INVALID = "INVALID"

    def is_active(self) -> bool:
        return self == ConnectorCredentialPairStatus.ACTIVE
--- a/backend/onyx/db/models.py
+++ b/backend/onyx/db/models.py
@@ -148,11 +148,12 @@ class User(SQLAlchemyBaseUserTableUUID, Base):
    putting here for simpicity
    """

-    # if specified, controls the assistants that are shown to the user + their order
-    # if not specified, all assistants are shown
-    temperature_override_enabled: Mapped[bool] = mapped_column(Boolean, default=False)
-    auto_scroll: Mapped[bool] = mapped_column(Boolean, default=True)
+    temperature_override_enabled: Mapped[bool | None] = mapped_column(
+        Boolean, default=None
+    )
+    auto_scroll: Mapped[bool | None] = mapped_column(Boolean, default=None)
    shortcut_enabled: Mapped[bool] = mapped_column(Boolean, default=False)
+
    chosen_assistants: Mapped[list[int] | None] = mapped_column(
        postgresql.JSONB(), nullable=True, default=None
    )
@@ -204,6 +205,13 @@ class User(SQLAlchemyBaseUserTableUUID, Base):
        primaryjoin="User.id == foreign(ConnectorCredentialPair.creator_id)",
    )

+    @property
+    def password_configured(self) -> bool:
+        """
+        Returns True if the user has no OAuth (or OIDC) accounts.
+        """
+        return not bool(self.oauth_accounts)
+

 class AccessToken(SQLAlchemyBaseAccessTokenTableUUID, Base):
    pass
@@ -342,7 +350,9 @@ class Document__Tag(Base):
    document_id: Mapped[str] = mapped_column(
        ForeignKey("document.id"), primary_key=True
    )
-    tag_id: Mapped[int] = mapped_column(ForeignKey("tag.id"), primary_key=True)
+    tag_id: Mapped[int] = mapped_column(
+        ForeignKey("tag.id"), primary_key=True, index=True
+    )


 class Persona__Tool(Base):
@@ -560,6 +570,14 @@ class Document(Base):
        back_populates="documents",
    )

+    __table_args__ = (
+        Index(
+            "ix_document_sync_status",
+            last_modified,
+            last_synced,
+        ),
+    )
+

 class Tag(Base):
    __tablename__ = "tag"
@@ -1221,6 +1239,7 @@ class ChatMessage(Base):
        DateTime(timezone=True), server_default=func.now()
    )

+    is_agentic: Mapped[bool] = mapped_column(Boolean, default=False)
    refined_answer_improvement: Mapped[bool] = mapped_column(Boolean, nullable=True)

    chat_session: Mapped[ChatSession] = relationship("ChatSession")
@@ -1742,6 +1761,7 @@ class ChannelConfig(TypedDict):
    # If empty list, follow up with no tags
    follow_up_tags: NotRequired[list[str]]
    show_continue_in_web_ui: NotRequired[bool]  # defaults to False
+    disabled: NotRequired[bool]  # defaults to False


 class SlackChannelConfig(Base):
@@ -1765,6 +1785,7 @@ class SlackChannelConfig(Base):
    is_default: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)

    persona: Mapped[Persona | None] = relationship("Persona")
+
    slack_bot: Mapped["SlackBot"] = relationship(
        "SlackBot",
        back_populates="slack_channel_configs",
--- a/backend/onyx/db/search_settings.py
+++ b/backend/onyx/db/search_settings.py
@@ -13,7 +13,7 @@ from onyx.configs.model_configs import OLD_DEFAULT_DOCUMENT_ENCODER_MODEL
 from onyx.configs.model_configs import OLD_DEFAULT_MODEL_DOC_EMBEDDING_DIM
 from onyx.configs.model_configs import OLD_DEFAULT_MODEL_NORMALIZE_EMBEDDINGS
 from onyx.context.search.models import SavedSearchSettings
-from onyx.db.engine import get_session_with_default_tenant
+from onyx.db.engine import get_session_with_current_tenant
 from onyx.db.llm import fetch_embedding_provider
 from onyx.db.models import CloudEmbeddingProvider
 from onyx.db.models import IndexAttempt
@@ -189,7 +189,7 @@ def get_all_search_settings(db_session: Session) -> list[SearchSettings]:

 def get_multilingual_expansion(db_session: Session | None = None) -> list[str]:
    if db_session is None:
-        with get_session_with_default_tenant() as db_session:
+        with get_session_with_current_tenant() as db_session:
            search_settings = get_current_search_settings(db_session)
    else:
        search_settings = get_current_search_settings(db_session)
--- a/backend/onyx/db/slack_channel_config.py
+++ b/backend/onyx/db/slack_channel_config.py
@@ -151,6 +151,7 @@ def update_slack_channel_config(
    channel_config: ChannelConfig,
    standard_answer_category_ids: list[int],
    enable_auto_filters: bool,
+    disabled: bool,
 ) -> SlackChannelConfig:
    slack_channel_config = db_session.scalar(
        select(SlackChannelConfig).where(
--- a/backend/onyx/document_index/factory.py
+++ b/backend/onyx/document_index/factory.py
@@ -6,7 +6,6 @@ from onyx.db.search_settings import get_current_search_settings
 from onyx.document_index.interfaces import DocumentIndex
 from onyx.document_index.vespa.index import VespaIndex
 from shared_configs.configs import MULTI_TENANT
-from shared_configs.configs import VECTOR_DB_INDEX_NAME_PREFIX__INTEGRATION_TEST_ONLY


 def get_default_document_index(
@@ -24,27 +23,14 @@ def get_default_document_index(
        secondary_index_name = secondary_search_settings.index_name
        secondary_large_chunks_enabled = secondary_search_settings.large_chunks_enabled

-    # modify index names for integration tests so that we can run many tests
-    # using the same Vespa instance w/o having them collide
-    primary_index_name = search_settings.index_name
-    if VECTOR_DB_INDEX_NAME_PREFIX__INTEGRATION_TEST_ONLY:
-        primary_index_name = (
-            f"{VECTOR_DB_INDEX_NAME_PREFIX__INTEGRATION_TEST_ONLY}_{primary_index_name}"
-        )
-        if secondary_index_name:
-            secondary_index_name = f"{VECTOR_DB_INDEX_NAME_PREFIX__INTEGRATION_TEST_ONLY}_{secondary_index_name}"
-
    # Currently only supporting Vespa
    return VespaIndex(
-        index_name=primary_index_name,
+        index_name=search_settings.index_name,
        secondary_index_name=secondary_index_name,
        large_chunks_enabled=search_settings.large_chunks_enabled,
        secondary_large_chunks_enabled=secondary_large_chunks_enabled,
        multitenant=MULTI_TENANT,
        httpx_client=httpx_client,
-        preserve_existing_indices=bool(
-            VECTOR_DB_INDEX_NAME_PREFIX__INTEGRATION_TEST_ONLY
-        ),
    )


--- a/backend/onyx/document_index/vespa/index.py
+++ b/backend/onyx/document_index/vespa/index.py
@@ -73,7 +73,7 @@ from onyx.document_index.vespa_constants import VESPA_DIM_REPLACEMENT_PAT
 from onyx.document_index.vespa_constants import VESPA_TIMEOUT
 from onyx.document_index.vespa_constants import YQL_BASE
 from onyx.indexing.models import DocMetadataAwareIndexChunk
-from onyx.key_value_store.factory import get_kv_store
+from onyx.key_value_store.factory import get_shared_kv_store
 from onyx.utils.batching import batch_generator
 from onyx.utils.logger import setup_logger
 from shared_configs.configs import MULTI_TENANT
@@ -136,7 +136,6 @@ class VespaIndex(DocumentIndex):
        secondary_large_chunks_enabled: bool | None,
        multitenant: bool = False,
        httpx_client: httpx.Client | None = None,
-        preserve_existing_indices: bool = False,
    ) -> None:
        self.index_name = index_name
        self.secondary_index_name = secondary_index_name
@@ -162,18 +161,18 @@ class VespaIndex(DocumentIndex):
                secondary_index_name
            ] = secondary_large_chunks_enabled

-        self.preserve_existing_indices = preserve_existing_indices
-
-    @classmethod
-    def create_indices(
-        cls,
-        indices: list[tuple[str, int, bool]],
-        application_endpoint: str = VESPA_APPLICATION_ENDPOINT,
+    def ensure_indices_exist(
+        self,
+        index_embedding_dim: int,
+        secondary_index_embedding_dim: int | None,
    ) -> None:
-        """
-        Create indices in Vespa based on the passed in configuration(s).
-        """
-        deploy_url = f"{application_endpoint}/tenant/default/prepareandactivate"
+        if MULTI_TENANT:
+            logger.info(
+                "Skipping Vespa index setup for multitenant (would wipe all indices)"
+            )
+            return None
+
+        deploy_url = f"{VESPA_APPLICATION_ENDPOINT}/tenant/default/prepareandactivate"
        logger.notice(f"Deploying Vespa application package to {deploy_url}")

        vespa_schema_path = os.path.join(
@@ -186,7 +185,7 @@ class VespaIndex(DocumentIndex):
        with open(services_file, "r") as services_f:
            services_template = services_f.read()

-        schema_names = [index_name for (index_name, _, _) in indices]
+        schema_names = [self.index_name, self.secondary_index_name]

        doc_lines = _create_document_xml_lines(schema_names)
        services = services_template.replace(DOCUMENT_REPLACEMENT_PAT, doc_lines)
@@ -194,6 +193,14 @@ class VespaIndex(DocumentIndex):
            SEARCH_THREAD_NUMBER_PAT, str(VESPA_SEARCHER_THREADS)
        )

+        kv_store = get_shared_kv_store()
+
+        needs_reindexing = False
+        try:
+            needs_reindexing = cast(bool, kv_store.load(KV_REINDEX_KEY))
+        except Exception:
+            logger.debug("Could not load the reindexing flag. Using ngrams")
+
        with open(overrides_file, "r") as overrides_f:
            overrides_template = overrides_f.read()

@@ -214,63 +221,32 @@ class VespaIndex(DocumentIndex):
            schema_template = schema_f.read()
        schema_template = schema_template.replace(TENANT_ID_PAT, "")

-        for index_name, index_embedding_dim, needs_reindexing in indices:
-            schema = schema_template.replace(
-                DANSWER_CHUNK_REPLACEMENT_PAT, index_name
-            ).replace(VESPA_DIM_REPLACEMENT_PAT, str(index_embedding_dim))
+        schema = schema_template.replace(
+            DANSWER_CHUNK_REPLACEMENT_PAT, self.index_name
+        ).replace(VESPA_DIM_REPLACEMENT_PAT, str(index_embedding_dim))

-            schema = add_ngrams_to_schema(schema) if needs_reindexing else schema
-            schema = schema.replace(TENANT_ID_PAT, "")
-            logger.info(
-                f"Creating index: {index_name} with embedding "
-                f"dimension: {index_embedding_dim}. Schema:\n\n {schema}"
-            )
-            zip_dict[f"schemas/{index_name}.sd"] = schema.encode("utf-8")
+        schema = add_ngrams_to_schema(schema) if needs_reindexing else schema
+        schema = schema.replace(TENANT_ID_PAT, "")
+        zip_dict[f"schemas/{schema_names[0]}.sd"] = schema.encode("utf-8")
+
+        if self.secondary_index_name:
+            upcoming_schema = schema_template.replace(
+                DANSWER_CHUNK_REPLACEMENT_PAT, self.secondary_index_name
+            ).replace(VESPA_DIM_REPLACEMENT_PAT, str(secondary_index_embedding_dim))
+            zip_dict[f"schemas/{schema_names[1]}.sd"] = upcoming_schema.encode("utf-8")

        zip_file = in_memory_zip_from_file_bytes(zip_dict)

        headers = {"Content-Type": "application/zip"}
        response = requests.post(deploy_url, headers=headers, data=zip_file)
        if response.status_code != 200:
-            logger.error(f"Failed to create Vespa indices: {response.text}")
+            logger.error(
+                f"Failed to prepare Vespa Onyx Index. Response: {response.text}"
+            )
            raise RuntimeError(
                f"Failed to prepare Vespa Onyx Index. Response: {response.text}"
            )

-    def ensure_indices_exist(
-        self,
-        index_embedding_dim: int,
-        secondary_index_embedding_dim: int | None,
-    ) -> None:
-        if self.multitenant or MULTI_TENANT:  # be extra safe here
-            logger.info(
-                "Skipping Vespa index setup for multitenant (would wipe all indices)"
-            )
-            return None
-
-        # Used in IT
-        # NOTE: this means that we can't switch embedding models
-        if self.preserve_existing_indices:
-            logger.info("Preserving existing indices")
-            return None
-
-        kv_store = get_kv_store()
-        primary_needs_reindexing = False
-        try:
-            primary_needs_reindexing = cast(bool, kv_store.load(KV_REINDEX_KEY))
-        except Exception:
-            logger.debug("Could not load the reindexing flag. Using ngrams")
-
-        indices = [
-            (self.index_name, index_embedding_dim, primary_needs_reindexing),
-        ]
-        if self.secondary_index_name and secondary_index_embedding_dim:
-            indices.append(
-                (self.secondary_index_name, secondary_index_embedding_dim, False)
-            )
-
-        self.create_indices(indices)
-
    @staticmethod
    def register_multitenant_indices(
        indices: list[str],
@@ -304,7 +280,7 @@ class VespaIndex(DocumentIndex):
            SEARCH_THREAD_NUMBER_PAT, str(VESPA_SEARCHER_THREADS)
        )

-        kv_store = get_kv_store()
+        kv_store = get_shared_kv_store()

        needs_reindexing = False
        try:
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
pablonyx	9cf5cdba2c	improve scroll	2025-02-22 13:25:23 -08:00
Weves	bdaa293ae4	Fix nginx for prod compose file	2025-02-21 16:57:54 -08:00
pablonyx	5a131f4547	Fix integration tests (#4059 )	2025-02-21 15:56:11 -08:00
rkuo-danswer	ffb7d5b85b	enable manual testing for model server (#4003 ) * trying out a fix * add ability to manually run model tests --------- Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>	2025-02-21 14:00:32 -08:00
rkuo-danswer	fe8a5d671a	don't spam the logs with texts on auth errors (#4085 ) * don't spam the logs with texts on auth errors * refactor the logging a bit --------- Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>	2025-02-21 13:40:07 -08:00
Yuhong Sun	6de53ebf60	README Touchup (#4088 )	2025-02-21 13:31:07 -08:00
rkuo-danswer	61d536c782	tool fixes (#4075 )	2025-02-21 12:30:33 -08:00
Chris Weaver	e1ff9086a4	Fix LLM selection (#4078 )	2025-02-21 11:32:57 -08:00
evan-danswer	ba21bacbbf	coerce useLanggraph to boolean (#4084 ) * coerce useLanggraph to boolean	2025-02-21 09:43:46 -08:00
pablonyx	158bccc3fc	Default on for non-ee (#4083 )	2025-02-21 09:11:45 -08:00
Weves	599b7705c2	Fix gitbook connector issues	2025-02-20 15:29:11 -08:00
rkuo-danswer	4958a5355d	try more efficient query (#4047 )	2025-02-20 12:58:50 -08:00
Chris Weaver	c4b8519381	Add support for sending email invites for single tenant users (#4065 )	2025-02-19 21:05:23 -08:00
rkuo-danswer	8b4413694a	fix usage of tenant_id (#4062 ) Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>	2025-02-19 17:50:58 -08:00
pablonyx	57cf7d9fac	default agent search `on`	2025-02-19 17:21:26 -08:00
Chris Weaver	ad4efb5f20	Pin xmlsec version + improve SAML flow (#4054 ) * Pin xmlsec version * testing * test nginx conf change * Pass through more * Cleanup + remove DOMAIN across the board	2025-02-19 16:02:05 -08:00
evan-danswer	e304ec4ab6	Agent search history displayed answer (#4052 )	2025-02-19 15:52:16 -08:00
joachim-danswer	1690dc45ba	timout bumps (#4057 )	2025-02-19 15:51:45 -08:00
pablonyx	7582ba1640	Fix streaming (#4055 )	2025-02-19 15:23:40 -08:00
pablonyx	99fc546943	Miscellaneous indexing fixes (#4042 )	2025-02-19 11:34:49 -08:00
pablonyx	353c185856	Update error class (#4006 )	2025-02-19 10:52:23 -08:00
pablonyx	7c96b7f24e	minor alembic nit	2025-02-19 10:47:33 -08:00
pablonyx	31524a3eff	add connector validation (#4016 )	2025-02-19 10:46:06 -08:00
rkuo-danswer	c9f618798e	support scrolling before scraping (#4040 ) * support scrolling before scraping * fix mypy * install playwright deps --------- Co-authored-by: Richard Kuo <rkuo@rkuo.com>	2025-02-19 17:54:58 +00:00
rkuo-danswer	11f6b44625	Feature/indexing hard timeout 3 (#3980 ) * WIP * implement hard timeout * fix callbacks * put back the timeout * missed a file * fixes * try installing playwright deps * Revert "try installing playwright deps" This reverts commit `4217427568`. --------- Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app> Co-authored-by: Richard Kuo <rkuo@rkuo.com>	2025-02-19 04:12:13 +00:00
pablonyx	e82a25f49e	Non-SMTP password reset (#4031 ) * update * validate * k * minor cleanup * nit * finalize * k * fix tests * fix tests * fix tests	2025-02-19 02:02:28 +00:00
Weves	5a9ec61446	Don't pass thorugh parallel_tool_calls for o-family models	2025-02-18 18:57:05 -08:00
pablonyx	9635522de8	Admin default (#4032 ) * clean up * minor cleanup * building * update agnetic message look * k * fix alembic history	2025-02-18 18:31:54 -08:00
Yuhong Sun	630bdf71a3	Update README (#4044 )	2025-02-18 18:31:28 -08:00
pablonyx	47fd4fa233	Strict Tenant ID Enforcement (#3871 ) * strict tenant id enforcement * k * k * nit * merge * nit * k	2025-02-19 00:52:56 +00:00
Weves	2013beb9e0	Adjust behavior when display_model_names is null	2025-02-18 16:19:08 -08:00
pablonyx	466276161c	Quick link fix (#4039 )	2025-02-18 16:18:41 -08:00
rkuo-danswer	c934892c68	add index to document__tag.tag_id (#4038 ) Co-authored-by: Richard Kuo <rkuo@rkuo.com>	2025-02-18 19:51:36 +00:00
joachim-danswer	1daa3a663d	timout bumps (#4037 )	2025-02-18 18:26:29 +00:00
Chris Weaver	7324273233	Small confluence group sync tweaks (#4033 )	2025-02-18 07:05:41 +00:00
evan-danswer	2b2ba5478c	new is_agentic flag for chatmessages (#4026 ) * new is_agentic flag for chatmessages * added cancelled error to db * added cancelled error to returned message	2025-02-18 04:20:33 +00:00
pablonyx	045a41d929	Add default slack bot disabling (#3935 ) * add slack bot disabling * update * k * minor	2025-02-18 04:08:33 +00:00
pablonyx	e3bc7cc747	improve validation schema (#3984 )	2025-02-18 03:18:23 +00:00
evan-danswer	0826b035a2	Update README.md (#3908 ) * Update README.md help future integration test runners * Update README.md * Update README.md --------- Co-authored-by: pablonyx <pablo@danswer.ai>	2025-02-18 03:08:47 +00:00
pablonyx	cf0e3d1ff4	fix main	2025-02-17 18:23:15 -08:00
evan-danswer	10c81f75e2	consistent refined answer improvement (#4027 )	2025-02-17 21:02:03 +00:00
evan-danswer	5ca898bde2	Force use tool overrides (#4024 ) * initial rename + timeout bump * querry override	2025-02-17 21:01:24 +00:00
pablonyx	58b252727f	UX (#4014 )	2025-02-17 13:21:43 -08:00
joachim-danswer	86bd121806	no reranking if local model w/o GPU for Agent Search (#4011 ) * no reranking if locql model w/o GPU * more efficient gpu status calling * fix unit tests --------- Co-authored-by: Evan Lohn <evan@danswer.ai>	2025-02-17 14:13:24 +00:00
evan-danswer	9324f426c0	added timeouts for agent llm calls (#4019 ) * added timeouts for agent llm calls * timing suggestions in agent config * improved timeout that actually exits early * added new global timeout and connection timeout distinction * fixed error raising bug and made entity extraction recoverable * warnings and refactor * mypy --------- Co-authored-by: joachim-danswer <joachim@danswer.ai>	2025-02-17 07:02:19 +00:00
joachim-danswer	20d3efc86e	By default, use primary LLM for initial & refined answer (#4012 ) * By default, use primary LLM for initial & refined answer Use of new env variable * simplification	2025-02-16 23:20:07 +00:00