nit

2026-02-17 15:55:45 +00:00 · 2025-02-13 17:20:46 -08:00
306 changed files with 3111 additions and 10567 deletions
--- a/.github/workflows/pr-playwright-tests.yml
+++ b/.github/workflows/pr-playwright-tests.yml
@@ -1,6 +1,6 @@
-name: Run Playwright Tests
+name: Run Chromatic Tests
 concurrency:
-  group: Run-Playwright-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
+  group: Run-Chromatic-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
  cancel-in-progress: true

 on: push
@@ -198,47 +198,43 @@ jobs:
          cd deployment/docker_compose
          docker compose -f docker-compose.dev.yml -p danswer-stack down -v

-# NOTE: Chromatic UI diff testing is currently disabled.
-# We are using Playwright for local and CI testing without visual regression checks.
-# Chromatic may be reintroduced in the future for UI diff testing if needed.
+  chromatic-tests:
+    name: Chromatic Tests

-# chromatic-tests:
-#   name: Chromatic Tests
+    needs: playwright-tests
+    runs-on:
+      [
+        runs-on,
+        runner=32cpu-linux-x64,
+        disk=large,
+        "run-id=${{ github.run_id }}",
+      ]
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0

-#   needs: playwright-tests
-#   runs-on:
-#     [
-#       runs-on,
-#       runner=32cpu-linux-x64,
-#       disk=large,
-#       "run-id=${{ github.run_id }}",
-#     ]
-#   steps:
-#     - name: Checkout code
-#       uses: actions/checkout@v4
-#       with:
-#         fetch-depth: 0
+      - name: Setup node
+        uses: actions/setup-node@v4
+        with:
+          node-version: 22

-#     - name: Setup node
-#       uses: actions/setup-node@v4
-#       with:
-#         node-version: 22
+      - name: Install node dependencies
+        working-directory: ./web
+        run: npm ci

-#     - name: Install node dependencies
-#       working-directory: ./web
-#       run: npm ci
+      - name: Download Playwright test results
+        uses: actions/download-artifact@v4
+        with:
+          name: test-results
+          path: ./web/test-results

-#     - name: Download Playwright test results
-#       uses: actions/download-artifact@v4
-#       with:
-#         name: test-results
-#         path: ./web/test-results
-
-#     - name: Run Chromatic
-#       uses: chromaui/action@latest
-#       with:
-#         playwright: true
-#         projectToken: ${{ secrets.CHROMATIC_PROJECT_TOKEN }}
-#         workingDir: ./web
-#       env:
-#         CHROMATIC_ARCHIVE_LOCATION: ./test-results
+      - name: Run Chromatic
+        uses: chromaui/action@latest
+        with:
+          playwright: true
+          projectToken: ${{ secrets.CHROMATIC_PROJECT_TOKEN }}
+          workingDir: ./web
+        env:
+          CHROMATIC_ARCHIVE_LOCATION: ./test-results
--- a/.github/workflows/pr-integration-tests.yml
+++ b/.github/workflows/pr-integration-tests.yml
@@ -99,7 +99,7 @@ jobs:
          DISABLE_TELEMETRY=true \
          IMAGE_TAG=test \
          DEV_MODE=true \
-          docker compose -f docker-compose.multitenant-dev.yml -p onyx-stack up -d
+          docker compose -f docker-compose.multitenant-dev.yml -p danswer-stack up -d
        id: start_docker_multi_tenant

      # In practice, `cloud` Auth type would require OAUTH credentials to be set.
@@ -108,13 +108,12 @@ jobs:
          echo "Waiting for 3 minutes to ensure API server is ready..."
          sleep 180
          echo "Running integration tests..."
-          docker run --rm --network onyx-stack_default \
+          docker run --rm --network danswer-stack_default \
            --name test-runner \
            -e POSTGRES_HOST=relational_db \
            -e POSTGRES_USER=postgres \
            -e POSTGRES_PASSWORD=password \
            -e POSTGRES_DB=postgres \
-            -e POSTGRES_USE_NULL_POOL=true \
            -e VESPA_HOST=index \
            -e REDIS_HOST=cache \
            -e API_SERVER_HOST=api_server \
@@ -144,28 +143,24 @@ jobs:
      - name: Stop multi-tenant Docker containers
        run: |
          cd deployment/docker_compose
-          docker compose -f docker-compose.multitenant-dev.yml -p onyx-stack down -v
+          docker compose -f docker-compose.multitenant-dev.yml -p danswer-stack down -v

-      # NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections
      - name: Start Docker containers
        run: |
          cd deployment/docker_compose
          ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
          AUTH_TYPE=basic \
-          POSTGRES_POOL_PRE_PING=true \
-          POSTGRES_USE_NULL_POOL=true \
          REQUIRE_EMAIL_VERIFICATION=false \
          DISABLE_TELEMETRY=true \
          IMAGE_TAG=test \
-          INTEGRATION_TESTS_MODE=true \
-          docker compose -f docker-compose.dev.yml -p onyx-stack up -d
+          docker compose -f docker-compose.dev.yml -p danswer-stack up -d
        id: start_docker

      - name: Wait for service to be ready
        run: |
          echo "Starting wait-for-service script..."

-          docker logs -f onyx-stack-api_server-1 &
+          docker logs -f danswer-stack-api_server-1 &

          start_time=$(date +%s)
          timeout=300  # 5 minutes in seconds
@@ -195,24 +190,15 @@ jobs:
          done
          echo "Finished waiting for service."

-      - name: Start Mock Services
-        run: |
-          cd backend/tests/integration/mock_services
-          docker compose -f docker-compose.mock-it-services.yml \
-            -p mock-it-services-stack up -d
-
-      # NOTE: Use pre-ping/null to reduce flakiness due to dropped connections
      - name: Run Standard Integration Tests
        run: |
          echo "Running integration tests..."
-          docker run --rm --network onyx-stack_default \
+          docker run --rm --network danswer-stack_default \
            --name test-runner \
            -e POSTGRES_HOST=relational_db \
            -e POSTGRES_USER=postgres \
            -e POSTGRES_PASSWORD=password \
            -e POSTGRES_DB=postgres \
-            -e POSTGRES_POOL_PRE_PING=true \
-            -e POSTGRES_USE_NULL_POOL=true \
            -e VESPA_HOST=index \
            -e REDIS_HOST=cache \
            -e API_SERVER_HOST=api_server \
@@ -222,8 +208,6 @@ jobs:
            -e CONFLUENCE_USER_NAME=${CONFLUENCE_USER_NAME} \
            -e CONFLUENCE_ACCESS_TOKEN=${CONFLUENCE_ACCESS_TOKEN} \
            -e TEST_WEB_HOSTNAME=test-runner \
-            -e MOCK_CONNECTOR_SERVER_HOST=mock_connector_server \
-            -e MOCK_CONNECTOR_SERVER_PORT=8001 \
            onyxdotapp/onyx-integration:test \
            /app/tests/integration/tests \
            /app/tests/integration/connector_job_tests
@@ -245,13 +229,13 @@ jobs:
        if: always()
        run: |
          cd deployment/docker_compose
-          docker compose -f docker-compose.dev.yml -p onyx-stack logs --no-color api_server > $GITHUB_WORKSPACE/api_server.log || true
+          docker compose -f docker-compose.dev.yml -p danswer-stack logs --no-color api_server > $GITHUB_WORKSPACE/api_server.log || true

      - name: Dump all-container logs (optional)
        if: always()
        run: |
          cd deployment/docker_compose
-          docker compose -f docker-compose.dev.yml -p onyx-stack logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
+          docker compose -f docker-compose.dev.yml -p danswer-stack logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true

      - name: Upload logs
        if: always()
@@ -265,4 +249,4 @@ jobs:
        if: always()
        run: |
          cd deployment/docker_compose
-          docker compose -f docker-compose.dev.yml -p onyx-stack down -v
+          docker compose -f docker-compose.dev.yml -p danswer-stack down -v
--- a/.github/workflows/pr-python-connector-tests.yml
+++ b/.github/workflows/pr-python-connector-tests.yml
@@ -44,9 +44,6 @@ env:
  SHAREPOINT_CLIENT_SECRET: ${{ secrets.SHAREPOINT_CLIENT_SECRET }}
  SHAREPOINT_CLIENT_DIRECTORY_ID: ${{ secrets.SHAREPOINT_CLIENT_DIRECTORY_ID }}
  SHAREPOINT_SITE: ${{ secrets.SHAREPOINT_SITE }}
-  # Gitbook
-  GITBOOK_SPACE_ID: ${{ secrets.GITBOOK_SPACE_ID }}
-  GITBOOK_API_KEY: ${{ secrets.GITBOOK_API_KEY }}

 jobs:
  connectors-check:
@@ -74,9 +71,7 @@ jobs:
          python -m pip install --upgrade pip
          pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
          pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
-          playwright install chromium
-          playwright install-deps chromium
-          
+
      - name: Run Tests
        shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
        run: py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/connectors
--- a/.github/workflows/pr-python-model-tests.yml
+++ b/.github/workflows/pr-python-model-tests.yml
@@ -1,16 +1,10 @@
-name: Model Server Tests
+name: Connector Tests

 on:
  schedule:
    # This cron expression runs the job daily at 16:00 UTC (9am PT)
    - cron: "0 16 * * *"
-  workflow_dispatch:
-    inputs:
-      branch:
-        description: 'Branch to run the workflow on'
-        required: false
-        default: 'main'
-        
+
 env:
  # Bedrock
  AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
@@ -32,23 +26,6 @@ jobs:
      - name: Checkout code
        uses: actions/checkout@v4

-      - name: Login to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKER_USERNAME }}
-          password: ${{ secrets.DOCKER_TOKEN }}
-
-      # tag every docker image with "test" so that we can spin up the correct set
-      # of images during testing
-
-      # We don't need to build the Web Docker image since it's not yet used
-      # in the integration tests. We have a separate action to verify that it builds
-      # successfully.
-      - name: Pull Model Server Docker image
-        run: |
-          docker pull onyxdotapp/onyx-model-server:latest
-          docker tag onyxdotapp/onyx-model-server:latest onyxdotapp/onyx-model-server:test
-          
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
@@ -64,49 +41,6 @@ jobs:
          pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
          pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt

-      - name: Start Docker containers
-        run: |
-          cd deployment/docker_compose
-          ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
-          AUTH_TYPE=basic \
-          REQUIRE_EMAIL_VERIFICATION=false \
-          DISABLE_TELEMETRY=true \
-          IMAGE_TAG=test \
-          docker compose -f docker-compose.dev.yml -p onyx-stack up -d indexing_model_server
-        id: start_docker
-
-      - name: Wait for service to be ready
-        run: |
-          echo "Starting wait-for-service script..."
-
-          start_time=$(date +%s)
-          timeout=300  # 5 minutes in seconds
-
-          while true; do
-            current_time=$(date +%s)
-            elapsed_time=$((current_time - start_time))
-            
-            if [ $elapsed_time -ge $timeout ]; then
-              echo "Timeout reached. Service did not become ready in 5 minutes."
-              exit 1
-            fi
-            
-            # Use curl with error handling to ignore specific exit code 56
-            response=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:9000/api/health || echo "curl_error")
-            
-            if [ "$response" = "200" ]; then
-              echo "Service is ready!"
-              break
-            elif [ "$response" = "curl_error" ]; then
-              echo "Curl encountered an error, possibly exit code 56. Continuing to retry..."
-            else
-              echo "Service not ready yet (HTTP status $response). Retrying in 5 seconds..."
-            fi
-            
-            sleep 5
-          done
-          echo "Finished waiting for service."
-          
      - name: Run Tests
        shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}"
        run: |
@@ -122,10 +56,3 @@ jobs:
            -H 'Content-type: application/json' \
            --data '{"text":"Scheduled Model Tests failed! Check the run at: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' \
            $SLACK_WEBHOOK
-            
-      - name: Stop Docker containers
-        if: always()
-        run: |
-          cd deployment/docker_compose
-          docker compose -f docker-compose.dev.yml -p onyx-stack down -v
-          
--- a/.vscode/launch.template.jsonc
+++ b/.vscode/launch.template.jsonc
@@ -205,7 +205,7 @@
                "--loglevel=INFO",
                "--hostname=light@%n",
                "-Q",
-                "vespa_metadata_sync,connector_deletion,doc_permissions_upsert,checkpoint_cleanup",
+                "vespa_metadata_sync,connector_deletion,doc_permissions_upsert",
            ],
            "presentation": {
 				 "group": "2",
--- a/README.md
+++ b/README.md
@@ -24,93 +24,113 @@
 </a>
 </p>

-<strong>[Onyx](https://www.onyx.app/)</strong> (formerly Danswer) is the AI platform connected to your company's docs, apps, and people.
-Onyx provides a feature rich Chat interface and plugs into any LLM of your choice.
-Keep knowledge and access controls sync-ed across over 40 connectors like Google Drive, Slack, Confluence, Salesforce, etc.
-Create custom AI agents with unique prompts, knowledge, and actions that the agents can take.
-Onyx can be deployed securely anywhere and for any scale - on a laptop, on-premise, or to cloud.
+<strong>[Onyx](https://www.onyx.app/)</strong> (formerly Danswer) is the AI Assistant connected to your company's docs, apps, and people.
+Onyx provides a Chat interface and plugs into any LLM of your choice. Onyx can be deployed anywhere and for any
+scale - on a laptop, on-premise, or to cloud. Since you own the deployment, your user data and chats are fully in your
+own control. Onyx is dual-licensed, with most of it under the MIT license, and is designed to be modular and easily extensible. The system also comes fully ready
+for production usage with user authentication, role management (admin/basic users), chat persistence, and a UI for
+configuring AI Assistants.

+Onyx also serves as an Enterprise Search across all common workplace tools such as Slack, Google Drive, Confluence, etc.
+By combining LLMs and team specific knowledge, Onyx becomes a subject matter expert for the team. Imagine ChatGPT if
+it had access to your team's unique knowledge! It enables questions such as "A customer wants feature X, is this already
+supported?" or "Where's the pull request for feature Y?"

-<h3>Feature Highlights</h3>
+<h3>Usage</h3>

-**Deep research over your team's knowledge:**
+Onyx Web App:

-https://private-user-images.githubusercontent.com/32520769/414509312-48392e83-95d0-4fb5-8650-a396e05e0a32.mp4?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3Mzk5Mjg2MzYsIm5iZiI6MTczOTkyODMzNiwicGF0aCI6Ii8zMjUyMDc2OS80MTQ1MDkzMTItNDgzOTJlODMtOTVkMC00ZmI1LTg2NTAtYTM5NmUwNWUwYTMyLm1wND9YLUFtei1BbGdvcml0aG09QVdTNC1ITUFDLVNIQTI1NiZYLUFtei1DcmVkZW50aWFsPUFLSUFWQ09EWUxTQTUzUFFLNFpBJTJGMjAyNTAyMTklMkZ1cy1lYXN0LTElMkZzMyUyRmF3czRfcmVxdWVzdCZYLUFtei1EYXRlPTIwMjUwMjE5VDAxMjUzNlomWC1BbXotRXhwaXJlcz0zMDAmWC1BbXotU2lnbmF0dXJlPWFhMzk5Njg2Y2Y5YjFmNDNiYTQ2YzM5ZTg5YWJiYTU2NWMyY2YwNmUyODE2NWUxMDRiMWQxZWJmODI4YTA0MTUmWC1BbXotU2lnbmVkSGVhZGVycz1ob3N0In0.a9D8A0sgKE9AoaoE-mfFbJ6_OKYeqaf7TZ4Han2JfW8
+https://github.com/onyx-dot-app/onyx/assets/32520769/563be14c-9304-47b5-bf0a-9049c2b6f410

+Or, plug Onyx into your existing Slack workflows (more integrations to come 😁):

-**Use Onyx as a secure AI Chat with any LLM:**
-
-![Onyx Chat Silent Demo](https://github.com/onyx-dot-app/onyx/releases/download/v0.21.1/OnyxChatSilentDemo.gif)
-
-
-**Easily set up connectors to your apps:**
-
-![Onyx Connector Silent Demo](https://github.com/onyx-dot-app/onyx/releases/download/v0.21.1/OnyxConnectorSilentDemo.gif)
-
-
-**Access Onyx where your team already works:**
-
-![Onyx Bot Demo](https://github.com/onyx-dot-app/onyx/releases/download/v0.21.1/OnyxBot.png)
+https://github.com/onyx-dot-app/onyx/assets/25087905/3e19739b-d178-4371-9a38-011430bdec1b

+For more details on the Admin UI to manage connectors and users, check out our
+<strong><a href="https://www.youtube.com/watch?v=geNzY1nbCnU">Full Video Demo</a></strong>!

 ## Deployment
-**To try it out for free and get started in seconds, check out [Onyx Cloud](https://cloud.onyx.app/signup)**.

-Onyx can also be run locally (even on a laptop) or deployed on a virtual machine with a single
+Onyx can easily be run locally (even on a laptop) or deployed on a virtual machine with a single
 `docker compose` command. Checkout our [docs](https://docs.onyx.app/quickstart) to learn more.

-We also have built-in support for high-availability/scalable deployment on Kubernetes.
-References [here](https://github.com/onyx-dot-app/onyx/tree/main/deployment).
+We also have built-in support for deployment on Kubernetes. Files for that can be found [here](https://github.com/onyx-dot-app/onyx/tree/main/deployment/kubernetes).

+## 💃 Main Features

-## 🔍 Other Notable Benefits of Onyx
- Custom deep learning models for indexing and inference time, only through Onyx + learning from user feedback.
- Flexible security features like SSO (OIDC/SAML/OAuth2), RBAC, encryption of credentials, etc.
- Knowledge curation features like document-sets, query history, usage analytics, etc.
- Scalable deployment options tested up to many tens of thousands users and hundreds of millions of documents.
-
+- Chat UI with the ability to select documents to chat with.
+- Create custom AI Assistants with different prompts and backing knowledge sets.
+- Connect Onyx with the LLM of your choice (self-host for a fully airgapped solution).
+- Document Search + AI Answers for natural language queries.
+- Connectors to all common workplace tools like Google Drive, Confluence, Slack, etc.
+- Slack integration to get answers and search results directly in Slack.

 ## 🚧 Roadmap
- New methods in information retrieval (StructRAG, LightGraphRAG, etc.)
- Personalized Search
- Organizational understanding and ability to locate and suggest experts from your team.
- Code Search
- SQL and Structured Query Language

+- Chat/Prompt sharing with specific teammates and user groups.
+- Multimodal model support, chat with images, video, etc.
+- Choosing between LLMs and parameters during chat session.
+- Tool calling and agent configuration options.
+- Organizational understanding and ability to locate and suggest experts from your team.
+
+## Other Notable Benefits of Onyx
+
+- User Authentication with document level access management.
+- Best in class Hybrid Search across all sources (BM-25 + prefix aware embedding models).
+- Admin Dashboard to configure connectors, document-sets, access, etc.
+- Custom deep learning models + learn from user feedback.
+- Easy deployment and ability to host Onyx anywhere of your choosing.

 ## 🔌 Connectors
-Keep knowledge and access up to sync across 40+ connectors:

+Efficiently pulls the latest changes from:
+
+- Slack
+- GitHub
 - Google Drive
 - Confluence
- Slack
- Gmail
- Salesforce
- Microsoft Sharepoint
- Github
 - Jira
 - Zendesk
+- Gmail
+- Notion
 - Gong
- Microsoft Teams
- Dropbox
+- Slab
+- Linear
+- Productboard
+- Guru
+- Bookstack
+- Document360
+- Sharepoint
+- Hubspot
 - Local Files
 - Websites
 - And more ...

-See the full list [here](https://docs.onyx.app/connectors).
+## 📚 Editions

-
-## 📚 Licensing
 There are two editions of Onyx:

- Onyx Community Edition (CE) is available freely under the MIT Expat license. Simply follow the Deployment guide above.
- Onyx Enterprise Edition (EE) includes extra features that are primarily useful for larger organizations.
-For feature details, check out [our website](https://www.onyx.app/pricing).
+- Onyx Community Edition (CE) is available freely under the MIT Expat license. This version has ALL the core features discussed above. This is the version of Onyx you will get if you follow the Deployment guide above.
+- Onyx Enterprise Edition (EE) includes extra features that are primarily useful for larger organizations. Specifically, this includes:
+  - Single Sign-On (SSO), with support for both SAML and OIDC
+  - Role-based access control
+  - Document permission inheritance from connected sources
+  - Usage analytics and query history accessible to admins
+  - Whitelabeling
+  - API key authentication
+  - Encryption of secrets
+  - And many more! Check out [our website](https://www.onyx.app/) for the latest.

 To try the Onyx Enterprise Edition:
-1. Checkout [Onyx Cloud](https://cloud.onyx.app/signup).
-2. For self-hosting the Enterprise Edition, contact us at [founders@onyx.app](mailto:founders@onyx.app) or book a call with us on our [Cal](https://cal.com/team/onyx/founders).

+1. Check out our [Cloud product](https://cloud.onyx.app/signup).
+2. For self-hosting, contact us at [founders@onyx.app](mailto:founders@onyx.app) or book a call with us on our [Cal](https://cal.com/team/onyx/founders).

 ## 💡 Contributing
+
 Looking to contribute? Please check out the [Contribution Guide](CONTRIBUTING.md) for more details.
+
+## ⭐ Star History
+
+[![Star History Chart](https://api.star-history.com/svg?repos=onyx-dot-app/onyx&type=Date)](https://star-history.com/#onyx-dot-app/onyx&Date)
+
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -28,16 +28,14 @@ RUN apt-get update && \
        curl \
        zip \
        ca-certificates \
-        libgnutls30 \
-        libblkid1 \
-        libmount1 \
-        libsmartcols1 \
-        libuuid1 \
+        libgnutls30=3.7.9-2+deb12u3 \
+        libblkid1=2.38.1-5+deb12u1 \
+        libmount1=2.38.1-5+deb12u1 \
+        libsmartcols1=2.38.1-5+deb12u1 \
+        libuuid1=2.38.1-5+deb12u1 \
        libxmlsec1-dev \
        pkg-config \
-        gcc \
-        nano \
-        vim && \
+        gcc && \
    rm -rf /var/lib/apt/lists/* && \
    apt-get clean

--- a/backend/alembic/versions/1a03d2c2856b_add_indexes_to_document__tag.py
+++ b/backend/alembic/versions/1a03d2c2856b_add_indexes_to_document__tag.py
@@ -1,27 +0,0 @@
-"""Add indexes to document__tag
-
-Revision ID: 1a03d2c2856b
-Revises: 9c00a2bccb83
-Create Date: 2025-02-18 10:45:13.957807
-
-"""
-from alembic import op
-
-# revision identifiers, used by Alembic.
-revision = "1a03d2c2856b"
-down_revision = "9c00a2bccb83"
-branch_labels: None = None
-depends_on: None = None
-
-
-def upgrade() -> None:
-    op.create_index(
-        op.f("ix_document__tag_tag_id"),
-        "document__tag",
-        ["tag_id"],
-        unique=False,
-    )
-
-
-def downgrade() -> None:
-    op.drop_index(op.f("ix_document__tag_tag_id"), table_name="document__tag")
--- a/backend/alembic/versions/9c00a2bccb83_chat_message_agentic.py
+++ b/backend/alembic/versions/9c00a2bccb83_chat_message_agentic.py
@@ -1,43 +0,0 @@
-"""chat_message_agentic
-
-Revision ID: 9c00a2bccb83
-Revises: b7a7eee5aa15
-Create Date: 2025-02-17 11:15:43.081150
-
-"""
-from alembic import op
-import sqlalchemy as sa
-
-
-# revision identifiers, used by Alembic.
-revision = "9c00a2bccb83"
-down_revision = "b7a7eee5aa15"
-branch_labels = None
-depends_on = None
-
-
-def upgrade() -> None:
-    # First add the column as nullable
-    op.add_column("chat_message", sa.Column("is_agentic", sa.Boolean(), nullable=True))
-
-    # Update existing rows based on presence of SubQuestions
-    op.execute(
-        """
-        UPDATE chat_message
-        SET is_agentic = EXISTS (
-            SELECT 1
-            FROM agent__sub_question
-            WHERE agent__sub_question.primary_question_id = chat_message.id
-        )
-        WHERE is_agentic IS NULL
-    """
-    )
-
-    # Make the column non-nullable with a default value of False
-    op.alter_column(
-        "chat_message", "is_agentic", nullable=False, server_default=sa.text("false")
-    )
-
-
-def downgrade() -> None:
-    op.drop_column("chat_message", "is_agentic")
--- a/backend/alembic/versions/acaab4ef4507_remove_inactive_ccpair_status_on_.py
+++ b/backend/alembic/versions/acaab4ef4507_remove_inactive_ccpair_status_on_.py
@@ -1,29 +0,0 @@
-"""remove inactive ccpair status on downgrade
-
-Revision ID: acaab4ef4507
-Revises: b388730a2899
-Create Date: 2025-02-16 18:21:41.330212
-
-"""
-from alembic import op
-from onyx.db.models import ConnectorCredentialPair
-from onyx.db.enums import ConnectorCredentialPairStatus
-from sqlalchemy import update
-
-# revision identifiers, used by Alembic.
-revision = "acaab4ef4507"
-down_revision = "b388730a2899"
-branch_labels = None
-depends_on = None
-
-
-def upgrade() -> None:
-    pass
-
-
-def downgrade() -> None:
-    op.execute(
-        update(ConnectorCredentialPair)
-        .where(ConnectorCredentialPair.status == ConnectorCredentialPairStatus.INVALID)
-        .values(status=ConnectorCredentialPairStatus.ACTIVE)
-    )
--- a/backend/alembic/versions/b388730a2899_nullable_preferences.py
+++ b/backend/alembic/versions/b388730a2899_nullable_preferences.py
@@ -1,31 +0,0 @@
-"""nullable preferences
-
-Revision ID: b388730a2899
-Revises: 1a03d2c2856b
-Create Date: 2025-02-17 18:49:22.643902
-
-"""
-from alembic import op
-
-
-# revision identifiers, used by Alembic.
-revision = "b388730a2899"
-down_revision = "1a03d2c2856b"
-branch_labels = None
-depends_on = None
-
-
-def upgrade() -> None:
-    op.alter_column("user", "temperature_override_enabled", nullable=True)
-    op.alter_column("user", "auto_scroll", nullable=True)
-
-
-def downgrade() -> None:
-    # Ensure no null values before making columns non-nullable
-    op.execute(
-        'UPDATE "user" SET temperature_override_enabled = false WHERE temperature_override_enabled IS NULL'
-    )
-    op.execute('UPDATE "user" SET auto_scroll = false WHERE auto_scroll IS NULL')
-
-    op.alter_column("user", "temperature_override_enabled", nullable=False)
-    op.alter_column("user", "auto_scroll", nullable=False)
--- a/backend/alembic/versions/b7a7eee5aa15_add_checkpointing_failure_handling.py
+++ b/backend/alembic/versions/b7a7eee5aa15_add_checkpointing_failure_handling.py
@@ -1,124 +0,0 @@
-"""Add checkpointing/failure handling
-
-Revision ID: b7a7eee5aa15
-Revises: f39c5794c10a
-Create Date: 2025-01-24 15:17:36.763172
-
-"""
-from alembic import op
-import sqlalchemy as sa
-from sqlalchemy.dialects import postgresql
-
-# revision identifiers, used by Alembic.
-revision = "b7a7eee5aa15"
-down_revision = "f39c5794c10a"
-branch_labels = None
-depends_on = None
-
-
-def upgrade() -> None:
-    op.add_column(
-        "index_attempt",
-        sa.Column("checkpoint_pointer", sa.String(), nullable=True),
-    )
-    op.add_column(
-        "index_attempt",
-        sa.Column("poll_range_start", sa.DateTime(timezone=True), nullable=True),
-    )
-    op.add_column(
-        "index_attempt",
-        sa.Column("poll_range_end", sa.DateTime(timezone=True), nullable=True),
-    )
-
-    op.create_index(
-        "ix_index_attempt_cc_pair_settings_poll",
-        "index_attempt",
-        [
-            "connector_credential_pair_id",
-            "search_settings_id",
-            "status",
-            sa.text("time_updated DESC"),
-        ],
-    )
-
-    # Drop the old IndexAttemptError table
-    op.drop_index("index_attempt_id", table_name="index_attempt_errors")
-    op.drop_table("index_attempt_errors")
-
-    # Create the new version of the table
-    op.create_table(
-        "index_attempt_errors",
-        sa.Column("id", sa.Integer(), primary_key=True),
-        sa.Column("index_attempt_id", sa.Integer(), nullable=False),
-        sa.Column("connector_credential_pair_id", sa.Integer(), nullable=False),
-        sa.Column("document_id", sa.String(), nullable=True),
-        sa.Column("document_link", sa.String(), nullable=True),
-        sa.Column("entity_id", sa.String(), nullable=True),
-        sa.Column("failed_time_range_start", sa.DateTime(timezone=True), nullable=True),
-        sa.Column("failed_time_range_end", sa.DateTime(timezone=True), nullable=True),
-        sa.Column("failure_message", sa.Text(), nullable=False),
-        sa.Column("is_resolved", sa.Boolean(), nullable=False, default=False),
-        sa.Column(
-            "time_created",
-            sa.DateTime(timezone=True),
-            server_default=sa.text("now()"),
-            nullable=False,
-        ),
-        sa.ForeignKeyConstraint(
-            ["index_attempt_id"],
-            ["index_attempt.id"],
-        ),
-        sa.ForeignKeyConstraint(
-            ["connector_credential_pair_id"],
-            ["connector_credential_pair.id"],
-        ),
-    )
-
-
-def downgrade() -> None:
-    op.execute("SET lock_timeout = '5s'")
-
-    # try a few times to drop the table, this has been observed to fail due to other locks
-    # blocking the drop
-    NUM_TRIES = 10
-    for i in range(NUM_TRIES):
-        try:
-            op.drop_table("index_attempt_errors")
-            break
-        except Exception as e:
-            if i == NUM_TRIES - 1:
-                raise e
-            print(f"Error dropping table: {e}. Retrying...")
-
-    op.execute("SET lock_timeout = DEFAULT")
-
-    # Recreate the old IndexAttemptError table
-    op.create_table(
-        "index_attempt_errors",
-        sa.Column("id", sa.Integer(), primary_key=True),
-        sa.Column("index_attempt_id", sa.Integer(), nullable=True),
-        sa.Column("batch", sa.Integer(), nullable=True),
-        sa.Column("doc_summaries", postgresql.JSONB(), nullable=False),
-        sa.Column("error_msg", sa.Text(), nullable=True),
-        sa.Column("traceback", sa.Text(), nullable=True),
-        sa.Column(
-            "time_created",
-            sa.DateTime(timezone=True),
-            server_default=sa.text("now()"),
-        ),
-        sa.ForeignKeyConstraint(
-            ["index_attempt_id"],
-            ["index_attempt.id"],
-        ),
-    )
-
-    op.create_index(
-        "index_attempt_id",
-        "index_attempt_errors",
-        ["time_created"],
-    )
-
-    op.drop_index("ix_index_attempt_cc_pair_settings_poll")
-    op.drop_column("index_attempt", "checkpoint_pointer")
-    op.drop_column("index_attempt", "poll_range_start")
-    op.drop_column("index_attempt", "poll_range_end")
--- a/backend/alembic/versions/f13db29f3101_add_composite_index_for_last_modified_.py
+++ b/backend/alembic/versions/f13db29f3101_add_composite_index_for_last_modified_.py
@@ -1,27 +0,0 @@
-"""Add composite index for last_modified and last_synced to document
-
-Revision ID: f13db29f3101
-Revises: b388730a2899
-Create Date: 2025-02-18 22:48:11.511389
-
-"""
-from alembic import op
-
-# revision identifiers, used by Alembic.
-revision = "f13db29f3101"
-down_revision = "acaab4ef4507"
-branch_labels: str | None = None
-depends_on: str | None = None
-
-
-def upgrade() -> None:
-    op.create_index(
-        "ix_document_sync_status",
-        "document",
-        ["last_modified", "last_synced"],
-        unique=False,
-    )
-
-
-def downgrade() -> None:
-    op.drop_index("ix_document_sync_status", table_name="document")
--- a/backend/ee/onyx/background/celery/apps/primary.py
+++ b/backend/ee/onyx/background/celery/apps/primary.py
@@ -21,7 +21,7 @@ logger = setup_logger()
 def perform_ttl_management_task(
    retention_limit_days: int, *, tenant_id: str | None
 ) -> None:
-    with get_session_with_tenant(tenant_id=tenant_id) as db_session:
+    with get_session_with_tenant(tenant_id) as db_session:
        delete_chat_sessions_older_than(retention_limit_days, db_session)


@@ -44,7 +44,7 @@ def check_ttl_management_task(*, tenant_id: str | None) -> None:

    settings = load_settings()
    retention_limit_days = settings.maximum_chat_retention_days
-    with get_session_with_tenant(tenant_id=tenant_id) as db_session:
+    with get_session_with_tenant(tenant_id) as db_session:
        if should_perform_chat_ttl_check(retention_limit_days, db_session):
            perform_ttl_management_task.apply_async(
                kwargs=dict(
@@ -62,7 +62,7 @@ def check_ttl_management_task(*, tenant_id: str | None) -> None:
 )
 def autogenerate_usage_report_task(*, tenant_id: str | None) -> None:
    """This generates usage report under the /admin/generate-usage/report endpoint"""
-    with get_session_with_tenant(tenant_id=tenant_id) as db_session:
+    with get_session_with_tenant(tenant_id) as db_session:
        create_new_usage_report(
            db_session=db_session,
            user_id=None,
--- a/backend/ee/onyx/external_permissions/confluence/group_sync.py
+++ b/backend/ee/onyx/external_permissions/confluence/group_sync.py
@@ -14,24 +14,30 @@ def _build_group_member_email_map(
    confluence_client: OnyxConfluence, cc_pair_id: int
 ) -> dict[str, set[str]]:
    group_member_emails: dict[str, set[str]] = {}
-    for user in confluence_client.paginated_cql_user_retrieval():
-        logger.debug(f"Processing groups for user: {user}")
+    for user_result in confluence_client.paginated_cql_user_retrieval():
+        logger.debug(f"Processing groups for user: {user_result}")

-        email = user.email
+        user = user_result.get("user", {})
+        if not user:
+            msg = f"user result missing user field: {user_result}"
+            emit_background_error(msg, cc_pair_id=cc_pair_id)
+            logger.error(msg)
+            continue
+
+        email = user.get("email")
        if not email:
            # This field is only present in Confluence Server
-            user_name = user.username
+            user_name = user.get("username")
            # If it is present, try to get the email using a Server-specific method
            if user_name:
                email = get_user_email_from_username__server(
                    confluence_client=confluence_client,
                    user_name=user_name,
                )
-
        if not email:
            # If we still don't have an email, skip this user
-            msg = f"user result missing email field: {user}"
-            if user.type == "app":
+            msg = f"user result missing email field: {user_result}"
+            if user.get("type") == "app":
                logger.warning(msg)
            else:
                emit_background_error(msg, cc_pair_id=cc_pair_id)
@@ -39,7 +45,7 @@ def _build_group_member_email_map(
            continue

        all_users_groups: set[str] = set()
-        for group in confluence_client.paginated_groups_by_user_retrieval(user.user_id):
+        for group in confluence_client.paginated_groups_by_user_retrieval(user):
            # group name uniqueness is enforced by Confluence, so we can use it as a group ID
            group_id = group["name"]
            group_member_emails.setdefault(group_id, set()).add(email)
--- a/backend/ee/onyx/external_permissions/slack/doc_sync.py
+++ b/backend/ee/onyx/external_permissions/slack/doc_sync.py
@@ -5,7 +5,7 @@ from onyx.access.models import DocExternalAccess
 from onyx.access.models import ExternalAccess
 from onyx.connectors.slack.connector import get_channels
 from onyx.connectors.slack.connector import make_paginated_slack_api_call_w_retries
-from onyx.connectors.slack.connector import SlackConnector
+from onyx.connectors.slack.connector import SlackPollConnector
 from onyx.db.models import ConnectorCredentialPair
 from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
 from onyx.utils.logger import setup_logger
@@ -17,7 +17,7 @@ logger = setup_logger()
 def _get_slack_document_ids_and_channels(
    cc_pair: ConnectorCredentialPair, callback: IndexingHeartbeatInterface | None
 ) -> dict[str, list[str]]:
-    slack_connector = SlackConnector(**cc_pair.connector.connector_specific_config)
+    slack_connector = SlackPollConnector(**cc_pair.connector.connector_specific_config)
    slack_connector.load_credentials(cc_pair.credential.credential_json)

    slim_doc_generator = slack_connector.retrieve_all_slim_documents(callback=callback)
--- a/backend/ee/onyx/server/middleware/tenant_tracking.py
+++ b/backend/ee/onyx/server/middleware/tenant_tracking.py
@@ -33,7 +33,7 @@ def add_tenant_id_middleware(app: FastAPI, logger: logging.LoggerAdapter) -> Non
            return await call_next(request)

        except Exception as e:
-            logger.exception(f"Error in tenant ID middleware: {str(e)}")
+            logger.error(f"Error in tenant ID middleware: {str(e)}")
            raise


@@ -49,7 +49,7 @@ async def _get_tenant_id_from_request(
    """
    # Check for API key
    tenant_id = extract_tenant_from_api_key_header(request)
-    if tenant_id is not None:
+    if tenant_id:
        return tenant_id

    # Check for anonymous user cookie
--- a/backend/ee/onyx/server/oauth.py
+++ b/backend/ee/onyx/server/oauth.py
@@ -36,12 +36,12 @@ from onyx.connectors.google_utils.shared_constants import (
    GoogleOAuthAuthenticationMethod,
 )
 from onyx.db.credentials import create_credential
+from onyx.db.engine import get_current_tenant_id
 from onyx.db.engine import get_session
 from onyx.db.models import User
 from onyx.redis.redis_pool import get_redis_client
 from onyx.server.documents.models import CredentialBase
 from onyx.utils.logger import setup_logger
-from shared_configs.contextvars import get_current_tenant_id


 logger = setup_logger()
@@ -271,12 +271,12 @@ def prepare_authorization_request(
    connector: DocumentSource,
    redirect_on_success: str | None,
    user: User = Depends(current_user),
+    tenant_id: str | None = Depends(get_current_tenant_id),
 ) -> JSONResponse:
    """Used by the frontend to generate the url for the user's browser during auth request.

    Example: https://www.oauth.com/oauth2-servers/authorization/the-authorization-request/
    """
-    tenant_id = get_current_tenant_id()

    # create random oauth state param for security and to retrieve user data later
    oauth_uuid = uuid.uuid4()
@@ -329,6 +329,7 @@ def handle_slack_oauth_callback(
    state: str,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
+    tenant_id: str | None = Depends(get_current_tenant_id),
 ) -> JSONResponse:
    if not SlackOAuth.CLIENT_ID or not SlackOAuth.CLIENT_SECRET:
        raise HTTPException(
@@ -336,7 +337,7 @@ def handle_slack_oauth_callback(
            detail="Slack client ID or client secret is not configured.",
        )

-    r = get_redis_client()
+    r = get_redis_client(tenant_id=tenant_id)

    # recover the state
    padded_state = state + "=" * (
@@ -522,6 +523,7 @@ def handle_google_drive_oauth_callback(
    state: str,
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
+    tenant_id: str | None = Depends(get_current_tenant_id),
 ) -> JSONResponse:
    if not GoogleDriveOAuth.CLIENT_ID or not GoogleDriveOAuth.CLIENT_SECRET:
        raise HTTPException(
@@ -529,7 +531,7 @@ def handle_google_drive_oauth_callback(
            detail="Google Drive client ID or client secret is not configured.",
        )

-    r = get_redis_client()
+    r = get_redis_client(tenant_id=tenant_id)

    # recover the state
    padded_state = state + "=" * (
--- a/backend/ee/onyx/server/query_and_chat/token_limit.py
+++ b/backend/ee/onyx/server/query_and_chat/token_limit.py
@@ -28,7 +28,7 @@ from onyx.server.query_and_chat.token_limit import _user_is_rate_limited_by_glob
 from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel


-def _check_token_rate_limits(user: User | None, tenant_id: str) -> None:
+def _check_token_rate_limits(user: User | None, tenant_id: str | None) -> None:
    if user is None:
        # Unauthenticated users are only rate limited by global settings
        _user_is_rate_limited_by_global(tenant_id)
@@ -52,8 +52,8 @@ User rate limits
 """


-def _user_is_rate_limited(user_id: UUID, tenant_id: str) -> None:
-    with get_session_with_tenant(tenant_id=tenant_id) as db_session:
+def _user_is_rate_limited(user_id: UUID, tenant_id: str | None) -> None:
+    with get_session_with_tenant(tenant_id) as db_session:
        user_rate_limits = fetch_all_user_token_rate_limits(
            db_session=db_session, enabled_only=True, ordered=False
        )
@@ -94,7 +94,7 @@ User Group rate limits


 def _user_is_rate_limited_by_group(user_id: UUID, tenant_id: str | None) -> None:
-    with get_session_with_tenant(tenant_id=tenant_id) as db_session:
+    with get_session_with_tenant(tenant_id) as db_session:
        group_rate_limits = _fetch_all_user_group_rate_limits(user_id, db_session)

        if group_rate_limits:
--- a/backend/ee/onyx/server/tenants/api.py
+++ b/backend/ee/onyx/server/tenants/api.py
@@ -41,15 +41,14 @@ from onyx.auth.users import User
 from onyx.configs.app_configs import WEB_DOMAIN
 from onyx.configs.constants import FASTAPI_USERS_AUTH_COOKIE_NAME
 from onyx.db.auth import get_user_count
+from onyx.db.engine import get_current_tenant_id
 from onyx.db.engine import get_session
-from onyx.db.engine import get_session_with_shared_schema
 from onyx.db.engine import get_session_with_tenant
 from onyx.db.users import delete_user_from_db
 from onyx.db.users import get_user_by_email
 from onyx.server.manage.models import UserByEmail
 from onyx.utils.logger import setup_logger
 from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
-from shared_configs.contextvars import get_current_tenant_id

 stripe.api_key = STRIPE_SECRET_KEY
 logger = setup_logger()
@@ -58,14 +57,13 @@ router = APIRouter(prefix="/tenants")

@router.get("/anonymous-user-path")
 async def get_anonymous_user_path_api(
+    tenant_id: str | None = Depends(get_current_tenant_id),
    _: User | None = Depends(current_admin_user),
 ) -> AnonymousUserPath:
-    tenant_id = get_current_tenant_id()
-
    if tenant_id is None:
        raise HTTPException(status_code=404, detail="Tenant not found")

-    with get_session_with_shared_schema() as db_session:
+    with get_session_with_tenant(tenant_id=None) as db_session:
        current_path = get_anonymous_user_path(tenant_id, db_session)

    return AnonymousUserPath(anonymous_user_path=current_path)
@@ -74,15 +72,15 @@ async def get_anonymous_user_path_api(
@router.post("/anonymous-user-path")
 async def set_anonymous_user_path_api(
    anonymous_user_path: str,
+    tenant_id: str = Depends(get_current_tenant_id),
    _: User | None = Depends(current_admin_user),
 ) -> None:
-    tenant_id = get_current_tenant_id()
    try:
        validate_anonymous_user_path(anonymous_user_path)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))

-    with get_session_with_shared_schema() as db_session:
+    with get_session_with_tenant(tenant_id=None) as db_session:
        try:
            modify_anonymous_user_path(tenant_id, anonymous_user_path, db_session)
        except IntegrityError:
@@ -103,7 +101,7 @@ async def login_as_anonymous_user(
    anonymous_user_path: str,
    _: User | None = Depends(optional_user),
 ) -> Response:
-    with get_session_with_shared_schema() as db_session:
+    with get_session_with_tenant(tenant_id=None) as db_session:
        tenant_id = get_tenant_id_for_anonymous_user_path(
            anonymous_user_path, db_session
        )
@@ -152,17 +150,14 @@ async def billing_information(
    _: User = Depends(current_admin_user),
 ) -> BillingInformation | SubscriptionStatusResponse:
    logger.info("Fetching billing information")
-    tenant_id = get_current_tenant_id()
-    return fetch_billing_information(tenant_id)
+    return fetch_billing_information(CURRENT_TENANT_ID_CONTEXTVAR.get())


@router.post("/create-customer-portal-session")
-async def create_customer_portal_session(
-    _: User = Depends(current_admin_user),
-) -> dict:
-    tenant_id = get_current_tenant_id()
-
+async def create_customer_portal_session(_: User = Depends(current_admin_user)) -> dict:
    try:
+        # Fetch tenant_id and current tenant's information
+        tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
        stripe_info = fetch_tenant_stripe_information(tenant_id)
        stripe_customer_id = stripe_info.get("stripe_customer_id")
        if not stripe_customer_id:
@@ -186,8 +181,6 @@ async def create_subscription_session(
 ) -> SubscriptionSessionResponse:
    try:
        tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
-        if not tenant_id:
-            raise HTTPException(status_code=400, detail="Tenant ID not found")
        session_id = fetch_stripe_checkout_session(tenant_id)
        return SubscriptionSessionResponse(sessionId=session_id)

@@ -204,7 +197,7 @@ async def impersonate_user(
    """Allows a cloud superuser to impersonate another user by generating an impersonation JWT token"""
    tenant_id = get_tenant_id_for_email(impersonate_request.email)

-    with get_session_with_tenant(tenant_id=tenant_id) as tenant_session:
+    with get_session_with_tenant(tenant_id) as tenant_session:
        user_to_impersonate = get_user_by_email(
            impersonate_request.email, tenant_session
        )
@@ -228,9 +221,8 @@ async def leave_organization(
    user_email: UserByEmail,
    current_user: User | None = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
+    tenant_id: str = Depends(get_current_tenant_id),
 ) -> None:
-    tenant_id = get_current_tenant_id()
-
    if current_user is None or current_user.email != user_email.user_email:
        raise HTTPException(
            status_code=403, detail="You can only leave the organization as yourself"
--- a/backend/ee/onyx/server/tenants/provisioning.py
+++ b/backend/ee/onyx/server/tenants/provisioning.py
@@ -118,7 +118,7 @@ async def provision_tenant(tenant_id: str, email: str) -> None:
        # Await the Alembic migrations
        await asyncio.to_thread(run_alembic_migrations, tenant_id)

-        with get_session_with_tenant(tenant_id=tenant_id) as db_session:
+        with get_session_with_tenant(tenant_id) as db_session:
            configure_default_api_keys(db_session)

            current_search_settings = (
@@ -134,7 +134,7 @@ async def provision_tenant(tenant_id: str, email: str) -> None:

        add_users_to_tenant([email], tenant_id)

-        with get_session_with_tenant(tenant_id=tenant_id) as db_session:
+        with get_session_with_tenant(tenant_id) as db_session:
            create_milestone_and_report(
                user=None,
                distinct_id=tenant_id,
--- a/backend/ee/onyx/server/tenants/user_mapping.py
+++ b/backend/ee/onyx/server/tenants/user_mapping.py
@@ -28,7 +28,7 @@ def get_tenant_id_for_email(email: str) -> str:


 def user_owns_a_tenant(email: str) -> bool:
-    with get_session_with_tenant(tenant_id=None) as db_session:
+    with get_session_with_tenant(POSTGRES_DEFAULT_SCHEMA) as db_session:
        result = (
            db_session.query(UserTenantMapping)
            .filter(UserTenantMapping.email == email)
@@ -38,7 +38,7 @@ def user_owns_a_tenant(email: str) -> bool:


 def add_users_to_tenant(emails: list[str], tenant_id: str) -> None:
-    with get_session_with_tenant(tenant_id=None) as db_session:
+    with get_session_with_tenant(POSTGRES_DEFAULT_SCHEMA) as db_session:
        try:
            for email in emails:
                db_session.add(UserTenantMapping(email=email, tenant_id=tenant_id))
@@ -48,7 +48,7 @@ def add_users_to_tenant(emails: list[str], tenant_id: str) -> None:


 def remove_users_from_tenant(emails: list[str], tenant_id: str) -> None:
-    with get_session_with_tenant(tenant_id=None) as db_session:
+    with get_session_with_tenant(POSTGRES_DEFAULT_SCHEMA) as db_session:
        try:
            mappings_to_delete = (
                db_session.query(UserTenantMapping)
@@ -71,7 +71,7 @@ def remove_users_from_tenant(emails: list[str], tenant_id: str) -> None:


 def remove_all_users_from_tenant(tenant_id: str) -> None:
-    with get_session_with_tenant(tenant_id=None) as db_session:
+    with get_session_with_tenant(POSTGRES_DEFAULT_SCHEMA) as db_session:
        db_session.query(UserTenantMapping).filter(
            UserTenantMapping.tenant_id == tenant_id
        ).delete()
--- a/backend/model_server/encoders.py
+++ b/backend/model_server/encoders.py
@@ -98,17 +98,12 @@ class CloudEmbedding:
            return final_embeddings
        except Exception as e:
            error_string = (
-                f"Exception embedding text with OpenAI - {type(e)}: "
-                f"Model: {model} "
-                f"Provider: {self.provider} "
-                f"Exception: {e}"
+                f"Error embedding text with OpenAI: {str(e)} \n"
+                f"Model: {model} \n"
+                f"Provider: {self.provider} \n"
+                f"Texts: {texts}"
            )
            logger.error(error_string)
-
-            # only log text when it's not an authentication error.
-            if not isinstance(e, openai.AuthenticationError):
-                logger.debug(f"Exception texts: {texts}")
-
            raise RuntimeError(error_string)

    async def _embed_cohere(
--- a/backend/onyx/agents/agent_search/basic/graph_builder.py
+++ b/backend/onyx/agents/agent_search/basic/graph_builder.py
@@ -5,14 +5,14 @@ from langgraph.graph import StateGraph
 from onyx.agents.agent_search.basic.states import BasicInput
 from onyx.agents.agent_search.basic.states import BasicOutput
 from onyx.agents.agent_search.basic.states import BasicState
-from onyx.agents.agent_search.orchestration.nodes.call_tool import call_tool
-from onyx.agents.agent_search.orchestration.nodes.choose_tool import choose_tool
+from onyx.agents.agent_search.orchestration.nodes.basic_use_tool_response import (
+    basic_use_tool_response,
+)
+from onyx.agents.agent_search.orchestration.nodes.llm_tool_choice import llm_tool_choice
 from onyx.agents.agent_search.orchestration.nodes.prepare_tool_input import (
    prepare_tool_input,
 )
-from onyx.agents.agent_search.orchestration.nodes.use_tool_response import (
-    basic_use_tool_response,
-)
+from onyx.agents.agent_search.orchestration.nodes.tool_call import tool_call
 from onyx.utils.logger import setup_logger

 logger = setup_logger()
@@ -33,13 +33,13 @@ def basic_graph_builder() -> StateGraph:
    )

    graph.add_node(
-        node="choose_tool",
-        action=choose_tool,
+        node="llm_tool_choice",
+        action=llm_tool_choice,
    )

    graph.add_node(
-        node="call_tool",
-        action=call_tool,
+        node="tool_call",
+        action=tool_call,
    )

    graph.add_node(
@@ -51,12 +51,12 @@ def basic_graph_builder() -> StateGraph:

    graph.add_edge(start_key=START, end_key="prepare_tool_input")

-    graph.add_edge(start_key="prepare_tool_input", end_key="choose_tool")
+    graph.add_edge(start_key="prepare_tool_input", end_key="llm_tool_choice")

-    graph.add_conditional_edges("choose_tool", should_continue, ["call_tool", END])
+    graph.add_conditional_edges("llm_tool_choice", should_continue, ["tool_call", END])

    graph.add_edge(
-        start_key="call_tool",
+        start_key="tool_call",
        end_key="basic_use_tool_response",
    )

@@ -73,7 +73,7 @@ def should_continue(state: BasicState) -> str:
        # If there are no tool calls, basic graph already streamed the answer
        END
        if state.tool_choice is None
-        else "call_tool"
+        else "tool_call"
    )


@@ -85,7 +85,7 @@ if __name__ == "__main__":

    graph = basic_graph_builder()
    compiled_graph = graph.compile()
-    input = BasicInput(unused=True)
+    input = BasicInput(_unused=True)
    primary_llm, fast_llm = get_default_llms()
    with get_session_context_manager() as db_session:
        config, _ = get_test_config(
--- a/backend/onyx/agents/agent_search/basic/states.py
+++ b/backend/onyx/agents/agent_search/basic/states.py
@@ -17,7 +17,7 @@ from onyx.agents.agent_search.orchestration.states import ToolChoiceUpdate
 class BasicInput(BaseModel):
    # Langgraph needs a nonempty input, but we pass in all static
    # data through a RunnableConfig.
-    unused: bool = True
+    _unused: bool = True


 ## Graph Output State
--- a/backend/onyx/agents/agent_search/core_state.py
+++ b/backend/onyx/agents/agent_search/core_state.py
@@ -9,6 +9,7 @@ class CoreState(BaseModel):
    This is the core state that is shared across all subgraphs.
    """

+    base_question: str = ""
    log_messages: Annotated[list[str], add] = []


@@ -17,4 +18,4 @@ class SubgraphCoreState(BaseModel):
    This is the core state that is shared across all subgraphs.
    """

-    log_messages: Annotated[list[str], add] = []
+    log_messages: Annotated[list[str], add]
--- a/backend/onyx/agents/agent_search/deep_search/initial/generate_individual_sub_answer/nodes/check_sub_answer.py
+++ b/backend/onyx/agents/agent_search/deep_search/initial/generate_individual_sub_answer/nodes/check_sub_answer.py
@@ -1,8 +1,8 @@
 from datetime import datetime
 from typing import cast

-from langchain_core.messages import BaseMessage
 from langchain_core.messages import HumanMessage
+from langchain_core.messages import merge_message_runs
 from langchain_core.runnables.config import RunnableConfig

 from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer.states import (
@@ -12,45 +12,14 @@ from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer
    SubQuestionAnswerCheckUpdate,
 )
 from onyx.agents.agent_search.models import GraphConfig
-from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
-    binary_string_test,
-)
-from onyx.agents.agent_search.shared_graph_utils.constants import (
-    AGENT_LLM_RATELIMIT_MESSAGE,
-)
-from onyx.agents.agent_search.shared_graph_utils.constants import (
-    AGENT_LLM_TIMEOUT_MESSAGE,
-)
-from onyx.agents.agent_search.shared_graph_utils.constants import (
-    AGENT_POSITIVE_VALUE_STR,
-)
-from onyx.agents.agent_search.shared_graph_utils.constants import AgentLLMErrorType
-from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
-from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id
-from onyx.configs.agent_configs import AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_CHECK
-from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_SUBANSWER_CHECK
-from onyx.llm.chat_llm import LLMRateLimitError
-from onyx.llm.chat_llm import LLMTimeoutError
 from onyx.prompts.agent_search import SUB_ANSWER_CHECK_PROMPT
 from onyx.prompts.agent_search import UNKNOWN_ANSWER
-from onyx.utils.logger import setup_logger
-from onyx.utils.threadpool_concurrency import run_with_timeout
-from onyx.utils.timing import log_function_time
-
-logger = setup_logger()
-
-_llm_node_error_strings = LLMNodeErrorStrings(
-    timeout="LLM Timeout Error. The sub-answer will be treated as 'relevant'",
-    rate_limit="LLM Rate Limit Error. The sub-answer will be treated as 'relevant'",
-    general_error="General LLM Error. The sub-answer will be treated as 'relevant'",
-)


-@log_function_time(print_only=True)
 def check_sub_answer(
    state: AnswerQuestionState, config: RunnableConfig
 ) -> SubQuestionAnswerCheckUpdate:
@@ -84,42 +53,14 @@ def check_sub_answer(

    graph_config = cast(GraphConfig, config["metadata"]["config"])
    fast_llm = graph_config.tooling.fast_llm
-    agent_error: AgentErrorLog | None = None
-    response: BaseMessage | None = None
-    try:
-        response = run_with_timeout(
-            AGENT_TIMEOUT_LLM_SUBANSWER_CHECK,
-            fast_llm.invoke,
+    response = list(
+        fast_llm.stream(
            prompt=msg,
-            timeout_override=AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_CHECK,
        )
+    )

-        quality_str: str = cast(str, response.content)
-        answer_quality = binary_string_test(
-            text=quality_str, positive_value=AGENT_POSITIVE_VALUE_STR
-        )
-        log_result = f"Answer quality: {quality_str}"
-
-    except (LLMTimeoutError, TimeoutError):
-        agent_error = AgentErrorLog(
-            error_type=AgentLLMErrorType.TIMEOUT,
-            error_message=AGENT_LLM_TIMEOUT_MESSAGE,
-            error_result=_llm_node_error_strings.timeout,
-        )
-        answer_quality = True
-        log_result = agent_error.error_result
-        logger.error("LLM Timeout Error - check sub answer")
-
-    except LLMRateLimitError:
-        agent_error = AgentErrorLog(
-            error_type=AgentLLMErrorType.RATE_LIMIT,
-            error_message=AGENT_LLM_RATELIMIT_MESSAGE,
-            error_result=_llm_node_error_strings.rate_limit,
-        )
-
-        answer_quality = True
-        log_result = agent_error.error_result
-        logger.error("LLM Rate Limit Error - check sub answer")
+    quality_str: str = merge_message_runs(response, chunk_separator="")[0].content
+    answer_quality = "yes" in quality_str.lower()

    return SubQuestionAnswerCheckUpdate(
        answer_quality=answer_quality,
@@ -128,7 +69,7 @@ def check_sub_answer(
                graph_component="initial  - generate individual sub answer",
                node_name="check sub answer",
                node_start_time=node_start_time,
-                result=log_result,
+                result=f"Answer quality: {quality_str}",
            )
        ],
    )
--- a/backend/onyx/agents/agent_search/deep_search/initial/generate_individual_sub_answer/nodes/generate_sub_answer.py
+++ b/backend/onyx/agents/agent_search/deep_search/initial/generate_individual_sub_answer/nodes/generate_sub_answer.py
@@ -1,4 +1,5 @@
 from datetime import datetime
+from typing import Any
 from typing import cast

 from langchain_core.messages import merge_message_runs
@@ -15,23 +16,6 @@ from onyx.agents.agent_search.models import GraphConfig
 from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
    build_sub_question_answer_prompt,
 )
-from onyx.agents.agent_search.shared_graph_utils.calculations import (
-    dedup_sort_inference_section_list,
-)
-from onyx.agents.agent_search.shared_graph_utils.constants import (
-    AGENT_LLM_RATELIMIT_MESSAGE,
-)
-from onyx.agents.agent_search.shared_graph_utils.constants import (
-    AGENT_LLM_TIMEOUT_MESSAGE,
-)
-from onyx.agents.agent_search.shared_graph_utils.constants import (
-    AgentLLMErrorType,
-)
-from onyx.agents.agent_search.shared_graph_utils.constants import (
-    LLM_ANSWER_ERROR_MESSAGE,
-)
-from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
-from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
 from onyx.agents.agent_search.shared_graph_utils.utils import get_answer_citation_ids
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
@@ -46,25 +30,12 @@ from onyx.chat.models import StreamStopInfo
 from onyx.chat.models import StreamStopReason
 from onyx.chat.models import StreamType
 from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
-from onyx.configs.agent_configs import AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION
-from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_SUBANSWER_GENERATION
-from onyx.llm.chat_llm import LLMRateLimitError
-from onyx.llm.chat_llm import LLMTimeoutError
 from onyx.prompts.agent_search import NO_RECOVERED_DOCS
 from onyx.utils.logger import setup_logger
-from onyx.utils.threadpool_concurrency import run_with_timeout
-from onyx.utils.timing import log_function_time

 logger = setup_logger()

-_llm_node_error_strings = LLMNodeErrorStrings(
-    timeout="LLM Timeout Error. A sub-answer could not be constructed and the sub-question will be ignored.",
-    rate_limit="LLM Rate Limit Error. A sub-answer could not be constructed and the sub-question will be ignored.",
-    general_error="General LLM Error. A sub-answer could not be constructed and the sub-question will be ignored.",
-)

-
-@log_function_time(print_only=True)
 def generate_sub_answer(
    state: AnswerQuestionState,
    config: RunnableConfig,
@@ -80,17 +51,12 @@ def generate_sub_answer(
    state.verified_reranked_documents
    level, question_num = parse_question_id(state.question_id)
    context_docs = state.context_documents[:AGENT_MAX_ANSWER_CONTEXT_DOCS]
-
-    context_docs = dedup_sort_inference_section_list(context_docs)
-
    persona_contextualized_prompt = get_persona_agent_prompt_expressions(
        graph_config.inputs.search_request.persona
    ).contextualized_prompt

    if len(context_docs) == 0:
        answer_str = NO_RECOVERED_DOCS
-        cited_documents: list = []
-        log_results = "No documents retrieved"
        write_custom_event(
            "sub_answers",
            AgentAnswerPiece(
@@ -111,75 +77,43 @@ def generate_sub_answer(
            config=fast_llm.config,
        )

+        response: list[str | list[str | dict[str, Any]]] = []
        dispatch_timings: list[float] = []
-        agent_error: AgentErrorLog | None = None
-        response: list[str] = []
-
-        def stream_sub_answer() -> list[str]:
-            for message in fast_llm.stream(
-                prompt=msg,
-                timeout_override=AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION,
-            ):
-                # TODO: in principle, the answer here COULD contain images, but we don't support that yet
-                content = message.content
-                if not isinstance(content, str):
-                    raise ValueError(
-                        f"Expected content to be a string, but got {type(content)}"
-                    )
-                start_stream_token = datetime.now()
-                write_custom_event(
-                    "sub_answers",
-                    AgentAnswerPiece(
-                        answer_piece=content,
-                        level=level,
-                        level_question_num=question_num,
-                        answer_type="agent_sub_answer",
-                    ),
-                    writer,
+        for message in fast_llm.stream(
+            prompt=msg,
+        ):
+            # TODO: in principle, the answer here COULD contain images, but we don't support that yet
+            content = message.content
+            if not isinstance(content, str):
+                raise ValueError(
+                    f"Expected content to be a string, but got {type(content)}"
                )
-                end_stream_token = datetime.now()
-                dispatch_timings.append(
-                    (end_stream_token - start_stream_token).microseconds
-                )
-                response.append(content)
-            return response
-
-        try:
-            response = run_with_timeout(
-                AGENT_TIMEOUT_LLM_SUBANSWER_GENERATION,
-                stream_sub_answer,
+            start_stream_token = datetime.now()
+            write_custom_event(
+                "sub_answers",
+                AgentAnswerPiece(
+                    answer_piece=content,
+                    level=level,
+                    level_question_num=question_num,
+                    answer_type="agent_sub_answer",
+                ),
+                writer,
            )
-
-        except (LLMTimeoutError, TimeoutError):
-            agent_error = AgentErrorLog(
-                error_type=AgentLLMErrorType.TIMEOUT,
-                error_message=AGENT_LLM_TIMEOUT_MESSAGE,
-                error_result=_llm_node_error_strings.timeout,
+            end_stream_token = datetime.now()
+            dispatch_timings.append(
+                (end_stream_token - start_stream_token).microseconds
            )
-            logger.error("LLM Timeout Error - generate sub answer")
-        except LLMRateLimitError:
-            agent_error = AgentErrorLog(
-                error_type=AgentLLMErrorType.RATE_LIMIT,
-                error_message=AGENT_LLM_RATELIMIT_MESSAGE,
-                error_result=_llm_node_error_strings.rate_limit,
-            )
-            logger.error("LLM Rate Limit Error - generate sub answer")
+            response.append(content)

-        if agent_error:
-            answer_str = LLM_ANSWER_ERROR_MESSAGE
-            cited_documents = []
-            log_results = (
-                agent_error.error_result
-                or "Sub-answer generation failed due to LLM error"
-            )
+        answer_str = merge_message_runs(response, chunk_separator="")[0].content
+        logger.debug(
+            f"Average dispatch time: {sum(dispatch_timings) / len(dispatch_timings)}"
+        )

-        else:
-            answer_str = merge_message_runs(response, chunk_separator="")[0].content
-            answer_citation_ids = get_answer_citation_ids(answer_str)
-            cited_documents = [
-                context_docs[id] for id in answer_citation_ids if id < len(context_docs)
-            ]
-            log_results = None
+    answer_citation_ids = get_answer_citation_ids(answer_str)
+    cited_documents = [
+        context_docs[id] for id in answer_citation_ids if id < len(context_docs)
+    ]

    stop_event = StreamStopInfo(
        stop_reason=StreamStopReason.FINISHED,
@@ -197,7 +131,7 @@ def generate_sub_answer(
                graph_component="initial - generate individual sub answer",
                node_name="generate sub answer",
                node_start_time=node_start_time,
-                result=log_results or "",
+                result="",
            )
        ],
    )
--- a/backend/onyx/agents/agent_search/deep_search/initial/generate_individual_sub_answer/states.py
+++ b/backend/onyx/agents/agent_search/deep_search/initial/generate_individual_sub_answer/states.py
@@ -42,8 +42,10 @@ class SubQuestionRetrievalIngestionUpdate(LoggerUpdate, BaseModel):


 class SubQuestionAnsweringInput(SubgraphCoreState):
-    question: str
-    question_id: str
+    question: str = ""
+    question_id: str = (
+        ""  # 0_0 is original question, everything else is <level>_<question_num>.
+    )
    # level 0 is original question and first decomposition, level 1 is follow up, etc
    # question_num is a unique number per original question per level.

--- a/backend/onyx/agents/agent_search/deep_search/initial/generate_initial_answer/nodes/generate_initial_answer.py
+++ b/backend/onyx/agents/agent_search/deep_search/initial/generate_initial_answer/nodes/generate_initial_answer.py
@@ -1,4 +1,5 @@
 from datetime import datetime
+from typing import Any
 from typing import cast

 from langchain_core.messages import HumanMessage
@@ -25,31 +26,14 @@ from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
 from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
    trim_prompt_piece,
 )
-from onyx.agents.agent_search.shared_graph_utils.calculations import (
-    get_answer_generation_documents,
-)
-from onyx.agents.agent_search.shared_graph_utils.constants import (
-    AGENT_LLM_RATELIMIT_MESSAGE,
-)
-from onyx.agents.agent_search.shared_graph_utils.constants import (
-    AGENT_LLM_TIMEOUT_MESSAGE,
-)
-from onyx.agents.agent_search.shared_graph_utils.constants import (
-    AgentLLMErrorType,
-)
-from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
 from onyx.agents.agent_search.shared_graph_utils.models import InitialAgentResultStats
-from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
 from onyx.agents.agent_search.shared_graph_utils.operators import (
-    dedup_inference_section_list,
+    dedup_inference_sections,
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    dispatch_main_answer_stop_info,
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import format_docs
-from onyx.agents.agent_search.shared_graph_utils.utils import (
-    get_deduplicated_structured_subquestion_documents,
-)
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
 )
@@ -58,20 +42,12 @@ from onyx.agents.agent_search.shared_graph_utils.utils import remove_document_ci
 from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
 from onyx.chat.models import AgentAnswerPiece
 from onyx.chat.models import ExtendedToolResponse
-from onyx.chat.models import StreamingError
-from onyx.configs.agent_configs import AGENT_ANSWER_GENERATION_BY_FAST_LLM
 from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
-from onyx.configs.agent_configs import AGENT_MAX_STREAMED_DOCS_FOR_INITIAL_ANSWER
 from onyx.configs.agent_configs import AGENT_MIN_ORIG_QUESTION_DOCS
-from onyx.configs.agent_configs import (
-    AGENT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION,
+from onyx.context.search.models import InferenceSection
+from onyx.prompts.agent_search import (
+    INITIAL_ANSWER_PROMPT_W_SUB_QUESTIONS,
 )
-from onyx.configs.agent_configs import (
-    AGENT_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION,
-)
-from onyx.llm.chat_llm import LLMRateLimitError
-from onyx.llm.chat_llm import LLMTimeoutError
-from onyx.prompts.agent_search import INITIAL_ANSWER_PROMPT_W_SUB_QUESTIONS
 from onyx.prompts.agent_search import (
    INITIAL_ANSWER_PROMPT_WO_SUB_QUESTIONS,
 )
@@ -80,17 +56,8 @@ from onyx.prompts.agent_search import (
 )
 from onyx.prompts.agent_search import UNKNOWN_ANSWER
 from onyx.tools.tool_implementations.search.search_tool import yield_search_responses
-from onyx.utils.threadpool_concurrency import run_with_timeout
-from onyx.utils.timing import log_function_time
-
-_llm_node_error_strings = LLMNodeErrorStrings(
-    timeout="LLM Timeout Error. The initial answer could not be generated.",
-    rate_limit="LLM Rate Limit Error. The initial answer could not be generated.",
-    general_error="General LLM Error. The initial answer could not be generated.",
-)


-@log_function_time(print_only=True)
 def generate_initial_answer(
    state: SubQuestionRetrievalState,
    config: RunnableConfig,
@@ -106,19 +73,15 @@ def generate_initial_answer(
    question = graph_config.inputs.search_request.query
    prompt_enrichment_components = get_prompt_enrichment_components(graph_config)

-    # get all documents cited in sub-questions
-    structured_subquestion_docs = get_deduplicated_structured_subquestion_documents(
-        state.sub_question_results
-    )
-
+    sub_questions_cited_documents = state.cited_documents
    orig_question_retrieval_documents = state.orig_question_retrieved_documents

-    consolidated_context_docs = structured_subquestion_docs.cited_documents
+    consolidated_context_docs: list[InferenceSection] = sub_questions_cited_documents
    counter = 0
    for original_doc_number, original_doc in enumerate(
        orig_question_retrieval_documents
    ):
-        if original_doc_number not in structured_subquestion_docs.cited_documents:
+        if original_doc_number not in sub_questions_cited_documents:
            if (
                counter <= AGENT_MIN_ORIG_QUESTION_DOCS
                or len(consolidated_context_docs) < AGENT_MAX_ANSWER_CONTEXT_DOCS
@@ -127,18 +90,15 @@ def generate_initial_answer(
                counter += 1

    # sort docs by their scores - though the scores refer to different questions
-    relevant_docs = dedup_inference_section_list(consolidated_context_docs)
+    relevant_docs = dedup_inference_sections(
+        consolidated_context_docs, consolidated_context_docs
+    )

    sub_questions: list[str] = []
-
-    # Create the list of documents to stream out. Start with the
-    # ones that wil be in the context (or, if len == 0, use docs
-    # that were retrieved for the original question)
-    answer_generation_documents = get_answer_generation_documents(
-        relevant_docs=relevant_docs,
-        context_documents=structured_subquestion_docs.context_documents,
-        original_question_docs=orig_question_retrieval_documents,
-        max_docs=AGENT_MAX_STREAMED_DOCS_FOR_INITIAL_ANSWER,
+    streamed_documents = (
+        relevant_docs
+        if len(relevant_docs) > 0
+        else state.orig_question_retrieved_documents[:15]
    )

    # Use the query info from the base document retrieval
@@ -148,13 +108,11 @@ def generate_initial_answer(
        graph_config.tooling.search_tool
    ), "search_tool must be provided for agentic search"

-    relevance_list = relevance_from_docs(
-        answer_generation_documents.streaming_documents
-    )
+    relevance_list = relevance_from_docs(relevant_docs)
    for tool_response in yield_search_responses(
        query=question,
-        reranked_sections=answer_generation_documents.streaming_documents,
-        final_context_sections=answer_generation_documents.context_documents,
+        reranked_sections=streamed_documents,
+        final_context_sections=streamed_documents,
        search_query_info=query_info,
        get_section_relevance=lambda: relevance_list,
        search_tool=graph_config.tooling.search_tool,
@@ -170,7 +128,7 @@ def generate_initial_answer(
            writer,
        )

-    if len(answer_generation_documents.context_documents) == 0:
+    if len(relevant_docs) == 0:
        write_custom_event(
            "initial_agent_answer",
            AgentAnswerPiece(
@@ -234,13 +192,9 @@ def generate_initial_answer(

        sub_questions = all_sub_questions  # Replace the original assignment

-        model = (
-            graph_config.tooling.fast_llm
-            if AGENT_ANSWER_GENERATION_BY_FAST_LLM
-            else graph_config.tooling.primary_llm
-        )
+        model = graph_config.tooling.fast_llm

-        doc_context = format_docs(answer_generation_documents.context_documents)
+        doc_context = format_docs(relevant_docs)
        doc_context = trim_prompt_piece(
            config=model.config,
            prompt_piece=doc_context,
@@ -268,92 +222,32 @@ def generate_initial_answer(
            )
        ]

-        streamed_tokens: list[str] = [""]
+        streamed_tokens: list[str | list[str | dict[str, Any]]] = [""]
        dispatch_timings: list[float] = []
-
-        agent_error: AgentErrorLog | None = None
-
-        def stream_initial_answer() -> list[str]:
-            response: list[str] = []
-            for message in model.stream(
-                msg,
-                timeout_override=AGENT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION,
-            ):
-                # TODO: in principle, the answer here COULD contain images, but we don't support that yet
-                content = message.content
-                if not isinstance(content, str):
-                    raise ValueError(
-                        f"Expected content to be a string, but got {type(content)}"
-                    )
-                start_stream_token = datetime.now()
-
-                write_custom_event(
-                    "initial_agent_answer",
-                    AgentAnswerPiece(
-                        answer_piece=content,
-                        level=0,
-                        level_question_num=0,
-                        answer_type="agent_level_answer",
-                    ),
-                    writer,
+        for message in model.stream(msg):
+            # TODO: in principle, the answer here COULD contain images, but we don't support that yet
+            content = message.content
+            if not isinstance(content, str):
+                raise ValueError(
+                    f"Expected content to be a string, but got {type(content)}"
                )
-                end_stream_token = datetime.now()
-                dispatch_timings.append(
-                    (end_stream_token - start_stream_token).microseconds
-                )
-                response.append(content)
-            return response
+            start_stream_token = datetime.now()

-        try:
-            streamed_tokens = run_with_timeout(
-                AGENT_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION,
-                stream_initial_answer,
-            )
-
-        except (LLMTimeoutError, TimeoutError):
-            agent_error = AgentErrorLog(
-                error_type=AgentLLMErrorType.TIMEOUT,
-                error_message=AGENT_LLM_TIMEOUT_MESSAGE,
-                error_result=_llm_node_error_strings.timeout,
-            )
-            logger.error("LLM Timeout Error - generate initial answer")
-
-        except LLMRateLimitError:
-            agent_error = AgentErrorLog(
-                error_type=AgentLLMErrorType.RATE_LIMIT,
-                error_message=AGENT_LLM_RATELIMIT_MESSAGE,
-                error_result=_llm_node_error_strings.rate_limit,
-            )
-            logger.error("LLM Rate Limit Error - generate initial answer")
-
-        if agent_error:
            write_custom_event(
                "initial_agent_answer",
-                StreamingError(
-                    error=AGENT_LLM_TIMEOUT_MESSAGE,
+                AgentAnswerPiece(
+                    answer_piece=content,
+                    level=0,
+                    level_question_num=0,
+                    answer_type="agent_level_answer",
                ),
                writer,
            )
-            return InitialAnswerUpdate(
-                initial_answer=None,
-                answer_error=AgentErrorLog(
-                    error_message=agent_error.error_message or "An LLM error occurred",
-                    error_type=agent_error.error_type,
-                    error_result=agent_error.error_result,
-                ),
-                initial_agent_stats=None,
-                generated_sub_questions=sub_questions,
-                agent_base_end_time=None,
-                agent_base_metrics=None,
-                log_messages=[
-                    get_langgraph_node_log_string(
-                        graph_component="initial - generate initial answer",
-                        node_name="generate initial answer",
-                        node_start_time=node_start_time,
-                        result=agent_error.error_result or "An LLM error occurred",
-                    )
-                ],
+            end_stream_token = datetime.now()
+            dispatch_timings.append(
+                (end_stream_token - start_stream_token).microseconds
            )
+            streamed_tokens.append(content)

        logger.debug(
            f"Average dispatch time for initial answer: {sum(dispatch_timings) / len(dispatch_timings)}"
--- a/backend/onyx/agents/agent_search/deep_search/initial/generate_initial_answer/nodes/validate_initial_answer.py
+++ b/backend/onyx/agents/agent_search/deep_search/initial/generate_initial_answer/nodes/validate_initial_answer.py
@@ -10,10 +10,8 @@ from onyx.agents.agent_search.deep_search.main.states import (
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
 )
-from onyx.utils.timing import log_function_time


-@log_function_time(print_only=True)
 def validate_initial_answer(
    state: SubQuestionRetrievalState,
 ) -> InitialAnswerQualityUpdate:
@@ -27,7 +25,7 @@ def validate_initial_answer(
        f"--------{node_start_time}--------Checking for base answer validity - for not set True/False manually"
    )

-    verdict = True  # not actually required as already streamed out. Refinement will do similar
+    verdict = True

    return InitialAnswerQualityUpdate(
        initial_answer_quality_eval=verdict,
--- a/backend/onyx/agents/agent_search/deep_search/initial/generate_sub_answers/nodes/decompose_orig_question.py
+++ b/backend/onyx/agents/agent_search/deep_search/initial/generate_sub_answers/nodes/decompose_orig_question.py
@@ -23,8 +23,6 @@ from onyx.agents.agent_search.models import GraphConfig
 from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
    build_history_prompt,
 )
-from onyx.agents.agent_search.shared_graph_utils.models import BaseMessage_Content
-from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
 from onyx.agents.agent_search.shared_graph_utils.utils import dispatch_separated
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
@@ -35,34 +33,17 @@ from onyx.chat.models import StreamStopReason
 from onyx.chat.models import StreamType
 from onyx.chat.models import SubQuestionPiece
 from onyx.configs.agent_configs import AGENT_NUM_DOCS_FOR_DECOMPOSITION
-from onyx.configs.agent_configs import (
-    AGENT_TIMEOUT_CONNECT_LLM_SUBQUESTION_GENERATION,
-)
-from onyx.configs.agent_configs import (
-    AGENT_TIMEOUT_LLM_SUBQUESTION_GENERATION,
-)
-from onyx.llm.chat_llm import LLMRateLimitError
-from onyx.llm.chat_llm import LLMTimeoutError
 from onyx.prompts.agent_search import (
-    INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH_ASSUMING_REFINEMENT,
+    INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH,
 )
 from onyx.prompts.agent_search import (
-    INITIAL_QUESTION_DECOMPOSITION_PROMPT_ASSUMING_REFINEMENT,
+    INITIAL_QUESTION_DECOMPOSITION_PROMPT,
 )
 from onyx.utils.logger import setup_logger
-from onyx.utils.threadpool_concurrency import run_with_timeout
-from onyx.utils.timing import log_function_time

 logger = setup_logger()

-_llm_node_error_strings = LLMNodeErrorStrings(
-    timeout="LLM Timeout Error. Sub-questions could not be generated.",
-    rate_limit="LLM Rate Limit Error. Sub-questions could not be generated.",
-    general_error="General LLM Error. Sub-questions could not be generated.",
-)

-
-@log_function_time(print_only=True)
 def decompose_orig_question(
    state: SubQuestionRetrievalState,
    config: RunnableConfig,
@@ -104,15 +85,15 @@ def decompose_orig_question(
            ]
        )

-        decomposition_prompt = INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH_ASSUMING_REFINEMENT.format(
-            question=question, sample_doc_str=sample_doc_str, history=history
+        decomposition_prompt = (
+            INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH.format(
+                question=question, sample_doc_str=sample_doc_str, history=history
+            )
        )

    else:
-        decomposition_prompt = (
-            INITIAL_QUESTION_DECOMPOSITION_PROMPT_ASSUMING_REFINEMENT.format(
-                question=question, history=history
-            )
+        decomposition_prompt = INITIAL_QUESTION_DECOMPOSITION_PROMPT.format(
+            question=question, history=history
        )

    # Start decomposition
@@ -131,44 +112,32 @@ def decompose_orig_question(
    )

    # dispatches custom events for subquestion tokens, adding in subquestion ids.
+    streamed_tokens = dispatch_separated(
+        model.stream(msg),
+        dispatch_subquestion(0, writer),
+        sep_callback=dispatch_subquestion_sep(0, writer),
+    )

-    streamed_tokens: list[BaseMessage_Content] = []
+    stop_event = StreamStopInfo(
+        stop_reason=StreamStopReason.FINISHED,
+        stream_type=StreamType.SUB_QUESTIONS,
+        level=0,
+    )
+    write_custom_event("stream_finished", stop_event, writer)

-    try:
-        streamed_tokens = run_with_timeout(
-            AGENT_TIMEOUT_LLM_SUBQUESTION_GENERATION,
-            dispatch_separated,
-            model.stream(
-                msg,
-                timeout_override=AGENT_TIMEOUT_CONNECT_LLM_SUBQUESTION_GENERATION,
-            ),
-            dispatch_subquestion(0, writer),
-            sep_callback=dispatch_subquestion_sep(0, writer),
-        )
+    decomposition_response = merge_content(*streamed_tokens)

-        decomposition_response = merge_content(*streamed_tokens)
+    # this call should only return strings. Commenting out for efficiency
+    # assert [type(tok) == str for tok in streamed_tokens]

-        list_of_subqs = cast(str, decomposition_response).split("\n")
+    # use no-op cast() instead of str() which runs code
+    # list_of_subquestions = clean_and_parse_list_string(cast(str, response))
+    list_of_subqs = cast(str, decomposition_response).split("\n")

-        initial_sub_questions = [sq.strip() for sq in list_of_subqs if sq.strip() != ""]
-        log_result = f"decomposed original question into {len(initial_sub_questions)} subquestions"
-
-        stop_event = StreamStopInfo(
-            stop_reason=StreamStopReason.FINISHED,
-            stream_type=StreamType.SUB_QUESTIONS,
-            level=0,
-        )
-        write_custom_event("stream_finished", stop_event, writer)
-
-    except (LLMTimeoutError, TimeoutError) as e:
-        logger.error("LLM Timeout Error - decompose orig question")
-        raise e  # fail loudly on this critical step
-    except LLMRateLimitError as e:
-        logger.error("LLM Rate Limit Error - decompose orig question")
-        raise e
+    decomp_list: list[str] = [sq.strip() for sq in list_of_subqs if sq.strip() != ""]

    return InitialQuestionDecompositionUpdate(
-        initial_sub_questions=initial_sub_questions,
+        initial_sub_questions=decomp_list,
        agent_start_time=agent_start_time,
        agent_refined_start_time=None,
        agent_refined_end_time=None,
@@ -182,7 +151,7 @@ def decompose_orig_question(
                graph_component="initial - generate sub answers",
                node_name="decompose original question",
                node_start_time=node_start_time,
-                result=log_result,
+                result=f"decomposed original question into {len(decomp_list)} subquestions",
            )
        ],
    )
--- a/backend/onyx/agents/agent_search/deep_search/main/edges.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/edges.py
@@ -25,7 +25,7 @@ logger = setup_logger()

 def route_initial_tool_choice(
    state: MainState, config: RunnableConfig
-) -> Literal["call_tool", "start_agent_search", "logging_node"]:
+) -> Literal["tool_call", "start_agent_search", "logging_node"]:
    """
    LangGraph edge to route to agent search.
    """
@@ -38,7 +38,7 @@ def route_initial_tool_choice(
        ):
            return "start_agent_search"
        else:
-            return "call_tool"
+            return "tool_call"
    else:
        return "logging_node"

--- a/backend/onyx/agents/agent_search/deep_search/main/graph_builder.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/graph_builder.py
@@ -26,8 +26,8 @@ from onyx.agents.agent_search.deep_search.main.nodes.decide_refinement_need impo
 from onyx.agents.agent_search.deep_search.main.nodes.extract_entities_terms import (
    extract_entities_terms,
 )
-from onyx.agents.agent_search.deep_search.main.nodes.generate_validate_refined_answer import (
-    generate_validate_refined_answer,
+from onyx.agents.agent_search.deep_search.main.nodes.generate_refined_answer import (
+    generate_refined_answer,
 )
 from onyx.agents.agent_search.deep_search.main.nodes.ingest_refined_sub_answers import (
    ingest_refined_sub_answers,
@@ -43,14 +43,14 @@ from onyx.agents.agent_search.deep_search.main.states import MainState
 from onyx.agents.agent_search.deep_search.refinement.consolidate_sub_answers.graph_builder import (
    answer_refined_query_graph_builder,
 )
-from onyx.agents.agent_search.orchestration.nodes.call_tool import call_tool
-from onyx.agents.agent_search.orchestration.nodes.choose_tool import choose_tool
+from onyx.agents.agent_search.orchestration.nodes.basic_use_tool_response import (
+    basic_use_tool_response,
+)
+from onyx.agents.agent_search.orchestration.nodes.llm_tool_choice import llm_tool_choice
 from onyx.agents.agent_search.orchestration.nodes.prepare_tool_input import (
    prepare_tool_input,
 )
-from onyx.agents.agent_search.orchestration.nodes.use_tool_response import (
-    basic_use_tool_response,
-)
+from onyx.agents.agent_search.orchestration.nodes.tool_call import tool_call
 from onyx.agents.agent_search.shared_graph_utils.utils import get_test_config
 from onyx.utils.logger import setup_logger

@@ -77,13 +77,13 @@ def main_graph_builder(test_mode: bool = False) -> StateGraph:
    # Choose the initial tool
    graph.add_node(
        node="initial_tool_choice",
-        action=choose_tool,
+        action=llm_tool_choice,
    )

    # Call the tool, if required
    graph.add_node(
-        node="call_tool",
-        action=call_tool,
+        node="tool_call",
+        action=tool_call,
    )

    # Use the tool response
@@ -126,8 +126,8 @@ def main_graph_builder(test_mode: bool = False) -> StateGraph:

    # Node to generate the refined answer
    graph.add_node(
-        node="generate_validate_refined_answer",
-        action=generate_validate_refined_answer,
+        node="generate_refined_answer",
+        action=generate_refined_answer,
    )

    # Early node to extract the entities and terms from the initial answer,
@@ -168,11 +168,11 @@ def main_graph_builder(test_mode: bool = False) -> StateGraph:
    graph.add_conditional_edges(
        "initial_tool_choice",
        route_initial_tool_choice,
-        ["call_tool", "start_agent_search", "logging_node"],
+        ["tool_call", "start_agent_search", "logging_node"],
    )

    graph.add_edge(
-        start_key="call_tool",
+        start_key="tool_call",
        end_key="basic_use_tool_response",
    )
    graph.add_edge(
@@ -215,11 +215,11 @@ def main_graph_builder(test_mode: bool = False) -> StateGraph:

    graph.add_edge(
        start_key="ingest_refined_sub_answers",
-        end_key="generate_validate_refined_answer",
+        end_key="generate_refined_answer",
    )

    graph.add_edge(
-        start_key="generate_validate_refined_answer",
+        start_key="generate_refined_answer",
        end_key="compare_answers",
    )
    graph.add_edge(
@@ -252,7 +252,9 @@ if __name__ == "__main__":
            db_session, primary_llm, fast_llm, search_request
        )

-        inputs = MainInput(log_messages=[])
+        inputs = MainInput(
+            base_question=graph_config.inputs.search_request.query, log_messages=[]
+        )

        for thing in compiled_graph.stream(
            input=inputs,
--- a/backend/onyx/agents/agent_search/deep_search/main/nodes/compare_answers.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/nodes/compare_answers.py
@@ -1,7 +1,6 @@
 from datetime import datetime
 from typing import cast

-from langchain_core.messages import BaseMessage
 from langchain_core.messages import HumanMessage
 from langchain_core.runnables import RunnableConfig
 from langgraph.types import StreamWriter
@@ -11,53 +10,16 @@ from onyx.agents.agent_search.deep_search.main.states import (
 )
 from onyx.agents.agent_search.deep_search.main.states import MainState
 from onyx.agents.agent_search.models import GraphConfig
-from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
-    binary_string_test,
-)
-from onyx.agents.agent_search.shared_graph_utils.constants import (
-    AGENT_LLM_RATELIMIT_MESSAGE,
-)
-from onyx.agents.agent_search.shared_graph_utils.constants import (
-    AGENT_LLM_TIMEOUT_MESSAGE,
-)
-from onyx.agents.agent_search.shared_graph_utils.constants import (
-    AGENT_POSITIVE_VALUE_STR,
-)
-from onyx.agents.agent_search.shared_graph_utils.constants import (
-    AgentLLMErrorType,
-)
-from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
-from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
 from onyx.chat.models import RefinedAnswerImprovement
-from onyx.configs.agent_configs import AGENT_TIMEOUT_CONNECT_LLM_COMPARE_ANSWERS
-from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_COMPARE_ANSWERS
-from onyx.llm.chat_llm import LLMRateLimitError
-from onyx.llm.chat_llm import LLMTimeoutError
 from onyx.prompts.agent_search import (
    INITIAL_REFINED_ANSWER_COMPARISON_PROMPT,
 )
-from onyx.utils.logger import setup_logger
-from onyx.utils.threadpool_concurrency import run_with_timeout
-from onyx.utils.timing import log_function_time
-
-logger = setup_logger()
-
-_llm_node_error_strings = LLMNodeErrorStrings(
-    timeout="The LLM timed out, and the answers could not be compared.",
-    rate_limit="The LLM encountered a rate limit, and the answers could not be compared.",
-    general_error="The LLM encountered an error, and the answers could not be compared.",
-)
-
-_ANSWER_QUALITY_NOT_SUFFICIENT_MESSAGE = (
-    "Answer quality is not sufficient, so stay with the initial answer."
-)


-@log_function_time(print_only=True)
 def compare_answers(
    state: MainState, config: RunnableConfig, writer: StreamWriter = lambda _: None
 ) -> InitialRefinedAnswerComparisonUpdate:
@@ -72,78 +34,21 @@ def compare_answers(
    initial_answer = state.initial_answer
    refined_answer = state.refined_answer

-    # if answer quality is not sufficient, then stay with the initial answer
-    if not state.refined_answer_quality:
-        write_custom_event(
-            "refined_answer_improvement",
-            RefinedAnswerImprovement(
-                refined_answer_improvement=False,
-            ),
-            writer,
-        )
-
-        return InitialRefinedAnswerComparisonUpdate(
-            refined_answer_improvement_eval=False,
-            log_messages=[
-                get_langgraph_node_log_string(
-                    graph_component="main",
-                    node_name="compare answers",
-                    node_start_time=node_start_time,
-                    result=_ANSWER_QUALITY_NOT_SUFFICIENT_MESSAGE,
-                )
-            ],
-        )
-
    compare_answers_prompt = INITIAL_REFINED_ANSWER_COMPARISON_PROMPT.format(
        question=question, initial_answer=initial_answer, refined_answer=refined_answer
    )

    msg = [HumanMessage(content=compare_answers_prompt)]

-    agent_error: AgentErrorLog | None = None
    # Get the rewritten queries in a defined format
    model = graph_config.tooling.fast_llm
-    resp: BaseMessage | None = None
-    refined_answer_improvement: bool | None = None
+
    # no need to stream this
-    try:
-        resp = run_with_timeout(
-            AGENT_TIMEOUT_LLM_COMPARE_ANSWERS,
-            model.invoke,
-            prompt=msg,
-            timeout_override=AGENT_TIMEOUT_CONNECT_LLM_COMPARE_ANSWERS,
-        )
+    resp = model.invoke(msg)

-    except (LLMTimeoutError, TimeoutError):
-        agent_error = AgentErrorLog(
-            error_type=AgentLLMErrorType.TIMEOUT,
-            error_message=AGENT_LLM_TIMEOUT_MESSAGE,
-            error_result=_llm_node_error_strings.timeout,
-        )
-        logger.error("LLM Timeout Error - compare answers")
-        # continue as True in this support step
-    except LLMRateLimitError:
-        agent_error = AgentErrorLog(
-            error_type=AgentLLMErrorType.RATE_LIMIT,
-            error_message=AGENT_LLM_RATELIMIT_MESSAGE,
-            error_result=_llm_node_error_strings.rate_limit,
-        )
-        logger.error("LLM Rate Limit Error - compare answers")
-        # continue as True in this support step
-
-    if agent_error or resp is None:
-        refined_answer_improvement = True
-        if agent_error:
-            log_result = agent_error.error_result
-        else:
-            log_result = "An answer could not be generated."
-
-    else:
-        refined_answer_improvement = binary_string_test(
-            text=cast(str, resp.content),
-            positive_value=AGENT_POSITIVE_VALUE_STR,
-        )
-        log_result = f"Answer comparison: {refined_answer_improvement}"
+    refined_answer_improvement = (
+        isinstance(resp.content, str) and "yes" in resp.content.lower()
+    )

    write_custom_event(
        "refined_answer_improvement",
@@ -160,7 +65,7 @@ def compare_answers(
                graph_component="main",
                node_name="compare answers",
                node_start_time=node_start_time,
-                result=log_result,
+                result=f"Answer comparison: {refined_answer_improvement}",
            )
        ],
    )
--- a/backend/onyx/agents/agent_search/deep_search/main/nodes/create_refined_sub_questions.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/nodes/create_refined_sub_questions.py
@@ -21,18 +21,6 @@ from onyx.agents.agent_search.models import GraphConfig
 from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
    build_history_prompt,
 )
-from onyx.agents.agent_search.shared_graph_utils.constants import (
-    AGENT_LLM_RATELIMIT_MESSAGE,
-)
-from onyx.agents.agent_search.shared_graph_utils.constants import (
-    AGENT_LLM_TIMEOUT_MESSAGE,
-)
-from onyx.agents.agent_search.shared_graph_utils.constants import (
-    AgentLLMErrorType,
-)
-from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
-from onyx.agents.agent_search.shared_graph_utils.models import BaseMessage_Content
-from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
 from onyx.agents.agent_search.shared_graph_utils.utils import dispatch_separated
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    format_entity_term_extraction,
@@ -42,35 +30,12 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import make_question_id
 from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
-from onyx.chat.models import StreamingError
-from onyx.configs.agent_configs import (
-    AGENT_TIMEOUT_CONNECT_LLM_REFINED_SUBQUESTION_GENERATION,
-)
-from onyx.configs.agent_configs import (
-    AGENT_TIMEOUT_LLM_REFINED_SUBQUESTION_GENERATION,
-)
-from onyx.llm.chat_llm import LLMRateLimitError
-from onyx.llm.chat_llm import LLMTimeoutError
 from onyx.prompts.agent_search import (
-    REFINEMENT_QUESTION_DECOMPOSITION_PROMPT_W_INITIAL_SUBQUESTION_ANSWERS,
+    REFINEMENT_QUESTION_DECOMPOSITION_PROMPT,
 )
 from onyx.tools.models import ToolCallKickoff
-from onyx.utils.logger import setup_logger
-from onyx.utils.threadpool_concurrency import run_with_timeout
-from onyx.utils.timing import log_function_time
-
-logger = setup_logger()
-
-_ANSWERED_SUBQUESTIONS_DIVIDER = "\n\n---\n\n"
-
-_llm_node_error_strings = LLMNodeErrorStrings(
-    timeout="The LLM timed out. The sub-questions could not be generated.",
-    rate_limit="The LLM encountered a rate limit. The sub-questions could not be generated.",
-    general_error="The LLM encountered an error. The sub-questions could not be generated.",
-)


-@log_function_time(print_only=True)
 def create_refined_sub_questions(
    state: MainState, config: RunnableConfig, writer: StreamWriter = lambda _: None
 ) -> RefinedQuestionDecompositionUpdate:
@@ -107,10 +72,8 @@ def create_refined_sub_questions(

    initial_question_answers = state.sub_question_results

-    addressed_subquestions_with_answers = [
-        f"Subquestion: {x.question}\nSubanswer:\n{x.answer}"
-        for x in initial_question_answers
-        if x.verified_high_quality and x.answer
+    addressed_question_list = [
+        x.question for x in initial_question_answers if x.verified_high_quality
    ]

    failed_question_list = [
@@ -119,14 +82,12 @@ def create_refined_sub_questions(

    msg = [
        HumanMessage(
-            content=REFINEMENT_QUESTION_DECOMPOSITION_PROMPT_W_INITIAL_SUBQUESTION_ANSWERS.format(
+            content=REFINEMENT_QUESTION_DECOMPOSITION_PROMPT.format(
                question=question,
                history=history,
                entity_term_extraction_str=entity_term_extraction_str,
                base_answer=base_answer,
-                answered_subquestions_with_answers=_ANSWERED_SUBQUESTIONS_DIVIDER.join(
-                    addressed_subquestions_with_answers
-                ),
+                answered_sub_questions="\n - ".join(addressed_question_list),
                failed_sub_questions="\n - ".join(failed_question_list),
            ),
        )
@@ -135,67 +96,29 @@ def create_refined_sub_questions(
    # Grader
    model = graph_config.tooling.fast_llm

-    agent_error: AgentErrorLog | None = None
-    streamed_tokens: list[BaseMessage_Content] = []
-    try:
-        streamed_tokens = run_with_timeout(
-            AGENT_TIMEOUT_LLM_REFINED_SUBQUESTION_GENERATION,
-            dispatch_separated,
-            model.stream(
-                msg,
-                timeout_override=AGENT_TIMEOUT_CONNECT_LLM_REFINED_SUBQUESTION_GENERATION,
-            ),
-            dispatch_subquestion(1, writer),
-            sep_callback=dispatch_subquestion_sep(1, writer),
-        )
-    except (LLMTimeoutError, TimeoutError):
-        agent_error = AgentErrorLog(
-            error_type=AgentLLMErrorType.TIMEOUT,
-            error_message=AGENT_LLM_TIMEOUT_MESSAGE,
-            error_result=_llm_node_error_strings.timeout,
-        )
-        logger.error("LLM Timeout Error - create refined sub questions")
-
-    except LLMRateLimitError:
-        agent_error = AgentErrorLog(
-            error_type=AgentLLMErrorType.RATE_LIMIT,
-            error_message=AGENT_LLM_RATELIMIT_MESSAGE,
-            error_result=_llm_node_error_strings.rate_limit,
-        )
-        logger.error("LLM Rate Limit Error - create refined sub questions")
-
-    if agent_error:
-        refined_sub_question_dict: dict[int, RefinementSubQuestion] = {}
-        log_result = agent_error.error_result
-        write_custom_event(
-            "refined_sub_question_creation_error",
-            StreamingError(
-                error="Your LLM was not able to create refined sub questions in time and timed out. Please try again.",
-            ),
-            writer,
-        )
+    streamed_tokens = dispatch_separated(
+        model.stream(msg),
+        dispatch_subquestion(1, writer),
+        sep_callback=dispatch_subquestion_sep(1, writer),
+    )
+    response = merge_content(*streamed_tokens)

+    if isinstance(response, str):
+        parsed_response = [q for q in response.split("\n") if q.strip() != ""]
    else:
-        response = merge_content(*streamed_tokens)
+        raise ValueError("LLM response is not a string")

-        if isinstance(response, str):
-            parsed_response = [q for q in response.split("\n") if q.strip() != ""]
-        else:
-            raise ValueError("LLM response is not a string")
+    refined_sub_question_dict = {}
+    for sub_question_num, sub_question in enumerate(parsed_response):
+        refined_sub_question = RefinementSubQuestion(
+            sub_question=sub_question,
+            sub_question_id=make_question_id(1, sub_question_num + 1),
+            verified=False,
+            answered=False,
+            answer="",
+        )

-        refined_sub_question_dict = {}
-        for sub_question_num, sub_question in enumerate(parsed_response):
-            refined_sub_question = RefinementSubQuestion(
-                sub_question=sub_question,
-                sub_question_id=make_question_id(1, sub_question_num + 1),
-                verified=False,
-                answered=False,
-                answer="",
-            )
-
-            refined_sub_question_dict[sub_question_num + 1] = refined_sub_question
-
-        log_result = f"Created {len(refined_sub_question_dict)} refined sub questions"
+        refined_sub_question_dict[sub_question_num + 1] = refined_sub_question

    return RefinedQuestionDecompositionUpdate(
        refined_sub_questions=refined_sub_question_dict,
@@ -205,7 +128,7 @@ def create_refined_sub_questions(
                graph_component="main",
                node_name="create refined sub questions",
                node_start_time=node_start_time,
-                result=log_result,
+                result=f"Created {len(refined_sub_question_dict)} refined sub questions",
            )
        ],
    )
--- a/backend/onyx/agents/agent_search/deep_search/main/nodes/decide_refinement_need.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/nodes/decide_refinement_need.py
@@ -11,10 +11,8 @@ from onyx.agents.agent_search.models import GraphConfig
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
 )
-from onyx.utils.timing import log_function_time


-@log_function_time(print_only=True)
 def decide_refinement_need(
    state: MainState, config: RunnableConfig
 ) -> RequireRefinemenEvalUpdate:
@@ -28,19 +26,6 @@ def decide_refinement_need(

    decision = True  # TODO: just for current testing purposes

-    if state.answer_error:
-        return RequireRefinemenEvalUpdate(
-            require_refined_answer_eval=False,
-            log_messages=[
-                get_langgraph_node_log_string(
-                    graph_component="main",
-                    node_name="decide refinement need",
-                    node_start_time=node_start_time,
-                    result="Timeout Error",
-                )
-            ],
-        )
-
    log_messages = [
        get_langgraph_node_log_string(
            graph_component="main",
--- a/backend/onyx/agents/agent_search/deep_search/main/nodes/extract_entities_terms.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/nodes/extract_entities_terms.py
@@ -21,22 +21,11 @@ from onyx.agents.agent_search.shared_graph_utils.utils import format_docs
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
 )
-from onyx.configs.agent_configs import (
-    AGENT_TIMEOUT_CONNECT_LLM_ENTITY_TERM_EXTRACTION,
-)
-from onyx.configs.agent_configs import (
-    AGENT_TIMEOUT_LLM_ENTITY_TERM_EXTRACTION,
-)
 from onyx.configs.constants import NUM_EXPLORATORY_DOCS
-from onyx.llm.chat_llm import LLMRateLimitError
-from onyx.llm.chat_llm import LLMTimeoutError
 from onyx.prompts.agent_search import ENTITY_TERM_EXTRACTION_PROMPT
 from onyx.prompts.agent_search import ENTITY_TERM_EXTRACTION_PROMPT_JSON_EXAMPLE
-from onyx.utils.threadpool_concurrency import run_with_timeout
-from onyx.utils.timing import log_function_time


-@log_function_time(print_only=True)
 def extract_entities_terms(
    state: MainState, config: RunnableConfig
 ) -> EntityTermExtractionUpdate:
@@ -90,42 +79,29 @@ def extract_entities_terms(
    ]
    fast_llm = graph_config.tooling.fast_llm
    # Grader
+    llm_response = fast_llm.invoke(
+        prompt=msg,
+    )
+
+    cleaned_response = (
+        str(llm_response.content).replace("```json\n", "").replace("\n```", "")
+    )
+    first_bracket = cleaned_response.find("{")
+    last_bracket = cleaned_response.rfind("}")
+    cleaned_response = cleaned_response[first_bracket : last_bracket + 1]
+
    try:
-        llm_response = run_with_timeout(
-            AGENT_TIMEOUT_LLM_ENTITY_TERM_EXTRACTION,
-            fast_llm.invoke,
-            prompt=msg,
-            timeout_override=AGENT_TIMEOUT_CONNECT_LLM_ENTITY_TERM_EXTRACTION,
+        entity_extraction_result = EntityExtractionResult.model_validate_json(
+            cleaned_response
        )
-
-        cleaned_response = (
-            str(llm_response.content).replace("```json\n", "").replace("\n```", "")
-        )
-        first_bracket = cleaned_response.find("{")
-        last_bracket = cleaned_response.rfind("}")
-        cleaned_response = cleaned_response[first_bracket : last_bracket + 1]
-
-        try:
-            entity_extraction_result = EntityExtractionResult.model_validate_json(
-                cleaned_response
-            )
-        except ValueError:
-            logger.error(
-                "Failed to parse LLM response as JSON in Entity-Term Extraction"
-            )
-            entity_extraction_result = EntityExtractionResult(
-                retrieved_entities_relationships=EntityRelationshipTermExtraction(),
-            )
-    except (LLMTimeoutError, TimeoutError):
-        logger.error("LLM Timeout Error - extract entities terms")
+    except ValueError:
+        logger.error("Failed to parse LLM response as JSON in Entity-Term Extraction")
        entity_extraction_result = EntityExtractionResult(
-            retrieved_entities_relationships=EntityRelationshipTermExtraction(),
-        )
-
-    except LLMRateLimitError:
-        logger.error("LLM Rate Limit Error - extract entities terms")
-        entity_extraction_result = EntityExtractionResult(
-            retrieved_entities_relationships=EntityRelationshipTermExtraction(),
+            retrieved_entities_relationships=EntityRelationshipTermExtraction(
+                entities=[],
+                relationships=[],
+                terms=[],
+            ),
        )

    return EntityTermExtractionUpdate(
--- a/backend/onyx/agents/agent_search/deep_search/main/nodes/generate_validate_refined_answer.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/nodes/generate_validate_refined_answer.py
@@ -1,4 +1,5 @@
 from datetime import datetime
+from typing import Any
 from typing import cast

 from langchain_core.messages import HumanMessage
@@ -10,49 +11,27 @@ from onyx.agents.agent_search.deep_search.main.models import (
    AgentRefinedMetrics,
 )
 from onyx.agents.agent_search.deep_search.main.operations import get_query_info
+from onyx.agents.agent_search.deep_search.main.operations import logger
 from onyx.agents.agent_search.deep_search.main.states import MainState
 from onyx.agents.agent_search.deep_search.main.states import (
    RefinedAnswerUpdate,
 )
 from onyx.agents.agent_search.models import GraphConfig
-from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
-    binary_string_test_after_answer_separator,
-)
 from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
    get_prompt_enrichment_components,
 )
 from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
    trim_prompt_piece,
 )
-from onyx.agents.agent_search.shared_graph_utils.calculations import (
-    get_answer_generation_documents,
-)
-from onyx.agents.agent_search.shared_graph_utils.constants import AGENT_ANSWER_SEPARATOR
-from onyx.agents.agent_search.shared_graph_utils.constants import (
-    AGENT_LLM_RATELIMIT_MESSAGE,
-)
-from onyx.agents.agent_search.shared_graph_utils.constants import (
-    AGENT_LLM_TIMEOUT_MESSAGE,
-)
-from onyx.agents.agent_search.shared_graph_utils.constants import (
-    AGENT_POSITIVE_VALUE_STR,
-)
-from onyx.agents.agent_search.shared_graph_utils.constants import (
-    AgentLLMErrorType,
-)
-from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
-from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
+from onyx.agents.agent_search.shared_graph_utils.models import InferenceSection
 from onyx.agents.agent_search.shared_graph_utils.models import RefinedAgentStats
 from onyx.agents.agent_search.shared_graph_utils.operators import (
-    dedup_inference_section_list,
+    dedup_inference_sections,
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    dispatch_main_answer_stop_info,
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import format_docs
-from onyx.agents.agent_search.shared_graph_utils.utils import (
-    get_deduplicated_structured_subquestion_documents,
-)
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
 )
@@ -64,58 +43,26 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
 from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
 from onyx.chat.models import AgentAnswerPiece
 from onyx.chat.models import ExtendedToolResponse
-from onyx.chat.models import StreamingError
-from onyx.configs.agent_configs import AGENT_ANSWER_GENERATION_BY_FAST_LLM
 from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
-from onyx.configs.agent_configs import AGENT_MAX_STREAMED_DOCS_FOR_REFINED_ANSWER
 from onyx.configs.agent_configs import AGENT_MIN_ORIG_QUESTION_DOCS
-from onyx.configs.agent_configs import (
-    AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION,
-)
-from onyx.configs.agent_configs import (
-    AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_VALIDATION,
-)
-from onyx.configs.agent_configs import (
-    AGENT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION,
-)
-from onyx.configs.agent_configs import (
-    AGENT_TIMEOUT_LLM_REFINED_ANSWER_VALIDATION,
-)
-from onyx.llm.chat_llm import LLMRateLimitError
-from onyx.llm.chat_llm import LLMTimeoutError
 from onyx.prompts.agent_search import (
    REFINED_ANSWER_PROMPT_W_SUB_QUESTIONS,
 )
 from onyx.prompts.agent_search import (
    REFINED_ANSWER_PROMPT_WO_SUB_QUESTIONS,
 )
-from onyx.prompts.agent_search import (
-    REFINED_ANSWER_VALIDATION_PROMPT,
-)
 from onyx.prompts.agent_search import (
    SUB_QUESTION_ANSWER_TEMPLATE_REFINED,
 )
 from onyx.prompts.agent_search import UNKNOWN_ANSWER
 from onyx.tools.tool_implementations.search.search_tool import yield_search_responses
-from onyx.utils.logger import setup_logger
-from onyx.utils.threadpool_concurrency import run_with_timeout
-from onyx.utils.timing import log_function_time
-
-logger = setup_logger()
-
-_llm_node_error_strings = LLMNodeErrorStrings(
-    timeout="The LLM timed out. The refined answer could not be generated.",
-    rate_limit="The LLM encountered a rate limit. The refined answer could not be generated.",
-    general_error="The LLM encountered an error. The refined answer could not be generated.",
-)


-@log_function_time(print_only=True)
-def generate_validate_refined_answer(
+def generate_refined_answer(
    state: MainState, config: RunnableConfig, writer: StreamWriter = lambda _: None
 ) -> RefinedAnswerUpdate:
    """
-    LangGraph node to generate the refined answer and validate it.
+    LangGraph node to generate the refined answer.
    """

    node_start_time = datetime.now()
@@ -129,24 +76,19 @@ def generate_validate_refined_answer(
    )

    verified_reranked_documents = state.verified_reranked_documents
-
-    # get all documents cited in sub-questions
-    structured_subquestion_docs = get_deduplicated_structured_subquestion_documents(
-        state.sub_question_results
-    )
-
+    sub_questions_cited_documents = state.cited_documents
    original_question_verified_documents = (
        state.orig_question_verified_reranked_documents
    )
    original_question_retrieved_documents = state.orig_question_retrieved_documents

-    consolidated_context_docs = structured_subquestion_docs.cited_documents
+    consolidated_context_docs: list[InferenceSection] = sub_questions_cited_documents

    counter = 0
    for original_doc_number, original_doc in enumerate(
        original_question_verified_documents
    ):
-        if original_doc_number not in structured_subquestion_docs.cited_documents:
+        if original_doc_number not in sub_questions_cited_documents:
            if (
                counter <= AGENT_MIN_ORIG_QUESTION_DOCS
                or len(consolidated_context_docs)
@@ -157,16 +99,14 @@ def generate_validate_refined_answer(
                counter += 1

    # sort docs by their scores - though the scores refer to different questions
-    relevant_docs = dedup_inference_section_list(consolidated_context_docs)
+    relevant_docs = dedup_inference_sections(
+        consolidated_context_docs, consolidated_context_docs
+    )

-    # Create the list of documents to stream out. Start with the
-    # ones that wil be in the context (or, if len == 0, use docs
-    # that were retrieved for the original question)
-    answer_generation_documents = get_answer_generation_documents(
-        relevant_docs=relevant_docs,
-        context_documents=structured_subquestion_docs.context_documents,
-        original_question_docs=original_question_retrieved_documents,
-        max_docs=AGENT_MAX_STREAMED_DOCS_FOR_REFINED_ANSWER,
+    streaming_docs = (
+        relevant_docs
+        if len(relevant_docs) > 0
+        else original_question_retrieved_documents[:15]
    )

    query_info = get_query_info(state.orig_question_sub_query_retrieval_results)
@@ -174,13 +114,11 @@ def generate_validate_refined_answer(
        graph_config.tooling.search_tool
    ), "search_tool must be provided for agentic search"
    # stream refined answer docs, or original question docs if no relevant docs are found
-    relevance_list = relevance_from_docs(
-        answer_generation_documents.streaming_documents
-    )
+    relevance_list = relevance_from_docs(relevant_docs)
    for tool_response in yield_search_responses(
        query=question,
-        reranked_sections=answer_generation_documents.streaming_documents,
-        final_context_sections=answer_generation_documents.context_documents,
+        reranked_sections=streaming_docs,
+        final_context_sections=streaming_docs,
        search_query_info=query_info,
        get_section_relevance=lambda: relevance_list,
        search_tool=graph_config.tooling.search_tool,
@@ -260,13 +198,8 @@ def generate_validate_refined_answer(
        else REFINED_ANSWER_PROMPT_WO_SUB_QUESTIONS
    )

-    model = (
-        graph_config.tooling.fast_llm
-        if AGENT_ANSWER_GENERATION_BY_FAST_LLM
-        else graph_config.tooling.primary_llm
-    )
-
-    relevant_docs_str = format_docs(answer_generation_documents.context_documents)
+    model = graph_config.tooling.fast_llm
+    relevant_docs_str = format_docs(relevant_docs)
    relevant_docs_str = trim_prompt_piece(
        model.config,
        relevant_docs_str,
@@ -296,89 +229,30 @@ def generate_validate_refined_answer(
        )
    ]

-    streamed_tokens: list[str] = [""]
+    streamed_tokens: list[str | list[str | dict[str, Any]]] = [""]
    dispatch_timings: list[float] = []
-    agent_error: AgentErrorLog | None = None
-
-    def stream_refined_answer() -> list[str]:
-        for message in model.stream(
-            msg, timeout_override=AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION
-        ):
-            # TODO: in principle, the answer here COULD contain images, but we don't support that yet
-            content = message.content
-            if not isinstance(content, str):
-                raise ValueError(
-                    f"Expected content to be a string, but got {type(content)}"
-                )
-
-            start_stream_token = datetime.now()
-            write_custom_event(
-                "refined_agent_answer",
-                AgentAnswerPiece(
-                    answer_piece=content,
-                    level=1,
-                    level_question_num=0,
-                    answer_type="agent_level_answer",
-                ),
-                writer,
+    for message in model.stream(msg):
+        # TODO: in principle, the answer here COULD contain images, but we don't support that yet
+        content = message.content
+        if not isinstance(content, str):
+            raise ValueError(
+                f"Expected content to be a string, but got {type(content)}"
            )
-            end_stream_token = datetime.now()
-            dispatch_timings.append(
-                (end_stream_token - start_stream_token).microseconds
-            )
-            streamed_tokens.append(content)
-        return streamed_tokens

-    try:
-        streamed_tokens = run_with_timeout(
-            AGENT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION,
-            stream_refined_answer,
-        )
-
-    except (LLMTimeoutError, TimeoutError):
-        agent_error = AgentErrorLog(
-            error_type=AgentLLMErrorType.TIMEOUT,
-            error_message=AGENT_LLM_TIMEOUT_MESSAGE,
-            error_result=_llm_node_error_strings.timeout,
-        )
-        logger.error("LLM Timeout Error - generate refined answer")
-
-    except LLMRateLimitError:
-        agent_error = AgentErrorLog(
-            error_type=AgentLLMErrorType.RATE_LIMIT,
-            error_message=AGENT_LLM_RATELIMIT_MESSAGE,
-            error_result=_llm_node_error_strings.rate_limit,
-        )
-        logger.error("LLM Rate Limit Error - generate refined answer")
-
-    if agent_error:
+        start_stream_token = datetime.now()
        write_custom_event(
-            "initial_agent_answer",
-            StreamingError(
-                error=AGENT_LLM_TIMEOUT_MESSAGE,
+            "refined_agent_answer",
+            AgentAnswerPiece(
+                answer_piece=content,
+                level=1,
+                level_question_num=0,
+                answer_type="agent_level_answer",
            ),
            writer,
        )
-
-        return RefinedAnswerUpdate(
-            refined_answer=None,
-            refined_answer_quality=False,  # TODO: replace this with the actual check value
-            refined_agent_stats=None,
-            agent_refined_end_time=None,
-            agent_refined_metrics=AgentRefinedMetrics(
-                refined_doc_boost_factor=0.0,
-                refined_question_boost_factor=0.0,
-                duration_s=None,
-            ),
-            log_messages=[
-                get_langgraph_node_log_string(
-                    graph_component="main",
-                    node_name="generate refined answer",
-                    node_start_time=node_start_time,
-                    result=agent_error.error_result or "An LLM error occurred",
-                )
-            ],
-        )
+        end_stream_token = datetime.now()
+        dispatch_timings.append((end_stream_token - start_stream_token).microseconds)
+        streamed_tokens.append(content)

    logger.debug(
        f"Average dispatch time for refined answer: {sum(dispatch_timings) / len(dispatch_timings)}"
@@ -387,47 +261,54 @@ def generate_validate_refined_answer(
    response = merge_content(*streamed_tokens)
    answer = cast(str, response)

-    # run a validation step for the refined answer only
-
-    msg = [
-        HumanMessage(
-            content=REFINED_ANSWER_VALIDATION_PROMPT.format(
-                question=question,
-                history=prompt_enrichment_components.history,
-                answered_sub_questions=sub_question_answer_str,
-                relevant_docs=relevant_docs_str,
-                proposed_answer=answer,
-                persona_specification=persona_contextualized_prompt,
-            )
-        )
-    ]
-
-    validation_model = graph_config.tooling.fast_llm
-    try:
-        validation_response = run_with_timeout(
-            AGENT_TIMEOUT_LLM_REFINED_ANSWER_VALIDATION,
-            validation_model.invoke,
-            prompt=msg,
-            timeout_override=AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_VALIDATION,
-        )
-        refined_answer_quality = binary_string_test_after_answer_separator(
-            text=cast(str, validation_response.content),
-            positive_value=AGENT_POSITIVE_VALUE_STR,
-            separator=AGENT_ANSWER_SEPARATOR,
-        )
-    except (LLMTimeoutError, TimeoutError):
-        refined_answer_quality = True
-        logger.error("LLM Timeout Error - validate refined answer")
-
-    except LLMRateLimitError:
-        refined_answer_quality = True
-        logger.error("LLM Rate Limit Error - validate refined answer")
-
    refined_agent_stats = RefinedAgentStats(
        revision_doc_efficiency=refined_doc_effectiveness,
        revision_question_efficiency=revision_question_efficiency,
    )

+    logger.debug(f"\n\n---INITIAL ANSWER ---\n\n Answer:\n Agent: {initial_answer}")
+    logger.debug("-" * 10)
+    logger.debug(f"\n\n---REVISED AGENT ANSWER ---\n\n Answer:\n Agent: {answer}")
+
+    logger.debug("-" * 100)
+
+    if state.initial_agent_stats:
+        initial_doc_boost_factor = state.initial_agent_stats.agent_effectiveness.get(
+            "utilized_chunk_ratio", "--"
+        )
+        initial_support_boost_factor = (
+            state.initial_agent_stats.agent_effectiveness.get("support_ratio", "--")
+        )
+        num_initial_verified_docs = state.initial_agent_stats.original_question.get(
+            "num_verified_documents", "--"
+        )
+        initial_verified_docs_avg_score = (
+            state.initial_agent_stats.original_question.get("verified_avg_score", "--")
+        )
+        initial_sub_questions_verified_docs = (
+            state.initial_agent_stats.sub_questions.get("num_verified_documents", "--")
+        )
+
+        logger.debug("INITIAL AGENT STATS")
+        logger.debug(f"Document Boost Factor: {initial_doc_boost_factor}")
+        logger.debug(f"Support Boost Factor: {initial_support_boost_factor}")
+        logger.debug(f"Originally Verified Docs: {num_initial_verified_docs}")
+        logger.debug(
+            f"Originally Verified Docs Avg Score: {initial_verified_docs_avg_score}"
+        )
+        logger.debug(
+            f"Sub-Questions Verified Docs: {initial_sub_questions_verified_docs}"
+        )
+    if refined_agent_stats:
+        logger.debug("-" * 10)
+        logger.debug("REFINED AGENT STATS")
+        logger.debug(
+            f"Revision Doc Factor: {refined_agent_stats.revision_doc_efficiency}"
+        )
+        logger.debug(
+            f"Revision Question Factor: {refined_agent_stats.revision_question_efficiency}"
+        )
+
    agent_refined_end_time = datetime.now()
    if state.agent_refined_start_time:
        agent_refined_duration = (
@@ -444,7 +325,7 @@ def generate_validate_refined_answer(

    return RefinedAnswerUpdate(
        refined_answer=answer,
-        refined_answer_quality=refined_answer_quality,
+        refined_answer_quality=True,  # TODO: replace this with the actual check value
        refined_agent_stats=refined_agent_stats,
        agent_refined_end_time=agent_refined_end_time,
        agent_refined_metrics=agent_refined_metrics,
--- a/backend/onyx/agents/agent_search/deep_search/main/states.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/states.py
@@ -17,7 +17,6 @@ from onyx.agents.agent_search.orchestration.states import ToolCallUpdate
 from onyx.agents.agent_search.orchestration.states import ToolChoiceInput
 from onyx.agents.agent_search.orchestration.states import ToolChoiceUpdate
 from onyx.agents.agent_search.shared_graph_utils.models import AgentChunkRetrievalStats
-from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
 from onyx.agents.agent_search.shared_graph_utils.models import (
    EntityRelationshipTermExtraction,
 )
@@ -77,7 +76,6 @@ class InitialAnswerUpdate(LoggerUpdate):
    """

    initial_answer: str | None = None
-    answer_error: AgentErrorLog | None = None
    initial_agent_stats: InitialAgentResultStats | None = None
    generated_sub_questions: list[str] = []
    agent_base_end_time: datetime | None = None
@@ -90,7 +88,6 @@ class RefinedAnswerUpdate(RefinedAgentEndStats, LoggerUpdate):
    """

    refined_answer: str | None = None
-    answer_error: AgentErrorLog | None = None
    refined_agent_stats: RefinedAgentStats | None = None
    refined_answer_quality: bool = False

--- a/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/expand_queries.py
+++ b/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/expand_queries.py
@@ -16,46 +16,16 @@ from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states impor
    QueryExpansionUpdate,
 )
 from onyx.agents.agent_search.models import GraphConfig
-from onyx.agents.agent_search.shared_graph_utils.constants import (
-    AGENT_LLM_RATELIMIT_MESSAGE,
-)
-from onyx.agents.agent_search.shared_graph_utils.constants import (
-    AGENT_LLM_TIMEOUT_MESSAGE,
-)
-from onyx.agents.agent_search.shared_graph_utils.constants import (
-    AgentLLMErrorType,
-)
-from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
-from onyx.agents.agent_search.shared_graph_utils.models import BaseMessage_Content
-from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
 from onyx.agents.agent_search.shared_graph_utils.utils import dispatch_separated
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id
-from onyx.configs.agent_configs import (
-    AGENT_TIMEOUT_CONNECT_LLM_QUERY_REWRITING_GENERATION,
-)
-from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_QUERY_REWRITING_GENERATION
-from onyx.llm.chat_llm import LLMRateLimitError
-from onyx.llm.chat_llm import LLMTimeoutError
 from onyx.prompts.agent_search import (
    QUERY_REWRITING_PROMPT,
 )
-from onyx.utils.logger import setup_logger
-from onyx.utils.threadpool_concurrency import run_with_timeout
-from onyx.utils.timing import log_function_time
-
-logger = setup_logger()
-
-_llm_node_error_strings = LLMNodeErrorStrings(
-    timeout="Query rewriting failed due to LLM timeout - the original question will be used.",
-    rate_limit="Query rewriting failed due to LLM rate limit - the original question will be used.",
-    general_error="Query rewriting failed due to LLM error - the original question will be used.",
-)


-@log_function_time(print_only=True)
 def expand_queries(
    state: ExpandedRetrievalInput,
    config: RunnableConfig,
@@ -71,7 +41,7 @@ def expand_queries(
    node_start_time = datetime.now()
    question = state.question

-    model = graph_config.tooling.fast_llm
+    llm = graph_config.tooling.fast_llm
    sub_question_id = state.sub_question_id
    if sub_question_id is None:
        level, question_num = 0, 0
@@ -84,45 +54,13 @@ def expand_queries(
        )
    ]

-    agent_error: AgentErrorLog | None = None
-    llm_response_list: list[BaseMessage_Content] = []
-    llm_response = ""
-    rewritten_queries = []
+    llm_response_list = dispatch_separated(
+        llm.stream(prompt=msg), dispatch_subquery(level, question_num, writer)
+    )

-    try:
-        llm_response_list = run_with_timeout(
-            AGENT_TIMEOUT_LLM_QUERY_REWRITING_GENERATION,
-            dispatch_separated,
-            model.stream(
-                prompt=msg,
-                timeout_override=AGENT_TIMEOUT_CONNECT_LLM_QUERY_REWRITING_GENERATION,
-            ),
-            dispatch_subquery(level, question_num, writer),
-        )
-        llm_response = merge_message_runs(llm_response_list, chunk_separator="")[
-            0
-        ].content
-        rewritten_queries = llm_response.split("\n")
-        log_result = f"Number of expanded queries: {len(rewritten_queries)}"
+    llm_response = merge_message_runs(llm_response_list, chunk_separator="")[0].content

-    except (LLMTimeoutError, TimeoutError):
-        agent_error = AgentErrorLog(
-            error_type=AgentLLMErrorType.TIMEOUT,
-            error_message=AGENT_LLM_TIMEOUT_MESSAGE,
-            error_result=_llm_node_error_strings.timeout,
-        )
-        logger.error("LLM Timeout Error - expand queries")
-        log_result = agent_error.error_result
-
-    except LLMRateLimitError:
-        agent_error = AgentErrorLog(
-            error_type=AgentLLMErrorType.RATE_LIMIT,
-            error_message=AGENT_LLM_RATELIMIT_MESSAGE,
-            error_result=_llm_node_error_strings.rate_limit,
-        )
-        logger.error("LLM Rate Limit Error - expand queries")
-        log_result = agent_error.error_result
-    # use subquestion as query if query generation fails
+    rewritten_queries = llm_response.split("\n")

    return QueryExpansionUpdate(
        expanded_queries=rewritten_queries,
@@ -131,7 +69,7 @@ def expand_queries(
                graph_component="shared - expanded retrieval",
                node_name="expand queries",
                node_start_time=node_start_time,
-                result=log_result,
+                result=f"Number of expanded queries: {len(rewritten_queries)}",
            )
        ],
    )
--- a/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/rerank_documents.py
+++ b/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/rerank_documents.py
@@ -26,10 +26,8 @@ from onyx.context.search.postprocessing.postprocessing import rerank_sections
 from onyx.context.search.postprocessing.postprocessing import should_rerank
 from onyx.db.engine import get_session_context_manager
 from onyx.db.search_settings import get_current_search_settings
-from onyx.utils.timing import log_function_time


-@log_function_time(print_only=True)
 def rerank_documents(
    state: ExpandedRetrievalState, config: RunnableConfig
 ) -> DocRerankingUpdate:
@@ -55,7 +53,6 @@ def rerank_documents(

    # Note that these are passed in values from the API and are overrides which are typically None
    rerank_settings = graph_config.inputs.search_request.rerank_settings
-    allow_agent_reranking = graph_config.behavior.allow_agent_reranking

    if rerank_settings is None:
        with get_session_context_manager() as db_session:
@@ -63,31 +60,23 @@ def rerank_documents(
            if not search_settings.disable_rerank_for_streaming:
                rerank_settings = RerankingDetails.from_db_model(search_settings)

-    # Initial default: no reranking. Will be overwritten below if reranking is warranted
-    reranked_documents = verified_documents
-
    if should_rerank(rerank_settings) and len(verified_documents) > 0:
        if len(verified_documents) > 1:
-            if not allow_agent_reranking:
-                logger.info("Use of local rerank model without GPU, skipping reranking")
-            # No reranking, stay with verified_documents as default
-
-            else:
-                # Reranking is warranted, use the rerank_sections functon
-                reranked_documents = rerank_sections(
-                    query_str=question,
-                    # if runnable, then rerank_settings is not None
-                    rerank_settings=cast(RerankingDetails, rerank_settings),
-                    sections_to_rerank=verified_documents,
-                )
+            reranked_documents = rerank_sections(
+                query_str=question,
+                # if runnable, then rerank_settings is not None
+                rerank_settings=cast(RerankingDetails, rerank_settings),
+                sections_to_rerank=verified_documents,
+            )
        else:
            logger.warning(
                f"{len(verified_documents)} verified document(s) found, skipping reranking"
            )
-            # No reranking, stay with verified_documents as default
+            reranked_documents = verified_documents
    else:
        logger.warning("No reranking settings found, using unranked documents")
-        # No reranking, stay with verified_documents as default
+        reranked_documents = verified_documents
+
    if AGENT_RERANKING_STATS:
        fit_scores = get_fit_scores(verified_documents, reranked_documents)
    else:
--- a/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/retrieve_documents.py
+++ b/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/retrieve_documents.py
@@ -28,10 +28,8 @@ from onyx.tools.tool_implementations.search.search_tool import (
    SEARCH_RESPONSE_SUMMARY_ID,
 )
 from onyx.tools.tool_implementations.search.search_tool import SearchResponseSummary
-from onyx.utils.timing import log_function_time


-@log_function_time(print_only=True)
 def retrieve_documents(
    state: RetrievalInput, config: RunnableConfig
 ) -> DocRetrievalUpdate:
--- a/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/verify_documents.py
+++ b/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/verify_documents.py
@@ -1,7 +1,5 @@
-from datetime import datetime
 from typing import cast

-from langchain_core.messages import BaseMessage
 from langchain_core.messages import HumanMessage
 from langchain_core.runnables.config import RunnableConfig

@@ -12,40 +10,14 @@ from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states impor
    DocVerificationUpdate,
 )
 from onyx.agents.agent_search.models import GraphConfig
-from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
-    binary_string_test,
-)
 from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
    trim_prompt_piece,
 )
-from onyx.agents.agent_search.shared_graph_utils.constants import (
-    AGENT_POSITIVE_VALUE_STR,
-)
-from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
-from onyx.agents.agent_search.shared_graph_utils.utils import (
-    get_langgraph_node_log_string,
-)
-from onyx.configs.agent_configs import AGENT_TIMEOUT_CONNECT_LLM_DOCUMENT_VERIFICATION
-from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_DOCUMENT_VERIFICATION
-from onyx.llm.chat_llm import LLMRateLimitError
-from onyx.llm.chat_llm import LLMTimeoutError
 from onyx.prompts.agent_search import (
    DOCUMENT_VERIFICATION_PROMPT,
 )
-from onyx.utils.logger import setup_logger
-from onyx.utils.threadpool_concurrency import run_with_timeout
-from onyx.utils.timing import log_function_time
-
-logger = setup_logger()
-
-_llm_node_error_strings = LLMNodeErrorStrings(
-    timeout="The LLM timed out. The document could not be verified. The document will be treated as 'relevant'",
-    rate_limit="The LLM encountered a rate limit. The document could not be verified. The document will be treated as 'relevant'",
-    general_error="The LLM encountered an error. The document could not be verified. The document will be treated as 'relevant'",
-)


-@log_function_time(print_only=True)
 def verify_documents(
    state: DocVerificationInput, config: RunnableConfig
 ) -> DocVerificationUpdate:
@@ -54,14 +26,12 @@ def verify_documents(

    Args:
        state (DocVerificationInput): The current state
-        config (RunnableConfig): Configuration containing AgentSearchConfig
+        config (RunnableConfig): Configuration containing ProSearchConfig

    Updates:
        verified_documents: list[InferenceSection]
    """

-    node_start_time = datetime.now()
-
    question = state.question
    retrieved_document_to_verify = state.retrieved_document_to_verify
    document_content = retrieved_document_to_verify.combined_content
@@ -81,43 +51,12 @@ def verify_documents(
        )
    ]

-    response: BaseMessage | None = None
+    response = fast_llm.invoke(msg)

-    verified_documents = [
-        retrieved_document_to_verify
-    ]  # default is to treat document as relevant
-
-    try:
-        response = run_with_timeout(
-            AGENT_TIMEOUT_LLM_DOCUMENT_VERIFICATION,
-            fast_llm.invoke,
-            prompt=msg,
-            timeout_override=AGENT_TIMEOUT_CONNECT_LLM_DOCUMENT_VERIFICATION,
-        )
-
-        assert isinstance(response.content, str)
-        if not binary_string_test(
-            text=response.content, positive_value=AGENT_POSITIVE_VALUE_STR
-        ):
-            verified_documents = []
-
-    except (LLMTimeoutError, TimeoutError):
-        # In this case, we decide to continue and don't raise an error, as
-        # little harm in letting some docs through that are less relevant.
-        logger.error("LLM Timeout Error - verify documents")
-
-    except LLMRateLimitError:
-        # In this case, we decide to continue and don't raise an error, as
-        # little harm in letting some docs through that are less relevant.
-        logger.error("LLM Rate Limit Error - verify documents")
+    verified_documents = []
+    if isinstance(response.content, str) and "yes" in response.content.lower():
+        verified_documents.append(retrieved_document_to_verify)

    return DocVerificationUpdate(
        verified_documents=verified_documents,
-        log_messages=[
-            get_langgraph_node_log_string(
-                graph_component="shared - expanded retrieval",
-                node_name="verify documents",
-                node_start_time=node_start_time,
-            )
-        ],
    )
--- a/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/states.py
+++ b/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/states.py
@@ -21,13 +21,9 @@ from onyx.context.search.models import InferenceSection


 class ExpandedRetrievalInput(SubgraphCoreState):
-    # exception from 'no default value'for LangGraph input states
-    # Here, sub_question_id default None implies usage for the
-    # original question. This is sometimes needed for nested sub-graphs
-
+    question: str = ""
+    base_search: bool = False
    sub_question_id: str | None = None
-    question: str
-    base_search: bool


 ## Update/Return States
@@ -38,7 +34,7 @@ class QueryExpansionUpdate(LoggerUpdate, BaseModel):
    log_messages: list[str] = []


-class DocVerificationUpdate(LoggerUpdate, BaseModel):
+class DocVerificationUpdate(BaseModel):
    verified_documents: Annotated[list[InferenceSection], dedup_inference_sections] = []


@@ -92,4 +88,4 @@ class DocVerificationInput(ExpandedRetrievalInput):


 class RetrievalInput(ExpandedRetrievalInput):
-    query_to_retrieve: str
+    query_to_retrieve: str = ""
--- a/backend/onyx/agents/agent_search/models.py
+++ b/backend/onyx/agents/agent_search/models.py
@@ -67,7 +67,6 @@ class GraphSearchConfig(BaseModel):
    # Whether to allow creation of refinement questions (and entity extraction, etc.)
    allow_refinement: bool = True
    skip_gen_ai_answer_generation: bool = False
-    allow_agent_reranking: bool = False


 class GraphConfig(BaseModel):
--- a/backend/onyx/agents/agent_search/orchestration/nodes/basic_use_tool_response.py
+++ b/backend/onyx/agents/agent_search/orchestration/nodes/basic_use_tool_response.py
--- a/backend/onyx/agents/agent_search/orchestration/nodes/llm_tool_choice.py
+++ b/backend/onyx/agents/agent_search/orchestration/nodes/llm_tool_choice.py
@@ -25,7 +25,7 @@ logger = setup_logger()
 # and a function that handles extracting the necessary fields
 # from the state and config
 # TODO: fan-out to multiple tool call nodes? Make this configurable?
-def choose_tool(
+def llm_tool_choice(
    state: ToolChoiceState,
    config: RunnableConfig,
    writer: StreamWriter = lambda _: None,
--- a/backend/onyx/agents/agent_search/orchestration/nodes/tool_call.py
+++ b/backend/onyx/agents/agent_search/orchestration/nodes/tool_call.py
@@ -28,7 +28,7 @@ def emit_packet(packet: AnswerPacket, writer: StreamWriter) -> None:
    write_custom_event("basic_response", packet, writer)


-def call_tool(
+def tool_call(
    state: ToolChoiceUpdate,
    config: RunnableConfig,
    writer: StreamWriter = lambda _: None,
--- a/backend/onyx/agents/agent_search/run_graph.py
+++ b/backend/onyx/agents/agent_search/run_graph.py
@@ -12,7 +12,7 @@ from onyx.agents.agent_search.deep_search.main.graph_builder import (
    main_graph_builder as main_graph_builder_a,
 )
 from onyx.agents.agent_search.deep_search.main.states import (
-    MainInput as MainInput,
+    MainInput as MainInput_a,
 )
 from onyx.agents.agent_search.models import GraphConfig
 from onyx.agents.agent_search.shared_graph_utils.utils import get_test_config
@@ -21,7 +21,6 @@ from onyx.chat.models import AnswerPacket
 from onyx.chat.models import AnswerStream
 from onyx.chat.models import ExtendedToolResponse
 from onyx.chat.models import RefinedAnswerImprovement
-from onyx.chat.models import StreamingError
 from onyx.chat.models import StreamStopInfo
 from onyx.chat.models import SubQueryPiece
 from onyx.chat.models import SubQuestionPiece
@@ -34,7 +33,6 @@ from onyx.llm.factory import get_default_llms
 from onyx.tools.tool_runner import ToolCallKickoff
 from onyx.utils.logger import setup_logger

-
 logger = setup_logger()

 _COMPILED_GRAPH: CompiledStateGraph | None = None
@@ -74,15 +72,13 @@ def _parse_agent_event(
            return cast(AnswerPacket, event["data"])
        elif event["name"] == "refined_answer_improvement":
            return cast(RefinedAnswerImprovement, event["data"])
-        elif event["name"] == "refined_sub_question_creation_error":
-            return cast(StreamingError, event["data"])
    return None


 def manage_sync_streaming(
    compiled_graph: CompiledStateGraph,
    config: GraphConfig,
-    graph_input: BasicInput | MainInput,
+    graph_input: BasicInput | MainInput_a,
 ) -> Iterable[StreamEvent]:
    message_id = config.persistence.message_id if config.persistence else None
    for event in compiled_graph.stream(
@@ -96,7 +92,7 @@ def manage_sync_streaming(
 def run_graph(
    compiled_graph: CompiledStateGraph,
    config: GraphConfig,
-    input: BasicInput | MainInput,
+    input: BasicInput | MainInput_a,
 ) -> AnswerStream:
    config.behavior.perform_initial_search_decomposition = (
        INITIAL_SEARCH_DECOMPOSITION_ENABLED
@@ -127,7 +123,9 @@ def run_main_graph(
 ) -> AnswerStream:
    compiled_graph = load_compiled_graph()

-    input = MainInput(log_messages=[])
+    input = MainInput_a(
+        base_question=config.inputs.search_request.query, log_messages=[]
+    )

    # Agent search is not a Tool per se, but this is helpful for the frontend
    yield ToolCallKickoff(
@@ -142,7 +140,7 @@ def run_basic_graph(
 ) -> AnswerStream:
    graph = basic_graph_builder()
    compiled_graph = graph.compile()
-    input = BasicInput(unused=True)
+    input = BasicInput()
    return run_graph(compiled_graph, config, input)


@@ -174,7 +172,9 @@ if __name__ == "__main__":
            # search_request.persona = get_persona_by_id(1, None, db_session)
            # config.perform_initial_search_path_decision = False
            config.behavior.perform_initial_search_decomposition = True
-            input = MainInput(log_messages=[])
+            input = MainInput_a(
+                base_question=config.inputs.search_request.query, log_messages=[]
+            )

            tool_responses: list = []
            for output in run_graph(compiled_graph, config, input):
--- a/backend/onyx/agents/agent_search/shared_graph_utils/agent_prompt_ops.py
+++ b/backend/onyx/agents/agent_search/shared_graph_utils/agent_prompt_ops.py
@@ -7,7 +7,6 @@ from onyx.agents.agent_search.models import GraphConfig
 from onyx.agents.agent_search.shared_graph_utils.models import (
    AgentPromptEnrichmentComponents,
 )
-from onyx.agents.agent_search.shared_graph_utils.utils import format_docs
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_persona_agent_prompt_expressions,
 )
@@ -41,7 +40,13 @@ def build_sub_question_answer_prompt(

    date_str = build_date_time_string()

-    docs_str = format_docs(docs)
+    # TODO: This should include document metadata and title
+    docs_format_list = [
+        f"Document Number: [D{doc_num + 1}]\nContent: {doc.combined_content}\n\n"
+        for doc_num, doc in enumerate(docs)
+    ]
+
+    docs_str = "\n\n".join(docs_format_list)

    docs_str = trim_prompt_piece(
        config,
@@ -145,38 +150,3 @@ def get_prompt_enrichment_components(
        history=history,
        date_str=date_str,
    )
-
-
-def binary_string_test(text: str, positive_value: str = "yes") -> bool:
-    """
-    Tests if a string contains a positive value (case-insensitive).
-
-    Args:
-        text: The string to test
-        positive_value: The value to look for (defaults to "yes")
-
-    Returns:
-        True if the positive value is found in the text
-    """
-    return positive_value.lower() in text.lower()
-
-
-def binary_string_test_after_answer_separator(
-    text: str, positive_value: str = "yes", separator: str = "Answer:"
-) -> bool:
-    """
-    Tests if a string contains a positive value (case-insensitive).
-
-    Args:
-        text: The string to test
-        positive_value: The value to look for (defaults to "yes")
-
-    Returns:
-        True if the positive value is found in the text
-    """
-
-    if separator not in text:
-        return False
-    relevant_text = text.split(f"{separator}")[-1]
-
-    return binary_string_test(relevant_text, positive_value)
--- a/backend/onyx/agents/agent_search/shared_graph_utils/calculations.py
+++ b/backend/onyx/agents/agent_search/shared_graph_utils/calculations.py
@@ -1,11 +1,7 @@
 import numpy as np

-from onyx.agents.agent_search.shared_graph_utils.models import AnswerGenerationDocuments
 from onyx.agents.agent_search.shared_graph_utils.models import RetrievalFitScoreMetrics
 from onyx.agents.agent_search.shared_graph_utils.models import RetrievalFitStats
-from onyx.agents.agent_search.shared_graph_utils.operators import (
-    dedup_inference_section_list,
-)
 from onyx.chat.models import SectionRelevancePiece
 from onyx.context.search.models import InferenceSection
 from onyx.utils.logger import setup_logger
@@ -100,106 +96,3 @@ def get_fit_scores(
    )

    return fit_eval
-
-
-def get_answer_generation_documents(
-    relevant_docs: list[InferenceSection],
-    context_documents: list[InferenceSection],
-    original_question_docs: list[InferenceSection],
-    max_docs: int,
-) -> AnswerGenerationDocuments:
-    """
-    Create a deduplicated list of documents to stream, prioritizing relevant docs.
-
-    Args:
-        relevant_docs: Primary documents to include
-        context_documents: Additional context documents to append
-        original_question_docs: Original question documents to append
-        max_docs: Maximum number of documents to return
-
-    Returns:
-        List of deduplicated documents, limited to max_docs
-    """
-    # get relevant_doc ids
-    relevant_doc_ids = [doc.center_chunk.document_id for doc in relevant_docs]
-
-    # Start with relevant docs or fallback to original question docs
-    streaming_documents = relevant_docs.copy()
-
-    # Use a set for O(1) lookups of document IDs
-    seen_doc_ids = {doc.center_chunk.document_id for doc in streaming_documents}
-
-    # Combine additional documents to check in one iteration
-    additional_docs = context_documents + original_question_docs
-    for doc_idx, doc in enumerate(additional_docs):
-        doc_id = doc.center_chunk.document_id
-        if doc_id not in seen_doc_ids:
-            streaming_documents.append(doc)
-            seen_doc_ids.add(doc_id)
-
-    streaming_documents = dedup_inference_section_list(streaming_documents)
-
-    relevant_streaming_docs = [
-        doc
-        for doc in streaming_documents
-        if doc.center_chunk.document_id in relevant_doc_ids
-    ]
-    relevant_streaming_docs = dedup_sort_inference_section_list(relevant_streaming_docs)
-
-    additional_streaming_docs = [
-        doc
-        for doc in streaming_documents
-        if doc.center_chunk.document_id not in relevant_doc_ids
-    ]
-    additional_streaming_docs = dedup_sort_inference_section_list(
-        additional_streaming_docs
-    )
-
-    for doc in additional_streaming_docs:
-        if doc.center_chunk.score:
-            doc.center_chunk.score += -2.0
-        else:
-            doc.center_chunk.score = -2.0
-
-    sorted_streaming_documents = relevant_streaming_docs + additional_streaming_docs
-
-    return AnswerGenerationDocuments(
-        streaming_documents=sorted_streaming_documents[:max_docs],
-        context_documents=relevant_streaming_docs[:max_docs],
-    )
-
-
-def dedup_sort_inference_section_list(
-    sections: list[InferenceSection],
-) -> list[InferenceSection]:
-    """Deduplicates InferenceSections by document_id and sorts by score.
-
-    Args:
-        sections: List of InferenceSections to deduplicate and sort
-
-    Returns:
-        Deduplicated list of InferenceSections sorted by score in descending order
-    """
-    # dedupe/merge with existing framework
-    sections = dedup_inference_section_list(sections)
-
-    # Use dict to deduplicate by document_id, keeping highest scored version
-    unique_sections: dict[str, InferenceSection] = {}
-    for section in sections:
-        doc_id = section.center_chunk.document_id
-        if doc_id not in unique_sections:
-            unique_sections[doc_id] = section
-            continue
-
-        # Keep version with higher score
-        existing_score = unique_sections[doc_id].center_chunk.score or 0
-        new_score = section.center_chunk.score or 0
-        if new_score > existing_score:
-            unique_sections[doc_id] = section
-
-    # Sort by score in descending order, handling None scores
-    sorted_sections = sorted(
-        unique_sections.values(), key=lambda x: x.center_chunk.score or 0, reverse=True
-    )
-
-    return sorted_sections
--- a/backend/onyx/agents/agent_search/shared_graph_utils/constants.py
+++ b/backend/onyx/agents/agent_search/shared_graph_utils/constants.py
@@ -1,19 +0,0 @@
-from enum import Enum
-
-AGENT_LLM_TIMEOUT_MESSAGE = "The agent timed out. Please try again."
-AGENT_LLM_ERROR_MESSAGE = "The agent encountered an error. Please try again."
-AGENT_LLM_RATELIMIT_MESSAGE = (
-    "The agent encountered a rate limit error. Please try again."
-)
-LLM_ANSWER_ERROR_MESSAGE = "The question was not answered due to an LLM error."
-
-AGENT_POSITIVE_VALUE_STR = "yes"
-AGENT_NEGATIVE_VALUE_STR = "no"
-
-AGENT_ANSWER_SEPARATOR = "Answer:"
-
-
-class AgentLLMErrorType(str, Enum):
-    TIMEOUT = "timeout"
-    RATE_LIMIT = "rate_limit"
-    GENERAL_ERROR = "general_error"
--- a/backend/onyx/agents/agent_search/shared_graph_utils/models.py
+++ b/backend/onyx/agents/agent_search/shared_graph_utils/models.py
@@ -1,5 +1,3 @@
-from typing import Any
-
 from pydantic import BaseModel

 from onyx.agents.agent_search.deep_search.main.models import (
@@ -58,12 +56,6 @@ class InitialAgentResultStats(BaseModel):
    agent_effectiveness: dict[str, float | int | None]


-class AgentErrorLog(BaseModel):
-    error_message: str
-    error_type: str
-    error_result: str
-
-
 class RefinedAgentStats(BaseModel):
    revision_doc_efficiency: float | None
    revision_question_efficiency: float | None
@@ -118,11 +110,6 @@ class SubQuestionAnswerResults(BaseModel):
    sub_question_retrieval_stats: AgentChunkRetrievalStats


-class StructuredSubquestionDocuments(BaseModel):
-    cited_documents: list[InferenceSection]
-    context_documents: list[InferenceSection]
-
-
 class CombinedAgentMetrics(BaseModel):
    timings: AgentTimings
    base_metrics: AgentBaseMetrics | None
@@ -139,17 +126,3 @@ class AgentPromptEnrichmentComponents(BaseModel):
    persona_prompts: PersonaPromptExpressions
    history: str
    date_str: str
-
-
-class LLMNodeErrorStrings(BaseModel):
-    timeout: str = "LLM Timeout Error"
-    rate_limit: str = "LLM Rate Limit Error"
-    general_error: str = "General LLM Error"
-
-
-class AnswerGenerationDocuments(BaseModel):
-    streaming_documents: list[InferenceSection]
-    context_documents: list[InferenceSection]
-
-
-BaseMessage_Content = str | list[str | dict[str, Any]]
--- a/backend/onyx/agents/agent_search/shared_graph_utils/operators.py
+++ b/backend/onyx/agents/agent_search/shared_graph_utils/operators.py
@@ -12,13 +12,6 @@ def dedup_inference_sections(
    return deduped


-def dedup_inference_section_list(
-    list: list[InferenceSection],
-) -> list[InferenceSection]:
-    deduped = _merge_sections(list)
-    return deduped
-
-
 def dedup_question_answer_results(
    question_answer_results_1: list[SubQuestionAnswerResults],
    question_answer_results_2: list[SubQuestionAnswerResults],
--- a/backend/onyx/agents/agent_search/shared_graph_utils/utils.py
+++ b/backend/onyx/agents/agent_search/shared_graph_utils/utils.py
@@ -20,18 +20,10 @@ from onyx.agents.agent_search.models import GraphInputs
 from onyx.agents.agent_search.models import GraphPersistence
 from onyx.agents.agent_search.models import GraphSearchConfig
 from onyx.agents.agent_search.models import GraphTooling
-from onyx.agents.agent_search.shared_graph_utils.models import BaseMessage_Content
 from onyx.agents.agent_search.shared_graph_utils.models import (
    EntityRelationshipTermExtraction,
 )
 from onyx.agents.agent_search.shared_graph_utils.models import PersonaPromptExpressions
-from onyx.agents.agent_search.shared_graph_utils.models import (
-    StructuredSubquestionDocuments,
-)
-from onyx.agents.agent_search.shared_graph_utils.models import SubQuestionAnswerResults
-from onyx.agents.agent_search.shared_graph_utils.operators import (
-    dedup_inference_section_list,
-)
 from onyx.chat.models import AnswerPacket
 from onyx.chat.models import AnswerStyleConfig
 from onyx.chat.models import CitationConfig
@@ -42,10 +34,6 @@ from onyx.chat.models import StreamStopInfo
 from onyx.chat.models import StreamStopReason
 from onyx.chat.models import StreamType
 from onyx.chat.prompt_builder.answer_prompt_builder import AnswerPromptBuilder
-from onyx.configs.agent_configs import (
-    AGENT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION,
-)
-from onyx.configs.agent_configs import AGENT_TIMEOUT_LLM_HISTORY_SUMMARY_GENERATION
 from onyx.configs.chat_configs import CHAT_TARGET_CHUNK_PERCENTAGE
 from onyx.configs.chat_configs import MAX_CHUNKS_FED_TO_CHAT
 from onyx.configs.constants import DEFAULT_PERSONA_ID
@@ -58,8 +46,6 @@ from onyx.context.search.models import SearchRequest
 from onyx.db.engine import get_session_context_manager
 from onyx.db.persona import get_persona_by_id
 from onyx.db.persona import Persona
-from onyx.llm.chat_llm import LLMRateLimitError
-from onyx.llm.chat_llm import LLMTimeoutError
 from onyx.llm.interfaces import LLM
 from onyx.prompts.agent_search import (
    ASSISTANT_SYSTEM_PROMPT_DEFAULT,
@@ -80,10 +66,8 @@ from onyx.tools.tool_implementations.search.search_tool import (
 from onyx.tools.tool_implementations.search.search_tool import SearchResponseSummary
 from onyx.tools.tool_implementations.search.search_tool import SearchTool
 from onyx.tools.utils import explicit_tool_calling_supported
-from onyx.utils.logger import setup_logger
-from onyx.utils.threadpool_concurrency import run_with_timeout

-logger = setup_logger()
+BaseMessage_Content = str | list[str | dict[str, Any]]


 # Post-processing
@@ -396,26 +380,8 @@ def summarize_history(
        )
    )

-    try:
-        history_response = run_with_timeout(
-            AGENT_TIMEOUT_LLM_HISTORY_SUMMARY_GENERATION,
-            llm.invoke,
-            history_context_prompt,
-            timeout_override=AGENT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION,
-        )
-    except (LLMTimeoutError, TimeoutError):
-        logger.error("LLM Timeout Error - summarize history")
-        return (
-            history  # this is what is done at this point anyway, so we default to this
-        )
-    except LLMRateLimitError:
-        logger.error("LLM Rate Limit Error - summarize history")
-        return (
-            history  # this is what is done at this point anyway, so we default to this
-        )
-
+    history_response = llm.invoke(history_context_prompt)
    assert isinstance(history_response.content, str)
-
    return history_response.content


@@ -481,27 +447,3 @@ def remove_document_citations(text: str) -> str:
    #   \d+  - one or more digits
    #   \]   - literal ] character
    return re.sub(r"\[(?:D|Q)?\d+\]", "", text)
-
-
-def get_deduplicated_structured_subquestion_documents(
-    sub_question_results: list[SubQuestionAnswerResults],
-) -> StructuredSubquestionDocuments:
-    """
-    Extract and deduplicate all cited documents from sub-question results.
-
-    Args:
-        sub_question_results: List of sub-question results containing cited documents
-
-    Returns:
-        Deduplicated list of cited documents
-    """
-    cited_docs = [
-        doc for result in sub_question_results for doc in result.cited_documents
-    ]
-    context_docs = [
-        doc for result in sub_question_results for doc in result.context_documents
-    ]
-    return StructuredSubquestionDocuments(
-        cited_documents=dedup_inference_section_list(cited_docs),
-        context_documents=dedup_inference_section_list(context_docs),
-    )
--- a/backend/onyx/auth/email_utils.py
+++ b/backend/onyx/auth/email_utils.py
@@ -10,7 +10,6 @@ from onyx.configs.app_configs import SMTP_PORT
 from onyx.configs.app_configs import SMTP_SERVER
 from onyx.configs.app_configs import SMTP_USER
 from onyx.configs.app_configs import WEB_DOMAIN
-from onyx.configs.constants import AuthType
 from onyx.configs.constants import TENANT_ID_COOKIE_NAME
 from onyx.db.models import User

@@ -188,51 +187,23 @@ def send_subscription_cancellation_email(user_email: str) -> None:
    send_email(user_email, subject, html_content, text_content)


-def send_user_email_invite(
-    user_email: str, current_user: User, auth_type: AuthType
-) -> None:
+def send_user_email_invite(user_email: str, current_user: User) -> None:
    subject = "Invitation to Join Onyx Organization"
    heading = "You've Been Invited!"
-
-    # the exact action taken by the user, and thus the message, depends on the auth type
-    message = f"<p>You have been invited by {current_user.email} to join an organization on Onyx.</p>"
-    if auth_type == AuthType.CLOUD:
-        message += (
-            "<p>To join the organization, please click the button below to set a password "
-            "or login with Google and complete your registration.</p>"
-        )
-    elif auth_type == AuthType.BASIC:
-        message += (
-            "<p>To join the organization, please click the button below to set a password "
-            "and complete your registration.</p>"
-        )
-    elif auth_type == AuthType.GOOGLE_OAUTH:
-        message += (
-            "<p>To join the organization, please click the button below to login with Google "
-            "and complete your registration.</p>"
-        )
-    elif auth_type == AuthType.OIDC or auth_type == AuthType.SAML:
-        message += (
-            "<p>To join the organization, please click the button below to"
-            " complete your registration.</p>"
-        )
-    else:
-        raise ValueError(f"Invalid auth type: {auth_type}")
-
+    message = (
+        f"<p>You have been invited by {current_user.email} to join an organization on Onyx.</p>"
+        "<p>To join the organization, please click the button below to set a password "
+        "or login with Google and complete your registration.</p>"
+    )
    cta_text = "Join Organization"
    cta_link = f"{WEB_DOMAIN}/auth/signup?email={user_email}"
    html_content = build_html_email(heading, message, cta_text, cta_link)
-
-    # text content is the fallback for clients that don't support HTML
-    # not as critical, so not having special cases for each auth type
    text_content = (
        f"You have been invited by {current_user.email} to join an organization on Onyx.\n"
        "To join the organization, please visit the following link:\n"
        f"{WEB_DOMAIN}/auth/signup?email={user_email}\n"
+        "You'll be asked to set a password or login with Google to complete your registration."
    )
-    if auth_type == AuthType.CLOUD:
-        text_content += "You'll be asked to set a password or login with Google to complete your registration."
-
    send_email(user_email, subject, html_content, text_content)


--- a/backend/onyx/auth/noauth_user.py
+++ b/backend/onyx/auth/noauth_user.py
@@ -42,5 +42,4 @@ def fetch_no_auth_user(
        role=UserRole.BASIC if anonymous_user_enabled else UserRole.ADMIN,
        preferences=load_no_auth_user_preferences(store),
        is_anonymous_user=anonymous_user_enabled,
-        password_configured=False,
    )
--- a/backend/onyx/auth/users.py
+++ b/backend/onyx/auth/users.py
@@ -1,7 +1,5 @@
 import json
-import random
 import secrets
-import string
 import uuid
 from collections.abc import AsyncGenerator
 from datetime import datetime
@@ -88,6 +86,7 @@ from onyx.db.auth import get_user_db
 from onyx.db.auth import SQLAlchemyUserAdminDB
 from onyx.db.engine import get_async_session
 from onyx.db.engine import get_async_session_with_tenant
+from onyx.db.engine import get_current_tenant_id
 from onyx.db.engine import get_session_with_tenant
 from onyx.db.models import AccessToken
 from onyx.db.models import OAuthAccount
@@ -95,7 +94,6 @@ from onyx.db.models import User
 from onyx.db.users import get_user_by_email
 from onyx.redis.redis_pool import get_async_redis_connection
 from onyx.redis.redis_pool import get_redis_client
-from onyx.server.utils import BasicAuthenticationError
 from onyx.utils.logger import setup_logger
 from onyx.utils.telemetry import create_milestone_and_report
 from onyx.utils.telemetry import optional_telemetry
@@ -105,11 +103,15 @@ from onyx.utils.variable_functionality import fetch_versioned_implementation
 from shared_configs.configs import async_return_default_schema
 from shared_configs.configs import MULTI_TENANT
 from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
-from shared_configs.contextvars import get_current_tenant_id

 logger = setup_logger()


+class BasicAuthenticationError(HTTPException):
+    def __init__(self, detail: str):
+        super().__init__(status_code=status.HTTP_403_FORBIDDEN, detail=detail)
+
+
 def is_user_admin(user: User | None) -> bool:
    if AUTH_TYPE == AuthType.DISABLED:
        return True
@@ -141,30 +143,6 @@ def get_display_email(email: str | None, space_less: bool = False) -> str:
    return email or ""


-def generate_password() -> str:
-    lowercase_letters = string.ascii_lowercase
-    uppercase_letters = string.ascii_uppercase
-    digits = string.digits
-    special_characters = string.punctuation
-
-    # Ensure at least one of each required character type
-    password = [
-        secrets.choice(uppercase_letters),
-        secrets.choice(digits),
-        secrets.choice(special_characters),
-    ]
-
-    # Fill the rest with a mix of characters
-    remaining_length = 12 - len(password)
-    all_characters = lowercase_letters + uppercase_letters + digits + special_characters
-    password.extend(secrets.choice(all_characters) for _ in range(remaining_length))
-
-    # Shuffle the password to randomize the position of the required characters
-    random.shuffle(password)
-
-    return "".join(password)
-
-
 def user_needs_to_be_verified() -> bool:
    if AUTH_TYPE == AuthType.BASIC or AUTH_TYPE == AuthType.CLOUD:
        return REQUIRE_EMAIL_VERIFICATION
@@ -215,7 +193,7 @@ def verify_email_is_invited(email: str) -> None:


 def verify_email_in_whitelist(email: str, tenant_id: str | None = None) -> None:
-    with get_session_with_tenant(tenant_id=tenant_id) as db_session:
+    with get_session_with_tenant(tenant_id) as db_session:
        if not get_user_by_email(email, db_session):
            verify_email_is_invited(email)

@@ -617,39 +595,6 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]):

            return user

-    async def reset_password_as_admin(self, user_id: uuid.UUID) -> str:
-        """Admin-only. Generate a random password for a user and return it."""
-        user = await self.get(user_id)
-        new_password = generate_password()
-        await self._update(user, {"password": new_password})
-        return new_password
-
-    async def change_password_if_old_matches(
-        self, user: User, old_password: str, new_password: str
-    ) -> None:
-        """
-        For normal users to change password if they know the old one.
-        Raises 400 if old password doesn't match.
-        """
-        verified, updated_password_hash = self.password_helper.verify_and_update(
-            old_password, user.hashed_password
-        )
-        if not verified:
-            # Raise some HTTPException (or your custom exception) if old password is invalid:
-            from fastapi import HTTPException, status
-
-            raise HTTPException(
-                status_code=status.HTTP_400_BAD_REQUEST,
-                detail="Invalid current password",
-            )
-
-        # If the hash was upgraded behind the scenes, we can keep it before setting the new password:
-        if updated_password_hash:
-            user.hashed_password = updated_password_hash
-
-        # Now apply and validate the new password
-        await self._update(user, {"password": new_password})
-

 async def get_user_manager(
    user_db: SQLAlchemyUserDatabase = Depends(get_user_db),
@@ -874,9 +819,8 @@ async def current_limited_user(

 async def current_chat_accesssible_user(
    user: User | None = Depends(optional_user),
+    tenant_id: str | None = Depends(get_current_tenant_id),
 ) -> User | None:
-    tenant_id = get_current_tenant_id()
-
    return await double_check_user(
        user, allow_anonymous_access=anonymous_user_enabled(tenant_id=tenant_id)
    )
--- a/backend/onyx/background/celery/apps/app_base.py
+++ b/backend/onyx/background/celery/apps/app_base.py
@@ -33,7 +33,6 @@ from onyx.redis.redis_connector_ext_group_sync import RedisConnectorExternalGrou
 from onyx.redis.redis_connector_prune import RedisConnectorPrune
 from onyx.redis.redis_document_set import RedisDocumentSet
 from onyx.redis.redis_pool import get_redis_client
-from onyx.redis.redis_pool import get_shared_redis_client
 from onyx.redis.redis_usergroup import RedisUserGroup
 from onyx.utils.logger import ColoredFormatter
 from onyx.utils.logger import PlainFormatter
@@ -59,35 +58,13 @@ else:
    logger.debug("Sentry DSN not provided, skipping Sentry initialization")


-class TenantAwareTask(Task):
-    """A custom base Task that sets tenant_id in a contextvar before running."""
-
-    abstract = True  # So Celery knows not to register this as a real task.
-
-    def __call__(self, *args: Any, **kwargs: Any) -> Any:
-        # Grab tenant_id from the kwargs, or fallback to default if missing.
-        tenant_id = kwargs.get("tenant_id", None) or POSTGRES_DEFAULT_SCHEMA
-
-        # Set the context var
-        CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
-
-        # Actually run the task now
-        try:
-            return super().__call__(*args, **kwargs)
-        finally:
-            # Clear or reset after the task runs
-            # so it does not leak into any subsequent tasks on the same worker process
-            CURRENT_TENANT_ID_CONTEXTVAR.set(None)
-
-
-@task_prerun.connect
 def on_task_prerun(
    sender: Any | None = None,
    task_id: str | None = None,
    task: Task | None = None,
    args: tuple[Any, ...] | None = None,
    kwargs: dict[str, Any] | None = None,
-    **other_kwargs: Any,
+    **kwds: Any,
 ) -> None:
    pass

@@ -224,7 +201,7 @@ def wait_for_redis(sender: Any, **kwargs: Any) -> None:
    Will raise WorkerShutdown to kill the celery worker if the timeout
    is reached."""

-    r = get_shared_redis_client()
+    r = get_redis_client(tenant_id=None)

    WAIT_INTERVAL = 5
    WAIT_LIMIT = 60
@@ -310,7 +287,7 @@ def on_secondary_worker_init(sender: Any, **kwargs: Any) -> None:
    # Set up variables for waiting on primary worker
    WAIT_INTERVAL = 5
    WAIT_LIMIT = 60
-    r = get_shared_redis_client()
+    r = get_redis_client(tenant_id=None)
    time_start = time.monotonic()

    logger.info("Waiting for primary worker to be ready...")
@@ -462,6 +439,24 @@ class TenantContextFilter(logging.Filter):
        return True


+@task_prerun.connect
+def set_tenant_id(
+    sender: Any | None = None,
+    task_id: str | None = None,
+    task: Task | None = None,
+    args: tuple[Any, ...] | None = None,
+    kwargs: dict[str, Any] | None = None,
+    **other_kwargs: Any,
+) -> None:
+    """Signal handler to set tenant ID in context var before task starts."""
+    tenant_id = (
+        kwargs.get("tenant_id", POSTGRES_DEFAULT_SCHEMA)
+        if kwargs
+        else POSTGRES_DEFAULT_SCHEMA
+    )
+    CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
+
+
@task_postrun.connect
 def reset_tenant_id(
    sender: Any | None = None,
--- a/backend/onyx/background/celery/apps/beat.py
+++ b/backend/onyx/background/celery/apps/beat.py
@@ -132,7 +132,6 @@ class DynamicTenantScheduler(PersistentScheduler):
                        f"Adding options to task {tenant_task_name}: {options}"
                    )
                    tenant_task["options"] = options
-
                new_schedule[tenant_task_name] = tenant_task

        return new_schedule
@@ -257,4 +256,3 @@ def on_setup_logging(


 celery_app.conf.beat_scheduler = DynamicTenantScheduler
-celery_app.conf.task_default_base = app_base.TenantAwareTask
--- a/backend/onyx/background/celery/apps/heavy.py
+++ b/backend/onyx/background/celery/apps/heavy.py
@@ -20,7 +20,6 @@ logger = setup_logger()

 celery_app = Celery(__name__)
 celery_app.config_from_object("onyx.background.celery.configs.heavy")
-celery_app.Task = app_base.TenantAwareTask  # type: ignore [misc]


@signals.task_prerun.connect
--- a/backend/onyx/background/celery/apps/indexing.py
+++ b/backend/onyx/background/celery/apps/indexing.py
@@ -21,7 +21,6 @@ logger = setup_logger()

 celery_app = Celery(__name__)
 celery_app.config_from_object("onyx.background.celery.configs.indexing")
-celery_app.Task = app_base.TenantAwareTask  # type: ignore [misc]


@signals.task_prerun.connect
--- a/backend/onyx/background/celery/apps/light.py
+++ b/backend/onyx/background/celery/apps/light.py
@@ -23,7 +23,6 @@ logger = setup_logger()

 celery_app = Celery(__name__)
 celery_app.config_from_object("onyx.background.celery.configs.light")
-celery_app.Task = app_base.TenantAwareTask  # type: ignore [misc]


@signals.task_prerun.connect
--- a/backend/onyx/background/celery/apps/monitoring.py
+++ b/backend/onyx/background/celery/apps/monitoring.py
@@ -20,7 +20,6 @@ logger = setup_logger()

 celery_app = Celery(__name__)
 celery_app.config_from_object("onyx.background.celery.configs.monitoring")
-celery_app.Task = app_base.TenantAwareTask  # type: ignore [misc]


@signals.task_prerun.connect
--- a/backend/onyx/background/celery/apps/primary.py
+++ b/backend/onyx/background/celery/apps/primary.py
@@ -24,7 +24,7 @@ from onyx.configs.constants import CELERY_PRIMARY_WORKER_LOCK_TIMEOUT
 from onyx.configs.constants import OnyxRedisConstants
 from onyx.configs.constants import OnyxRedisLocks
 from onyx.configs.constants import POSTGRES_CELERY_WORKER_PRIMARY_APP_NAME
-from onyx.db.engine import get_session_with_current_tenant
+from onyx.db.engine import get_session_with_default_tenant
 from onyx.db.engine import SqlEngine
 from onyx.db.index_attempt import get_index_attempt
 from onyx.db.index_attempt import mark_attempt_canceled
@@ -38,7 +38,7 @@ from onyx.redis.redis_connector_index import RedisConnectorIndex
 from onyx.redis.redis_connector_prune import RedisConnectorPrune
 from onyx.redis.redis_connector_stop import RedisConnectorStop
 from onyx.redis.redis_document_set import RedisDocumentSet
-from onyx.redis.redis_pool import get_shared_redis_client
+from onyx.redis.redis_pool import get_redis_client
 from onyx.redis.redis_usergroup import RedisUserGroup
 from onyx.utils.logger import setup_logger
 from shared_configs.configs import MULTI_TENANT
@@ -47,7 +47,6 @@ logger = setup_logger()

 celery_app = Celery(__name__)
 celery_app.config_from_object("onyx.background.celery.configs.primary")
-celery_app.Task = app_base.TenantAwareTask  # type: ignore [misc]


@signals.task_prerun.connect
@@ -102,7 +101,7 @@ def on_worker_init(sender: Worker, **kwargs: Any) -> None:

    # This is singleton work that should be done on startup exactly once
    # by the primary worker. This is unnecessary in the multi tenant scenario
-    r = get_shared_redis_client()
+    r = get_redis_client(tenant_id=None)

    # Log the role and slave count - being connected to a slave or slave count > 0 could be problematic
    info: dict[str, Any] = cast(dict, r.info("replication"))
@@ -159,7 +158,7 @@ def on_worker_init(sender: Worker, **kwargs: Any) -> None:
    RedisConnectorExternalGroupSync.reset_all(r)

    # mark orphaned index attempts as failed
-    with get_session_with_current_tenant() as db_session:
+    with get_session_with_default_tenant() as db_session:
        unfenced_attempt_ids = get_unfenced_index_attempt_ids(db_session, r)
        for attempt_id in unfenced_attempt_ids:
            attempt = get_index_attempt(db_session, attempt_id)
@@ -235,7 +234,7 @@ class HubPeriodicTask(bootsteps.StartStopStep):

            lock: RedisLock = worker.primary_worker_lock

-            r = get_shared_redis_client()
+            r = get_redis_client(tenant_id=None)

            if lock.owned():
                task_logger.debug("Reacquiring primary worker lock.")
--- a/backend/onyx/background/celery/tasks/beat_schedule.py
+++ b/backend/onyx/background/celery/tasks/beat_schedule.py
@@ -36,15 +36,6 @@ beat_task_templates.extend(
                "expires": BEAT_EXPIRES_DEFAULT,
            },
        },
-        {
-            "name": "check-for-checkpoint-cleanup",
-            "task": OnyxCeleryTask.CHECK_FOR_CHECKPOINT_CLEANUP,
-            "schedule": timedelta(hours=1),
-            "options": {
-                "priority": OnyxCeleryPriority.LOW,
-                "expires": BEAT_EXPIRES_DEFAULT,
-            },
-        },
        {
            "name": "check-for-connector-deletion",
            "task": OnyxCeleryTask.CHECK_FOR_CONNECTOR_DELETION,
--- a/backend/onyx/background/celery/tasks/connector_deletion/tasks.py
+++ b/backend/onyx/background/celery/tasks/connector_deletion/tasks.py
@@ -27,7 +27,7 @@ from onyx.db.connector_credential_pair import get_connector_credential_pair_from
 from onyx.db.connector_credential_pair import get_connector_credential_pairs
 from onyx.db.document import get_document_ids_for_connector_credential_pair
 from onyx.db.document_set import delete_document_set_cc_pair_relationship__no_commit
-from onyx.db.engine import get_session_with_current_tenant
+from onyx.db.engine import get_session_with_tenant
 from onyx.db.enums import ConnectorCredentialPairStatus
 from onyx.db.enums import SyncStatus
 from onyx.db.enums import SyncType
@@ -62,8 +62,8 @@ class TaskDependencyError(RuntimeError):
 def check_for_connector_deletion_task(
    self: Task, *, tenant_id: str | None
 ) -> bool | None:
-    r = get_redis_client()
-    r_replica = get_redis_replica_client()
+    r = get_redis_client(tenant_id=tenant_id)
+    r_replica = get_redis_replica_client(tenant_id=tenant_id)

    lock_beat: RedisLock = r.lock(
        OnyxRedisLocks.CHECK_CONNECTOR_DELETION_BEAT_LOCK,
@@ -77,14 +77,14 @@ def check_for_connector_deletion_task(
    try:
        # collect cc_pair_ids
        cc_pair_ids: list[int] = []
-        with get_session_with_current_tenant() as db_session:
+        with get_session_with_tenant(tenant_id) as db_session:
            cc_pairs = get_connector_credential_pairs(db_session)
            for cc_pair in cc_pairs:
                cc_pair_ids.append(cc_pair.id)

        # try running cleanup on the cc_pair_ids
        for cc_pair_id in cc_pair_ids:
-            with get_session_with_current_tenant() as db_session:
+            with get_session_with_tenant(tenant_id) as db_session:
                redis_connector = RedisConnector(tenant_id, cc_pair_id)
                try:
                    try_generate_document_cc_pair_cleanup_tasks(
@@ -277,7 +277,7 @@ def monitor_connector_deletion_taskset(
        f"Connector deletion progress: cc_pair={cc_pair_id} remaining={remaining} initial={fence_data.num_tasks}"
    )
    if remaining > 0:
-        with get_session_with_current_tenant() as db_session:
+        with get_session_with_tenant(tenant_id) as db_session:
            update_sync_record_status(
                db_session=db_session,
                entity_id=cc_pair_id,
@@ -287,7 +287,7 @@ def monitor_connector_deletion_taskset(
            )
        return

-    with get_session_with_current_tenant() as db_session:
+    with get_session_with_tenant(tenant_id) as db_session:
        cc_pair = get_connector_credential_pair_from_id(
            db_session=db_session,
            cc_pair_id=cc_pair_id,
--- a/backend/onyx/background/celery/tasks/doc_permission_syncing/tasks.py
+++ b/backend/onyx/background/celery/tasks/doc_permission_syncing/tasks.py
@@ -45,7 +45,7 @@ from onyx.configs.constants import OnyxRedisSignals
 from onyx.db.connector import mark_cc_pair_as_permissions_synced
 from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
 from onyx.db.document import upsert_document_by_connector_credential_pair
-from onyx.db.engine import get_session_with_current_tenant
+from onyx.db.engine import get_session_with_tenant
 from onyx.db.enums import AccessType
 from onyx.db.enums import ConnectorCredentialPairStatus
 from onyx.db.enums import SyncStatus
@@ -119,13 +119,13 @@ def _is_external_doc_permissions_sync_due(cc_pair: ConnectorCredentialPair) -> b
    soft_time_limit=JOB_TIMEOUT,
    bind=True,
 )
-def check_for_doc_permissions_sync(self: Task, *, tenant_id: str) -> bool | None:
+def check_for_doc_permissions_sync(self: Task, *, tenant_id: str | None) -> bool | None:
    # TODO(rkuo): merge into check function after lookup table for fences is added

    # we need to use celery's redis client to access its redis data
    # (which lives on a different db number)
-    r = get_redis_client()
-    r_replica = get_redis_replica_client()
+    r = get_redis_client(tenant_id=tenant_id)
+    r_replica = get_redis_replica_client(tenant_id=tenant_id)
    r_celery: Redis = self.app.broker_connection().channel().client  # type: ignore

    lock_beat: RedisLock = r.lock(
@@ -140,7 +140,7 @@ def check_for_doc_permissions_sync(self: Task, *, tenant_id: str) -> bool | None
    try:
        # get all cc pairs that need to be synced
        cc_pair_ids_to_sync: list[int] = []
-        with get_session_with_current_tenant() as db_session:
+        with get_session_with_tenant(tenant_id) as db_session:
            cc_pairs = get_all_auto_sync_cc_pairs(db_session)

            for cc_pair in cc_pairs:
@@ -189,7 +189,7 @@ def check_for_doc_permissions_sync(self: Task, *, tenant_id: str) -> bool | None

            key_str = key_bytes.decode("utf-8")
            if key_str.startswith(RedisConnectorPermissionSync.FENCE_PREFIX):
-                with get_session_with_current_tenant() as db_session:
+                with get_session_with_tenant(tenant_id) as db_session:
                    monitor_ccpair_permissions_taskset(
                        tenant_id, key_bytes, r, db_session
                    )
@@ -247,7 +247,7 @@ def try_creating_permissions_sync_task(
        # create before setting fence to avoid race condition where the monitoring
        # task updates the sync record before it is created
        try:
-            with get_session_with_current_tenant() as db_session:
+            with get_session_with_tenant(tenant_id) as db_session:
                insert_sync_record(
                    db_session=db_session,
                    entity_id=cc_pair_id,
@@ -321,7 +321,7 @@ def connector_permission_sync_generator_task(

    redis_connector = RedisConnector(tenant_id, cc_pair_id)

-    r = get_redis_client()
+    r = get_redis_client(tenant_id=tenant_id)

    # this wait is needed to avoid a race condition where
    # the primary worker sends the task and it is immediately executed
@@ -378,7 +378,7 @@ def connector_permission_sync_generator_task(
        return None

    try:
-        with get_session_with_current_tenant() as db_session:
+        with get_session_with_tenant(tenant_id) as db_session:
            cc_pair = get_connector_credential_pair_from_id(
                db_session=db_session,
                cc_pair_id=cc_pair_id,
@@ -480,8 +480,7 @@ def update_external_document_permissions_task(
    external_access = document_external_access.external_access

    try:
-        with get_session_with_current_tenant() as db_session:
-            # Add the users to the DB if they don't exist
+        with get_session_with_tenant(tenant_id) as db_session:
            batch_add_ext_perm_user_if_not_exists(
                db_session=db_session,
                emails=list(external_access.external_user_emails),
--- a/backend/onyx/background/celery/tasks/external_group_syncing/tasks.py
+++ b/backend/onyx/background/celery/tasks/external_group_syncing/tasks.py
@@ -39,7 +39,7 @@ from onyx.configs.constants import OnyxRedisLocks
 from onyx.configs.constants import OnyxRedisSignals
 from onyx.db.connector import mark_cc_pair_as_external_group_synced
 from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
-from onyx.db.engine import get_session_with_current_tenant
+from onyx.db.engine import get_session_with_tenant
 from onyx.db.enums import AccessType
 from onyx.db.enums import ConnectorCredentialPairStatus
 from onyx.db.enums import SyncStatus
@@ -122,8 +122,8 @@ def _is_external_group_sync_due(cc_pair: ConnectorCredentialPair) -> bool:
 def check_for_external_group_sync(self: Task, *, tenant_id: str | None) -> bool | None:
    # we need to use celery's redis client to access its redis data
    # (which lives on a different db number)
-    r = get_redis_client()
-    r_replica = get_redis_replica_client()
+    r = get_redis_client(tenant_id=tenant_id)
+    r_replica = get_redis_replica_client(tenant_id=tenant_id)
    r_celery: Redis = self.app.broker_connection().channel().client  # type: ignore

    lock_beat: RedisLock = r.lock(
@@ -140,7 +140,7 @@ def check_for_external_group_sync(self: Task, *, tenant_id: str | None) -> bool

    try:
        cc_pair_ids_to_sync: list[int] = []
-        with get_session_with_current_tenant() as db_session:
+        with get_session_with_tenant(tenant_id) as db_session:
            cc_pairs = get_all_auto_sync_cc_pairs(db_session)

            # We only want to sync one cc_pair per source type in
@@ -230,7 +230,7 @@ def try_creating_external_group_sync_task(
        # create before setting fence to avoid race condition where the monitoring
        # task updates the sync record before it is created
        try:
-            with get_session_with_current_tenant() as db_session:
+            with get_session_with_tenant(tenant_id) as db_session:
                insert_sync_record(
                    db_session=db_session,
                    entity_id=cc_pair_id,
@@ -296,7 +296,7 @@ def connector_external_group_sync_generator_task(

    redis_connector = RedisConnector(tenant_id, cc_pair_id)

-    r = get_redis_client()
+    r = get_redis_client(tenant_id=tenant_id)

    # this wait is needed to avoid a race condition where
    # the primary worker sends the task and it is immediately executed
@@ -357,11 +357,10 @@ def connector_external_group_sync_generator_task(
        payload.started = datetime.now(timezone.utc)
        redis_connector.external_group_sync.set_fence(payload)

-        with get_session_with_current_tenant() as db_session:
+        with get_session_with_tenant(tenant_id) as db_session:
            cc_pair = get_connector_credential_pair_from_id(
                db_session=db_session,
                cc_pair_id=cc_pair_id,
-                eager_load_credential=True,
            )
            if cc_pair is None:
                raise ValueError(
@@ -385,7 +384,6 @@ def connector_external_group_sync_generator_task(
            logger.info(
                f"Syncing {len(external_user_groups)} external user groups for {source_type}"
            )
-            logger.debug(f"New external user groups: {external_user_groups}")

            replace_user__ext_group_for_cc_pair(
                db_session=db_session,
@@ -410,7 +408,7 @@ def connector_external_group_sync_generator_task(
        task_logger.exception(msg)
        emit_background_error(msg + f"\n\n{e}", cc_pair_id=cc_pair_id)

-        with get_session_with_current_tenant() as db_session:
+        with get_session_with_tenant(tenant_id) as db_session:
            update_sync_record_status(
                db_session=db_session,
                entity_id=cc_pair_id,
@@ -461,6 +459,7 @@ def validate_external_group_sync_fences(
        )

        lock_beat.reacquire()
+
    return


--- a/backend/onyx/background/celery/tasks/indexing/tasks.py
+++ b/backend/onyx/background/celery/tasks/indexing/tasks.py
@@ -1,10 +1,9 @@
 import multiprocessing
 import os
+import sys
 import time
-import traceback
 from datetime import datetime
 from datetime import timezone
-from enum import Enum
 from http import HTTPStatus
 from time import sleep
 from typing import Any
@@ -16,7 +15,6 @@ from celery import Task
 from celery.exceptions import SoftTimeLimitExceeded
 from celery.result import AsyncResult
 from celery.states import READY_STATES
-from pydantic import BaseModel
 from redis import Redis
 from redis.lock import Lock as RedisLock
 from sqlalchemy.orm import Session
@@ -28,31 +26,22 @@ from onyx.background.celery.tasks.indexing.utils import get_unfenced_index_attem
 from onyx.background.celery.tasks.indexing.utils import IndexingCallback
 from onyx.background.celery.tasks.indexing.utils import try_creating_indexing_task
 from onyx.background.celery.tasks.indexing.utils import validate_indexing_fences
-from onyx.background.indexing.checkpointing_utils import cleanup_checkpoint
-from onyx.background.indexing.checkpointing_utils import (
-    get_index_attempts_with_old_checkpoints,
-)
-from onyx.background.indexing.job_client import SimpleJob
 from onyx.background.indexing.job_client import SimpleJobClient
-from onyx.background.indexing.job_client import SimpleJobException
 from onyx.background.indexing.run_indexing import run_indexing_entrypoint
 from onyx.configs.app_configs import MANAGED_VESPA
 from onyx.configs.app_configs import VESPA_CLOUD_CERT_PATH
 from onyx.configs.app_configs import VESPA_CLOUD_KEY_PATH
 from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
 from onyx.configs.constants import CELERY_INDEXING_LOCK_TIMEOUT
-from onyx.configs.constants import CELERY_INDEXING_WATCHDOG_CONNECTOR_TIMEOUT
 from onyx.configs.constants import CELERY_TASK_WAIT_FOR_FENCE_TIMEOUT
-from onyx.configs.constants import OnyxCeleryQueues
 from onyx.configs.constants import OnyxCeleryTask
 from onyx.configs.constants import OnyxRedisConstants
 from onyx.configs.constants import OnyxRedisLocks
 from onyx.configs.constants import OnyxRedisSignals
-from onyx.connectors.interfaces import ConnectorValidationError
 from onyx.db.connector import mark_ccpair_with_indexing_trigger
 from onyx.db.connector_credential_pair import fetch_connector_credential_pairs
 from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
-from onyx.db.engine import get_session_with_current_tenant
+from onyx.db.engine import get_session_with_tenant
 from onyx.db.enums import IndexingMode
 from onyx.db.enums import IndexingStatus
 from onyx.db.index_attempt import get_index_attempt
@@ -81,134 +70,6 @@ from shared_configs.configs import SENTRY_DSN
 logger = setup_logger()


-class IndexingWatchdogTerminalStatus(str, Enum):
-    """The different statuses the watchdog can finish with.
-
-    TODO: create broader success/failure/abort categories
-    """
-
-    UNDEFINED = "undefined"
-
-    SUCCEEDED = "succeeded"
-
-    SPAWN_FAILED = "spawn_failed"  # connector spawn failed
-    SPAWN_NOT_ALIVE = (
-        "spawn_not_alive"  # spawn succeeded but process did not come alive
-    )
-
-    BLOCKED_BY_DELETION = "blocked_by_deletion"
-    BLOCKED_BY_STOP_SIGNAL = "blocked_by_stop_signal"
-    FENCE_NOT_FOUND = "fence_not_found"  # fence does not exist
-    FENCE_READINESS_TIMEOUT = (
-        "fence_readiness_timeout"  # fence exists but wasn't ready within the timeout
-    )
-    FENCE_MISMATCH = "fence_mismatch"  # task and fence metadata mismatch
-    TASK_ALREADY_RUNNING = "task_already_running"  # task appears to be running already
-    INDEX_ATTEMPT_MISMATCH = (
-        "index_attempt_mismatch"  # expected index attempt metadata not found in db
-    )
-
-    CONNECTOR_VALIDATION_ERROR = (
-        "connector_validation_error"  # the connector validation failed
-    )
-    CONNECTOR_EXCEPTIONED = "connector_exceptioned"  # the connector itself exceptioned
-    WATCHDOG_EXCEPTIONED = "watchdog_exceptioned"  # the watchdog exceptioned
-
-    # the watchdog received a termination signal
-    TERMINATED_BY_SIGNAL = "terminated_by_signal"
-
-    # the watchdog terminated the task due to no activity
-    TERMINATED_BY_ACTIVITY_TIMEOUT = "terminated_by_activity_timeout"
-
-    # NOTE: this may actually be the same as SIGKILL, but parsed differently by python
-    # consolidate once we know more
-    OUT_OF_MEMORY = "out_of_memory"
-
-    PROCESS_SIGNAL_SIGKILL = "process_signal_sigkill"
-
-    @property
-    def code(self) -> int:
-        _ENUM_TO_CODE: dict[IndexingWatchdogTerminalStatus, int] = {
-            IndexingWatchdogTerminalStatus.PROCESS_SIGNAL_SIGKILL: -9,
-            IndexingWatchdogTerminalStatus.OUT_OF_MEMORY: 137,
-            IndexingWatchdogTerminalStatus.CONNECTOR_VALIDATION_ERROR: 247,
-            IndexingWatchdogTerminalStatus.BLOCKED_BY_DELETION: 248,
-            IndexingWatchdogTerminalStatus.BLOCKED_BY_STOP_SIGNAL: 249,
-            IndexingWatchdogTerminalStatus.FENCE_NOT_FOUND: 250,
-            IndexingWatchdogTerminalStatus.FENCE_READINESS_TIMEOUT: 251,
-            IndexingWatchdogTerminalStatus.FENCE_MISMATCH: 252,
-            IndexingWatchdogTerminalStatus.TASK_ALREADY_RUNNING: 253,
-            IndexingWatchdogTerminalStatus.INDEX_ATTEMPT_MISMATCH: 254,
-            IndexingWatchdogTerminalStatus.CONNECTOR_EXCEPTIONED: 255,
-        }
-
-        return _ENUM_TO_CODE[self]
-
-    @classmethod
-    def from_code(cls, code: int) -> "IndexingWatchdogTerminalStatus":
-        _CODE_TO_ENUM: dict[int, IndexingWatchdogTerminalStatus] = {
-            -9: IndexingWatchdogTerminalStatus.PROCESS_SIGNAL_SIGKILL,
-            137: IndexingWatchdogTerminalStatus.OUT_OF_MEMORY,
-            247: IndexingWatchdogTerminalStatus.CONNECTOR_VALIDATION_ERROR,
-            248: IndexingWatchdogTerminalStatus.BLOCKED_BY_DELETION,
-            249: IndexingWatchdogTerminalStatus.BLOCKED_BY_STOP_SIGNAL,
-            250: IndexingWatchdogTerminalStatus.FENCE_NOT_FOUND,
-            251: IndexingWatchdogTerminalStatus.FENCE_READINESS_TIMEOUT,
-            252: IndexingWatchdogTerminalStatus.FENCE_MISMATCH,
-            253: IndexingWatchdogTerminalStatus.TASK_ALREADY_RUNNING,
-            254: IndexingWatchdogTerminalStatus.INDEX_ATTEMPT_MISMATCH,
-            255: IndexingWatchdogTerminalStatus.CONNECTOR_EXCEPTIONED,
-        }
-
-        if code in _CODE_TO_ENUM:
-            return _CODE_TO_ENUM[code]
-
-        return IndexingWatchdogTerminalStatus.UNDEFINED
-
-
-class SimpleJobResult:
-    """The data we want to have when the watchdog finishes"""
-
-    def __init__(self) -> None:
-        self.status = IndexingWatchdogTerminalStatus.UNDEFINED
-        self.connector_source = None
-        self.exit_code = None
-        self.exception_str = None
-
-    status: IndexingWatchdogTerminalStatus
-    connector_source: str | None
-    exit_code: int | None
-    exception_str: str | None
-
-
-class ConnectorIndexingContext(BaseModel):
-    tenant_id: str | None
-    cc_pair_id: int
-    search_settings_id: int
-    index_attempt_id: int
-
-
-class ConnectorIndexingLogBuilder:
-    def __init__(self, ctx: ConnectorIndexingContext):
-        self.ctx = ctx
-
-    def build(self, msg: str, **kwargs: Any) -> str:
-        msg_final = (
-            f"{msg}: "
-            f"tenant_id={self.ctx.tenant_id} "
-            f"attempt={self.ctx.index_attempt_id} "
-            f"cc_pair={self.ctx.cc_pair_id} "
-            f"search_settings={self.ctx.search_settings_id}"
-        )
-
-        # Append extra keyword arguments in logfmt style
-        if kwargs:
-            extra_logfmt = " ".join(f"{key}={value}" for key, value in kwargs.items())
-            msg_final = f"{msg_final} {extra_logfmt}"
-
-        return msg_final
-
-
 def monitor_ccpair_indexing_taskset(
    tenant_id: str | None, key_bytes: bytes, r: Redis, db_session: Session
 ) -> None:
@@ -361,13 +222,12 @@ def monitor_ccpair_indexing_taskset(
 def check_for_indexing(self: Task, *, tenant_id: str | None) -> int | None:
    """a lightweight task used to kick off indexing tasks.
    Occcasionally does some validation of existing state to clear up error conditions"""
-
    time_start = time.monotonic()

    tasks_created = 0
    locked = False
-    redis_client = get_redis_client()
-    redis_client_replica = get_redis_replica_client()
+    redis_client = get_redis_client(tenant_id=tenant_id)
+    redis_client_replica = get_redis_replica_client(tenant_id=tenant_id)

    # we need to use celery's redis client to access its redis data
    # (which lives on a different db number)
@@ -405,7 +265,7 @@ def check_for_indexing(self: Task, *, tenant_id: str | None) -> int | None:
        # 1/3: KICKOFF

        # check for search settings swap
-        with get_session_with_current_tenant() as db_session:
+        with get_session_with_tenant(tenant_id=tenant_id) as db_session:
            old_search_settings = check_index_swap(db_session=db_session)
            current_search_settings = get_current_search_settings(db_session)
            # So that the first time users aren't surprised by really slow speed of first
@@ -426,7 +286,7 @@ def check_for_indexing(self: Task, *, tenant_id: str | None) -> int | None:
        # gather cc_pair_ids
        lock_beat.reacquire()
        cc_pair_ids: list[int] = []
-        with get_session_with_current_tenant() as db_session:
+        with get_session_with_tenant(tenant_id) as db_session:
            cc_pairs = fetch_connector_credential_pairs(db_session)
            for cc_pair_entry in cc_pairs:
                cc_pair_ids.append(cc_pair_entry.id)
@@ -436,7 +296,7 @@ def check_for_indexing(self: Task, *, tenant_id: str | None) -> int | None:
            lock_beat.reacquire()

            redis_connector = RedisConnector(tenant_id, cc_pair_id)
-            with get_session_with_current_tenant() as db_session:
+            with get_session_with_tenant(tenant_id) as db_session:
                search_settings_list = get_active_search_settings_list(db_session)
                for search_settings_instance in search_settings_list:
                    redis_connector_index = redis_connector.new_index(
@@ -514,7 +374,7 @@ def check_for_indexing(self: Task, *, tenant_id: str | None) -> int | None:

        # Fail any index attempts in the DB that don't have fences
        # This shouldn't ever happen!
-        with get_session_with_current_tenant() as db_session:
+        with get_session_with_tenant(tenant_id) as db_session:
            unfenced_attempt_ids = get_unfenced_index_attempt_ids(
                db_session, redis_client
            )
@@ -566,7 +426,7 @@ def check_for_indexing(self: Task, *, tenant_id: str | None) -> int | None:

            key_str = key_bytes.decode("utf-8")
            if key_str.startswith(RedisConnectorIndex.FENCE_PREFIX):
-                with get_session_with_current_tenant() as db_session:
+                with get_session_with_tenant(tenant_id) as db_session:
                    monitor_ccpair_indexing_taskset(
                        tenant_id, key_bytes, redis_client_replica, db_session
                    )
@@ -597,8 +457,8 @@ def connector_indexing_task(
    index_attempt_id: int,
    cc_pair_id: int,
    search_settings_id: int,
-    is_ee: bool,
    tenant_id: str | None,
+    is_ee: bool,
 ) -> int | None:
    """Indexing task. For a cc pair, this task pulls all document IDs from the source
    and compares those IDs to locally stored documents and deletes all locally stored IDs missing
@@ -636,6 +496,7 @@ def connector_indexing_task(
        f"search_settings={search_settings_id}"
    )

+    attempt_found = False
    n_final_progress: int | None = None

    # 20 is the documented default for httpx max_keepalive_connections
@@ -649,24 +510,22 @@ def connector_indexing_task(
    redis_connector = RedisConnector(tenant_id, cc_pair_id)
    redis_connector_index = redis_connector.new_index(search_settings_id)

-    r = get_redis_client()
+    r = get_redis_client(tenant_id=tenant_id)

    if redis_connector.delete.fenced:
-        raise SimpleJobException(
+        raise RuntimeError(
            f"Indexing will not start because connector deletion is in progress: "
            f"attempt={index_attempt_id} "
            f"cc_pair={cc_pair_id} "
-            f"fence={redis_connector.delete.fence_key}",
-            code=IndexingWatchdogTerminalStatus.BLOCKED_BY_DELETION.code,
+            f"fence={redis_connector.delete.fence_key}"
        )

    if redis_connector.stop.fenced:
-        raise SimpleJobException(
+        raise RuntimeError(
            f"Indexing will not start because a connector stop signal was detected: "
            f"attempt={index_attempt_id} "
            f"cc_pair={cc_pair_id} "
-            f"fence={redis_connector.stop.fence_key}",
-            code=IndexingWatchdogTerminalStatus.BLOCKED_BY_STOP_SIGNAL.code,
+            f"fence={redis_connector.stop.fence_key}"
        )

    # this wait is needed to avoid a race condition where
@@ -675,24 +534,19 @@ def connector_indexing_task(
    start = time.monotonic()
    while True:
        if time.monotonic() - start > CELERY_TASK_WAIT_FOR_FENCE_TIMEOUT:
-            raise SimpleJobException(
+            raise ValueError(
                f"connector_indexing_task - timed out waiting for fence to be ready: "
-                f"fence={redis_connector.permissions.fence_key}",
-                code=IndexingWatchdogTerminalStatus.FENCE_READINESS_TIMEOUT.code,
+                f"fence={redis_connector.permissions.fence_key}"
            )

        if not redis_connector_index.fenced:  # The fence must exist
-            raise SimpleJobException(
-                f"connector_indexing_task - fence not found: fence={redis_connector_index.fence_key}",
-                code=IndexingWatchdogTerminalStatus.FENCE_NOT_FOUND.code,
+            raise ValueError(
+                f"connector_indexing_task - fence not found: fence={redis_connector_index.fence_key}"
            )

        payload = redis_connector_index.payload  # The payload must exist
        if not payload:
-            raise SimpleJobException(
-                "connector_indexing_task: payload invalid or not found",
-                code=IndexingWatchdogTerminalStatus.FENCE_NOT_FOUND.code,
-            )
+            raise ValueError("connector_indexing_task: payload invalid or not found")

        if payload.index_attempt_id is None or payload.celery_task_id is None:
            logger.info(
@@ -702,11 +556,10 @@ def connector_indexing_task(
            continue

        if payload.index_attempt_id != index_attempt_id:
-            raise SimpleJobException(
+            raise ValueError(
                f"connector_indexing_task - id mismatch. Task may be left over from previous run.: "
                f"task_index_attempt={index_attempt_id} "
-                f"payload_index_attempt={payload.index_attempt_id}",
-                code=IndexingWatchdogTerminalStatus.FENCE_MISMATCH.code,
+                f"payload_index_attempt={payload.index_attempt_id}"
            )

        logger.info(
@@ -730,26 +583,19 @@ def connector_indexing_task(
            f"cc_pair={cc_pair_id} "
            f"search_settings={search_settings_id}"
        )
-
-        raise SimpleJobException(
-            f"Indexing task already running, exiting...: "
-            f"index_attempt={index_attempt_id} "
-            f"cc_pair={cc_pair_id} "
-            f"search_settings={search_settings_id}",
-            code=IndexingWatchdogTerminalStatus.TASK_ALREADY_RUNNING.code,
-        )
+        return None

    payload.started = datetime.now(timezone.utc)
    redis_connector_index.set_fence(payload)

    try:
-        with get_session_with_current_tenant() as db_session:
+        with get_session_with_tenant(tenant_id) as db_session:
            attempt = get_index_attempt(db_session, index_attempt_id)
            if not attempt:
-                raise SimpleJobException(
-                    f"Index attempt not found: index_attempt={index_attempt_id}",
-                    code=IndexingWatchdogTerminalStatus.INDEX_ATTEMPT_MISMATCH.code,
+                raise ValueError(
+                    f"Index attempt not found: index_attempt={index_attempt_id}"
                )
+            attempt_found = True

            cc_pair = get_connector_credential_pair_from_id(
                db_session=db_session,
@@ -757,30 +603,25 @@ def connector_indexing_task(
            )

            if not cc_pair:
-                raise SimpleJobException(
-                    f"cc_pair not found: cc_pair={cc_pair_id}",
-                    code=IndexingWatchdogTerminalStatus.INDEX_ATTEMPT_MISMATCH.code,
-                )
+                raise ValueError(f"cc_pair not found: cc_pair={cc_pair_id}")

            if not cc_pair.connector:
-                raise SimpleJobException(
-                    f"Connector not found: cc_pair={cc_pair_id} connector={cc_pair.connector_id}",
-                    code=IndexingWatchdogTerminalStatus.INDEX_ATTEMPT_MISMATCH.code,
+                raise ValueError(
+                    f"Connector not found: cc_pair={cc_pair_id} connector={cc_pair.connector_id}"
                )

            if not cc_pair.credential:
-                raise SimpleJobException(
-                    f"Credential not found: cc_pair={cc_pair_id} credential={cc_pair.credential_id}",
-                    code=IndexingWatchdogTerminalStatus.INDEX_ATTEMPT_MISMATCH.code,
+                raise ValueError(
+                    f"Credential not found: cc_pair={cc_pair_id} credential={cc_pair.credential_id}"
                )

        # define a callback class
        callback = IndexingCallback(
            os.getppid(),
            redis_connector,
+            redis_connector_index,
            lock,
            r,
-            redis_connector_index,
        )

        logger.info(
@@ -802,15 +643,6 @@ def connector_indexing_task(
        # get back the total number of indexed docs and return it
        n_final_progress = redis_connector_index.get_progress()
        redis_connector_index.set_generator_complete(HTTPStatus.OK.value)
-    except ConnectorValidationError:
-        raise SimpleJobException(
-            f"Indexing task failed: attempt={index_attempt_id} "
-            f"tenant={tenant_id} "
-            f"cc_pair={cc_pair_id} "
-            f"search_settings={search_settings_id}",
-            code=IndexingWatchdogTerminalStatus.CONNECTOR_VALIDATION_ERROR.code,
-        )
-
    except Exception as e:
        logger.exception(
            f"Indexing spawned task failed: attempt={index_attempt_id} "
@@ -818,8 +650,22 @@ def connector_indexing_task(
            f"cc_pair={cc_pair_id} "
            f"search_settings={search_settings_id}"
        )
-        raise e
+        if attempt_found:
+            try:
+                with get_session_with_tenant(tenant_id) as db_session:
+                    mark_attempt_failed(
+                        index_attempt_id, db_session, failure_reason=str(e)
+                    )
+            except Exception:
+                logger.exception(
+                    "Indexing watchdog - transient exception looking up index attempt: "
+                    f"attempt={index_attempt_id} "
+                    f"tenant={tenant_id} "
+                    f"cc_pair={cc_pair_id} "
+                    f"search_settings={search_settings_id}"
+                )

+        raise e
    finally:
        if lock.owned():
            lock.release()
@@ -832,49 +678,41 @@ def connector_indexing_task(
    return n_final_progress


-def process_job_result(
-    job: SimpleJob,
-    connector_source: str | None,
-    redis_connector_index: RedisConnectorIndex,
-    log_builder: ConnectorIndexingLogBuilder,
-) -> SimpleJobResult:
-    result = SimpleJobResult()
-    result.connector_source = connector_source
+def connector_indexing_task_wrapper(
+    index_attempt_id: int,
+    cc_pair_id: int,
+    search_settings_id: int,
+    tenant_id: str | None,
+    is_ee: bool,
+) -> int | None:
+    """Just wraps connector_indexing_task so we can log any exceptions before
+    re-raising it."""
+    result: int | None = None

-    if job.process:
-        result.exit_code = job.process.exitcode
-
-    if job.status != "error":
-        result.status = IndexingWatchdogTerminalStatus.SUCCEEDED
-        return result
-
-    ignore_exitcode = False
-
-    # In EKS, there is an edge case where successful tasks return exit
-    # code 1 in the cloud due to the set_spawn_method not sticking.
-    # We've since worked around this, but the following is a safe way to
-    # work around this issue. Basically, we ignore the job error state
-    # if the completion signal is OK.
-    status_int = redis_connector_index.get_completion()
-    if status_int:
-        status_enum = HTTPStatus(status_int)
-        if status_enum == HTTPStatus.OK:
-            ignore_exitcode = True
-
-    if ignore_exitcode:
-        result.status = IndexingWatchdogTerminalStatus.SUCCEEDED
-        task_logger.warning(
-            log_builder.build(
-                "Indexing watchdog - spawned task has non-zero exit code "
-                "but completion signal is OK. Continuing...",
-                exit_code=str(result.exit_code),
-            )
+    try:
+        result = connector_indexing_task(
+            index_attempt_id,
+            cc_pair_id,
+            search_settings_id,
+            tenant_id,
+            is_ee,
+        )
+    except Exception:
+        logger.exception(
+            f"connector_indexing_task exceptioned: "
+            f"tenant={tenant_id} "
+            f"index_attempt={index_attempt_id} "
+            f"cc_pair={cc_pair_id} "
+            f"search_settings={search_settings_id}"
        )
-    else:
-        if result.exit_code is not None:
-            result.status = IndexingWatchdogTerminalStatus.from_code(result.exit_code)

-        result.exception_str = job.exception()
+        # There is a cloud related bug outside of our code
+        # where spawned tasks return with an exit code of 1.
+        # Unfortunately, exceptions also return with an exit code of 1,
+        # so just raising an exception isn't informative
+        # Exiting with 255 makes it possible to distinguish between normal exits
+        # and exceptions.
+        sys.exit(255)

    return result

@@ -892,32 +730,12 @@ def connector_indexing_proxy_task(
    search_settings_id: int,
    tenant_id: str | None,
 ) -> None:
-    """celery out of process task execution strategy is pool=prefork, but it uses fork,
-    and forking is inherently unstable.
-
-    To work around this, we use pool=threads and proxy our work to a spawned task.
-
-    TODO(rkuo): refactor this so that there is a single return path where we canonically
-    log the result of running this function.
-    """
-    start = time.monotonic()
-
-    result = SimpleJobResult()
-
-    ctx = ConnectorIndexingContext(
-        tenant_id=tenant_id,
-        cc_pair_id=cc_pair_id,
-        search_settings_id=search_settings_id,
-        index_attempt_id=index_attempt_id,
-    )
-
-    log_builder = ConnectorIndexingLogBuilder(ctx)
-
+    """celery tasks are forked, but forking is unstable.  This proxies work to a spawned task."""
    task_logger.info(
-        log_builder.build(
-            "Indexing watchdog - starting",
-            mp_start_method=str(multiprocessing.get_start_method()),
-        )
+        f"Indexing watchdog - starting: attempt={index_attempt_id} "
+        f"cc_pair={cc_pair_id} "
+        f"search_settings={search_settings_id} "
+        f"mp_start_method={multiprocessing.get_start_method()}"
    )

    if not self.request.id:
@@ -926,297 +744,149 @@ def connector_indexing_proxy_task(
    client = SimpleJobClient()

    job = client.submit(
-        connector_indexing_task,
+        connector_indexing_task_wrapper,
        index_attempt_id,
        cc_pair_id,
        search_settings_id,
-        global_version.is_ee_version(),
        tenant_id,
+        global_version.is_ee_version(),
+        pure=False,
    )

-    if not job or not job.process:
-        result.status = IndexingWatchdogTerminalStatus.SPAWN_FAILED
+    if not job:
        task_logger.info(
-            log_builder.build(
-                "Indexing watchdog - finished",
-                status=str(result.status.value),
-                exit_code=str(result.exit_code),
-            )
+            f"Indexing watchdog - spawn failed: attempt={index_attempt_id} "
+            f"cc_pair={cc_pair_id} "
+            f"search_settings={search_settings_id}"
        )
        return

-    # Ensure the process has moved out of the starting state
-    num_waits = 0
-    while True:
-        if num_waits > 15:
-            result.status = IndexingWatchdogTerminalStatus.SPAWN_NOT_ALIVE
-            task_logger.info(
-                log_builder.build(
-                    "Indexing watchdog - finished",
-                    status=str(result.status.value),
-                    exit_code=str(result.exit_code),
-                )
-            )
-            job.release()
-            return
-
-        if job.process.is_alive() or job.process.exitcode is not None:
-            break
-
-        sleep(1)
-        num_waits += 1
-
    task_logger.info(
-        log_builder.build(
-            "Indexing watchdog - spawn succeeded",
-            pid=str(job.process.pid),
-        )
+        f"Indexing watchdog - spawn succeeded: attempt={index_attempt_id} "
+        f"cc_pair={cc_pair_id} "
+        f"search_settings={search_settings_id}"
    )

    redis_connector = RedisConnector(tenant_id, cc_pair_id)
    redis_connector_index = redis_connector.new_index(search_settings_id)

-    try:
-        with get_session_with_current_tenant() as db_session:
-            index_attempt = get_index_attempt(
-                db_session=db_session, index_attempt_id=index_attempt_id
-            )
-            if not index_attempt:
-                raise RuntimeError("Index attempt not found")
+    while True:
+        sleep(5)

-            result.connector_source = (
-                index_attempt.connector_credential_pair.connector.source.value
-            )
+        # renew watchdog signal (this has a shorter timeout than set_active)
+        redis_connector_index.set_watchdog(True)

-        redis_connector_index.set_active()  # renew active signal
-        redis_connector_index.set_connector_active()  # prime the connective active signal
+        # renew active signal
+        redis_connector_index.set_active()

-        while True:
-            sleep(5)
-
-            # renew watchdog signal (this has a shorter timeout than set_active)
-            redis_connector_index.set_watchdog(True)
-
-            # renew active signal
-            redis_connector_index.set_active()
-
-            # if the job is done, clean up and break
-            if job.done():
-                try:
-                    result = process_job_result(
-                        job, result.connector_source, redis_connector_index, log_builder
-                    )
-                except Exception:
-                    task_logger.exception(
-                        log_builder.build(
-                            "Indexing watchdog - spawned task exceptioned"
-                        )
-                    )
-                finally:
-                    job.release()
-                    break
-
-            # if a termination signal is detected, clean up and break
-            if self.request.id and redis_connector_index.terminating(self.request.id):
-                task_logger.warning(
-                    log_builder.build("Indexing watchdog - termination signal detected")
-                )
-
-                result.status = IndexingWatchdogTerminalStatus.TERMINATED_BY_SIGNAL
-                break
-
-            if not redis_connector_index.connector_active():
-                task_logger.warning(
-                    log_builder.build(
-                        "Indexing watchdog - activity timeout exceeded",
-                        timeout=f"{CELERY_INDEXING_WATCHDOG_CONNECTOR_TIMEOUT}s",
-                    )
-                )
-
-                try:
-                    with get_session_with_current_tenant() as db_session:
-                        mark_attempt_failed(
-                            index_attempt_id,
-                            db_session,
-                            "Indexing watchdog - activity timeout exceeded: "
-                            f"attempt={index_attempt_id} "
-                            f"timeout={CELERY_INDEXING_WATCHDOG_CONNECTOR_TIMEOUT}s",
-                        )
-                except Exception:
-                    # if the DB exceptions, we'll just get an unfriendly failure message
-                    # in the UI instead of the cancellation message
-                    logger.exception(
-                        log_builder.build(
-                            "Indexing watchdog - transient exception marking index attempt as failed"
-                        )
-                    )
-
-                job.cancel()
-                result.status = (
-                    IndexingWatchdogTerminalStatus.TERMINATED_BY_ACTIVITY_TIMEOUT
-                )
-                break
-
-            # if the spawned task is still running, restart the check once again
-            # if the index attempt is not in a finished status
+        # if the job is done, clean up and break
+        if job.done():
+            exit_code: int | None
            try:
-                with get_session_with_current_tenant() as db_session:
-                    index_attempt = get_index_attempt(
-                        db_session=db_session, index_attempt_id=index_attempt_id
+                if job.status == "error":
+                    ignore_exitcode = False
+
+                    exit_code = None
+                    if job.process:
+                        exit_code = job.process.exitcode
+
+                    # seeing odd behavior where spawned tasks usually return exit code 1 in the cloud,
+                    # even though logging clearly indicates successful completion
+                    # to work around this, we ignore the job error state if the completion signal is OK
+                    status_int = redis_connector_index.get_completion()
+                    if status_int:
+                        status_enum = HTTPStatus(status_int)
+                        if status_enum == HTTPStatus.OK:
+                            ignore_exitcode = True
+
+                    if not ignore_exitcode:
+                        raise RuntimeError("Spawned task exceptioned.")
+
+                    task_logger.warning(
+                        "Indexing watchdog - spawned task has non-zero exit code "
+                        "but completion signal is OK. Continuing...: "
+                        f"attempt={index_attempt_id} "
+                        f"tenant={tenant_id} "
+                        f"cc_pair={cc_pair_id} "
+                        f"search_settings={search_settings_id} "
+                        f"exit_code={exit_code}"
                    )
-
-                    if not index_attempt:
-                        continue
-
-                    if not index_attempt.is_finished():
-                        continue
            except Exception:
-                # if the DB exceptioned, just restart the check.
-                # polling the index attempt status doesn't need to be strongly consistent
-                task_logger.exception(
-                    log_builder.build(
-                        "Indexing watchdog - transient exception looking up index attempt"
+                task_logger.error(
+                    "Indexing watchdog - spawned task exceptioned: "
+                    f"attempt={index_attempt_id} "
+                    f"tenant={tenant_id} "
+                    f"cc_pair={cc_pair_id} "
+                    f"search_settings={search_settings_id} "
+                    f"exit_code={exit_code} "
+                    f"error={job.exception()}"
+                )
+
+                raise
+            finally:
+                job.release()
+
+            break
+
+        # if a termination signal is detected, clean up and break
+        if self.request.id and redis_connector_index.terminating(self.request.id):
+            task_logger.warning(
+                "Indexing watchdog - termination signal detected: "
+                f"attempt={index_attempt_id} "
+                f"cc_pair={cc_pair_id} "
+                f"search_settings={search_settings_id}"
+            )
+
+            try:
+                with get_session_with_tenant(tenant_id) as db_session:
+                    mark_attempt_canceled(
+                        index_attempt_id,
+                        db_session,
+                        "Connector termination signal detected",
                    )
+            except Exception:
+                # if the DB exceptions, we'll just get an unfriendly failure message
+                # in the UI instead of the cancellation message
+                logger.exception(
+                    "Indexing watchdog - transient exception marking index attempt as canceled: "
+                    f"attempt={index_attempt_id} "
+                    f"tenant={tenant_id} "
+                    f"cc_pair={cc_pair_id} "
+                    f"search_settings={search_settings_id}"
                )
-                continue
-    except Exception as e:
-        result.status = IndexingWatchdogTerminalStatus.WATCHDOG_EXCEPTIONED
-        if isinstance(e, ConnectorValidationError):
-            # No need to expose full stack trace for validation errors
-            result.exception_str = str(e)
-        else:
-            result.exception_str = traceback.format_exc()

-    # handle exit and reporting
-    elapsed = time.monotonic() - start
-    if result.exception_str is not None:
-        # print with exception
+            job.cancel()
+            break
+
+        # if the spawned task is still running, restart the check once again
+        # if the index attempt is not in a finished status
        try:
-            with get_session_with_current_tenant() as db_session:
-                failure_reason = (
-                    f"Spawned task exceptioned: exit_code={result.exit_code}"
-                )
-                mark_attempt_failed(
-                    ctx.index_attempt_id,
-                    db_session,
-                    failure_reason=failure_reason,
-                    full_exception_trace=result.exception_str,
+            with get_session_with_tenant(tenant_id) as db_session:
+                index_attempt = get_index_attempt(
+                    db_session=db_session, index_attempt_id=index_attempt_id
                )
+
+                if not index_attempt:
+                    continue
+
+                if not index_attempt.is_finished():
+                    continue
        except Exception:
-            task_logger.exception(
-                log_builder.build(
-                    "Indexing watchdog - transient exception marking index attempt as failed"
-                )
+            # if the DB lookup raised, just restart the check.
+            # polling the index attempt status doesn't need to be strongly consistent
+            logger.exception(
+                "Indexing watchdog - transient exception looking up index attempt: "
+                f"attempt={index_attempt_id} "
+                f"tenant={tenant_id} "
+                f"cc_pair={cc_pair_id} "
+                f"search_settings={search_settings_id}"
            )
-
-        normalized_exception_str = "None"
-        if result.exception_str:
-            normalized_exception_str = result.exception_str.replace(
-                "\n", "\\n"
-            ).replace('"', '\\"')
-
-        task_logger.warning(
-            log_builder.build(
-                "Indexing watchdog - finished",
-                source=result.connector_source,
-                status=result.status.value,
-                exit_code=str(result.exit_code),
-                exception=f'"{normalized_exception_str}"',
-                elapsed=f"{elapsed:.2f}s",
-            )
-        )
-
-        redis_connector_index.set_watchdog(False)
-        raise RuntimeError(f"Exception encountered: traceback={result.exception_str}")
-
-    # print without exception
-    if result.status == IndexingWatchdogTerminalStatus.TERMINATED_BY_SIGNAL:
-        try:
-            with get_session_with_current_tenant() as db_session:
-                mark_attempt_canceled(
-                    index_attempt_id,
-                    db_session,
-                    "Connector termination signal detected",
-                )
-        except Exception:
-            # if the DB exceptions, we'll just get an unfriendly failure message
-            # in the UI instead of the cancellation message
-            task_logger.exception(
-                log_builder.build(
-                    "Indexing watchdog - transient exception marking index attempt as canceled"
-                )
-            )
-
-        job.cancel()
-
-    task_logger.info(
-        log_builder.build(
-            "Indexing watchdog - finished",
-            source=result.connector_source,
-            status=str(result.status.value),
-            exit_code=str(result.exit_code),
-            elapsed=f"{elapsed:.2f}s",
-        )
-    )
+            continue

    redis_connector_index.set_watchdog(False)
-    return
-
-
-@shared_task(
-    name=OnyxCeleryTask.CHECK_FOR_CHECKPOINT_CLEANUP,
-    soft_time_limit=300,
-)
-def check_for_checkpoint_cleanup(*, tenant_id: str | None) -> None:
-    """Clean up old checkpoints that are older than 7 days."""
-    locked = False
-    redis_client = get_redis_client(tenant_id=tenant_id)
-    lock: RedisLock = redis_client.lock(
-        OnyxRedisLocks.CHECK_CHECKPOINT_CLEANUP_BEAT_LOCK,
-        timeout=CELERY_GENERIC_BEAT_LOCK_TIMEOUT,
+    task_logger.info(
+        f"Indexing watchdog - finished: attempt={index_attempt_id} "
+        f"cc_pair={cc_pair_id} "
+        f"search_settings={search_settings_id}"
    )
-
-    # these tasks should never overlap
-    if not lock.acquire(blocking=False):
-        return None
-
-    try:
-        locked = True
-        with get_session_with_current_tenant() as db_session:
-            old_attempts = get_index_attempts_with_old_checkpoints(db_session)
-            for attempt in old_attempts:
-                task_logger.info(
-                    f"Cleaning up checkpoint for index attempt {attempt.id}"
-                )
-                cleanup_checkpoint_task.apply_async(
-                    kwargs={
-                        "index_attempt_id": attempt.id,
-                        "tenant_id": tenant_id,
-                    },
-                    queue=OnyxCeleryQueues.CHECKPOINT_CLEANUP,
-                )
-
-    except Exception:
-        task_logger.exception("Unexpected exception during checkpoint cleanup")
-        return None
-    finally:
-        if locked:
-            if lock.owned():
-                lock.release()
-            else:
-                task_logger.error(
-                    "check_for_checkpoint_cleanup - Lock not owned on completion: "
-                    f"tenant={tenant_id}"
-                )
-
-
-@shared_task(
-    name=OnyxCeleryTask.CLEANUP_CHECKPOINT,
-    bind=True,
-)
-def cleanup_checkpoint_task(
-    self: Task, *, index_attempt_id: int, tenant_id: str | None
-) -> None:
-    """Clean up a checkpoint for a given index attempt"""
-    with get_session_with_current_tenant() as db_session:
-        cleanup_checkpoint(db_session, index_attempt_id)
+    return
--- a/backend/onyx/background/celery/tasks/indexing/utils.py
+++ b/backend/onyx/background/celery/tasks/indexing/utils.py
@@ -23,7 +23,7 @@ from onyx.configs.constants import OnyxCeleryQueues
 from onyx.configs.constants import OnyxCeleryTask
 from onyx.configs.constants import OnyxRedisConstants
 from onyx.db.engine import get_db_current_time
-from onyx.db.engine import get_session_with_current_tenant
+from onyx.db.engine import get_session_with_tenant
 from onyx.db.enums import ConnectorCredentialPairStatus
 from onyx.db.enums import IndexingStatus
 from onyx.db.enums import IndexModelStatus
@@ -93,25 +93,27 @@ def get_unfenced_index_attempt_ids(db_session: Session, r: redis.Redis) -> list[
    return unfenced_attempts


-class IndexingCallbackBase(IndexingHeartbeatInterface):
+class IndexingCallback(IndexingHeartbeatInterface):
    PARENT_CHECK_INTERVAL = 60

    def __init__(
        self,
        parent_pid: int,
        redis_connector: RedisConnector,
+        redis_connector_index: RedisConnectorIndex,
        redis_lock: RedisLock,
        redis_client: Redis,
    ):
        super().__init__()
        self.parent_pid = parent_pid
        self.redis_connector: RedisConnector = redis_connector
+        self.redis_connector_index: RedisConnectorIndex = redis_connector_index
        self.redis_lock: RedisLock = redis_lock
        self.redis_client = redis_client
        self.started: datetime = datetime.now(timezone.utc)
        self.redis_lock.reacquire()

-        self.last_tag: str = f"{self.__class__.__name__}.__init__"
+        self.last_tag: str = "IndexingCallback.__init__"
        self.last_lock_reacquire: datetime = datetime.now(timezone.utc)
        self.last_lock_monotonic = time.monotonic()

@@ -125,8 +127,8 @@ class IndexingCallbackBase(IndexingHeartbeatInterface):

    def progress(self, tag: str, amount: int) -> None:
        # rkuo: this shouldn't be necessary yet because we spawn the process this runs inside
-        # with daemon=True. It seems likely some indexing tasks will need to spawn other processes
-        # eventually, which daemon=True prevents, so leave this code in until we're ready to test it.
+        # with daemon = True. It seems likely some indexing tasks will need to spawn other processes eventually
+        # so leave this code in until we're ready to test it.

        # if self.parent_pid:
        #     # check if the parent pid is alive so we aren't running as a zombie
@@ -141,6 +143,8 @@ class IndexingCallbackBase(IndexingHeartbeatInterface):
        #         self.last_parent_check = now

        try:
+            self.redis_connector.prune.set_active()
+
            current_time = time.monotonic()
            if current_time - self.last_lock_monotonic >= (
                CELERY_GENERIC_BEAT_LOCK_TIMEOUT / 4
@@ -152,7 +156,7 @@ class IndexingCallbackBase(IndexingHeartbeatInterface):
            self.last_tag = tag
        except LockError:
            logger.exception(
-                f"{self.__class__.__name__} - lock.reacquire exceptioned: "
+                f"IndexingCallback - lock.reacquire exceptioned: "
                f"lock_timeout={self.redis_lock.timeout} "
                f"start={self.started} "
                f"last_tag={self.last_tag} "
@@ -163,24 +167,6 @@ class IndexingCallbackBase(IndexingHeartbeatInterface):
            redis_lock_dump(self.redis_lock, self.redis_client)
            raise

-
-class IndexingCallback(IndexingCallbackBase):
-    def __init__(
-        self,
-        parent_pid: int,
-        redis_connector: RedisConnector,
-        redis_lock: RedisLock,
-        redis_client: Redis,
-        redis_connector_index: RedisConnectorIndex,
-    ):
-        super().__init__(parent_pid, redis_connector, redis_lock, redis_client)
-
-        self.redis_connector_index: RedisConnectorIndex = redis_connector_index
-
-    def progress(self, tag: str, amount: int) -> None:
-        self.redis_connector_index.set_active()
-        self.redis_connector_index.set_connector_active()
-        super().progress(tag, amount)
        self.redis_client.incrby(
            self.redis_connector_index.generator_progress_key, amount
        )
@@ -254,8 +240,7 @@ def validate_indexing_fence(
        # it would be odd to get here as there isn't that much that can go wrong during
        # initial fence setup, but it's still worth making sure we can recover
        logger.info(
-            f"validate_indexing_fence - "
-            f"Resetting fence in basic state without any activity: fence={fence_key}"
+            f"validate_indexing_fence - Resetting fence in basic state without any activity: fence={fence_key}"
        )
        redis_connector_index.reset()
        return
@@ -332,7 +317,7 @@ def validate_indexing_fences(
        if not key_str.startswith(RedisConnectorIndex.FENCE_PREFIX):
            continue

-        with get_session_with_current_tenant() as db_session:
+        with get_session_with_tenant(tenant_id) as db_session:
            validate_indexing_fence(
                tenant_id,
                key_bytes,
--- a/backend/onyx/background/celery/tasks/llm_model_update/tasks.py
+++ b/backend/onyx/background/celery/tasks/llm_model_update/tasks.py
@@ -8,7 +8,7 @@ from onyx.background.celery.apps.app_base import task_logger
 from onyx.configs.app_configs import JOB_TIMEOUT
 from onyx.configs.app_configs import LLM_MODEL_UPDATE_API_URL
 from onyx.configs.constants import OnyxCeleryTask
-from onyx.db.engine import get_session_with_current_tenant
+from onyx.db.engine import get_session_with_tenant
 from onyx.db.models import LLMProvider


@@ -75,7 +75,7 @@ def check_for_llm_model_update(self: Task, *, tenant_id: str | None) -> bool | N
        return None

    # Then update the database with the fetched models
-    with get_session_with_current_tenant() as db_session:
+    with get_session_with_tenant(tenant_id) as db_session:
        # Get the default LLM provider
        default_provider = (
            db_session.query(LLMProvider)
--- a/backend/onyx/background/celery/tasks/monitoring/tasks.py
+++ b/backend/onyx/background/celery/tasks/monitoring/tasks.py
@@ -26,8 +26,7 @@ from onyx.configs.constants import OnyxCeleryTask
 from onyx.configs.constants import OnyxRedisLocks
 from onyx.db.engine import get_all_tenant_ids
 from onyx.db.engine import get_db_current_time
-from onyx.db.engine import get_session_with_current_tenant
-from onyx.db.engine import get_session_with_shared_schema
+from onyx.db.engine import get_session_with_tenant
 from onyx.db.enums import IndexingStatus
 from onyx.db.enums import SyncStatus
 from onyx.db.enums import SyncType
@@ -43,6 +42,7 @@ from onyx.utils.telemetry import optional_telemetry
 from onyx.utils.telemetry import RecordType
 from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR

+
 _MONITORING_SOFT_TIME_LIMIT = 60 * 5  # 5 minutes
 _MONITORING_TIME_LIMIT = _MONITORING_SOFT_TIME_LIMIT + 60  # 6 minutes

@@ -190,9 +190,9 @@ def _build_connector_start_latency_metric(
        desired_start_time = cc_pair.connector.time_created
    else:
        if not cc_pair.connector.refresh_freq:
-            task_logger.debug(
-                "Connector has no refresh_freq and this is a non-initial index attempt. "
-                "Assuming user manually triggered indexing, so we'll skip start latency metric."
+            task_logger.error(
+                "Found non-initial index attempt for connector "
+                "without refresh_freq. This should never happen."
            )
            return None

@@ -668,7 +668,7 @@ def monitor_background_processes(self: Task, *, tenant_id: str | None) -> None:
        CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)

    task_logger.info("Starting background monitoring")
-    r = get_redis_client()
+    r = get_redis_client(tenant_id=tenant_id)

    lock_monitoring: RedisLock = r.lock(
        OnyxRedisLocks.MONITOR_BACKGROUND_PROCESSES_LOCK,
@@ -683,7 +683,7 @@ def monitor_background_processes(self: Task, *, tenant_id: str | None) -> None:
    try:
        # Get Redis client for Celery broker
        redis_celery = self.app.broker_connection().channel().client  # type: ignore
-        redis_std = get_redis_client()
+        redis_std = get_redis_client(tenant_id=tenant_id)

        # Define metric collection functions and their dependencies
        metric_functions: list[Callable[[], list[Metric]]] = [
@@ -693,7 +693,7 @@ def monitor_background_processes(self: Task, *, tenant_id: str | None) -> None:
        ]

        # Collect and log each metric
-        with get_session_with_current_tenant() as db_session:
+        with get_session_with_tenant(tenant_id) as db_session:
            for metric_fn in metric_functions:
                metrics = metric_fn()
                for metric in metrics:
@@ -771,11 +771,12 @@ def cloud_check_alembic() -> bool | None:
            if tenant_id is None:
                continue

-            with get_session_with_shared_schema() as session:
+            with get_session_with_tenant(tenant_id=None) as session:
                try:
                    result = session.execute(
                        text(f'SELECT * FROM "{tenant_id}".alembic_version LIMIT 1')
                    )
+
                    result_scalar: str | None = result.scalar_one_or_none()
                    if result_scalar is None:
                        raise ValueError("Alembic version should not be None.")
--- a/backend/onyx/background/celery/tasks/periodic/tasks.py
+++ b/backend/onyx/background/celery/tasks/periodic/tasks.py
@@ -15,7 +15,7 @@ from onyx.background.celery.apps.app_base import task_logger
 from onyx.configs.app_configs import JOB_TIMEOUT
 from onyx.configs.constants import OnyxCeleryTask
 from onyx.configs.constants import PostgresAdvisoryLocks
-from onyx.db.engine import get_session_with_current_tenant
+from onyx.db.engine import get_session_with_tenant


@shared_task(
@@ -36,7 +36,7 @@ def kombu_message_cleanup_task(self: Any, tenant_id: str | None) -> int:
    ctx["deleted"] = 0
    ctx["cleanup_age"] = KOMBU_MESSAGE_CLEANUP_AGE
    ctx["page_limit"] = KOMBU_MESSAGE_CLEANUP_PAGE_LIMIT
-    with get_session_with_current_tenant() as db_session:
+    with get_session_with_tenant(tenant_id) as db_session:
        # Exit the task if we can't take the advisory lock
        result = db_session.execute(
            text("SELECT pg_try_advisory_lock(:id)"),
--- a/backend/onyx/background/celery/tasks/pruning/tasks.py
+++ b/backend/onyx/background/celery/tasks/pruning/tasks.py
@@ -21,7 +21,7 @@ from onyx.background.celery.celery_redis import celery_get_queue_length
 from onyx.background.celery.celery_redis import celery_get_queued_task_ids
 from onyx.background.celery.celery_redis import celery_get_unacked_task_ids
 from onyx.background.celery.celery_utils import extract_ids_from_runnable_connector
-from onyx.background.celery.tasks.indexing.utils import IndexingCallbackBase
+from onyx.background.celery.tasks.indexing.utils import IndexingCallback
 from onyx.configs.app_configs import ALLOW_SIMULTANEOUS_PRUNING
 from onyx.configs.app_configs import JOB_TIMEOUT
 from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
@@ -41,7 +41,7 @@ from onyx.db.connector_credential_pair import get_connector_credential_pair
 from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
 from onyx.db.connector_credential_pair import get_connector_credential_pairs
 from onyx.db.document import get_documents_for_connector_credential_pair
-from onyx.db.engine import get_session_with_current_tenant
+from onyx.db.engine import get_session_with_tenant
 from onyx.db.enums import ConnectorCredentialPairStatus
 from onyx.db.enums import SyncStatus
 from onyx.db.enums import SyncType
@@ -62,12 +62,6 @@ from onyx.utils.logger import setup_logger
 logger = setup_logger()


-class PruneCallback(IndexingCallbackBase):
-    def progress(self, tag: str, amount: int) -> None:
-        self.redis_connector.prune.set_active()
-        super().progress(tag, amount)
-
-
 """Jobs / utils for kicking off pruning tasks."""


@@ -114,8 +108,8 @@ def _is_pruning_due(cc_pair: ConnectorCredentialPair) -> bool:
    bind=True,
 )
 def check_for_pruning(self: Task, *, tenant_id: str | None) -> bool | None:
-    r = get_redis_client()
-    r_replica = get_redis_replica_client()
+    r = get_redis_client(tenant_id=tenant_id)
+    r_replica = get_redis_replica_client(tenant_id=tenant_id)
    r_celery: Redis = self.app.broker_connection().channel().client  # type: ignore

    lock_beat: RedisLock = r.lock(
@@ -133,14 +127,14 @@ def check_for_pruning(self: Task, *, tenant_id: str | None) -> bool | None:
        # but pruning only kicks off once per hour
        if not r.exists(OnyxRedisSignals.BLOCK_PRUNING):
            cc_pair_ids: list[int] = []
-            with get_session_with_current_tenant() as db_session:
+            with get_session_with_tenant(tenant_id) as db_session:
                cc_pairs = get_connector_credential_pairs(db_session)
                for cc_pair_entry in cc_pairs:
                    cc_pair_ids.append(cc_pair_entry.id)

            for cc_pair_id in cc_pair_ids:
                lock_beat.reacquire()
-                with get_session_with_current_tenant() as db_session:
+                with get_session_with_tenant(tenant_id) as db_session:
                    cc_pair = get_connector_credential_pair_from_id(
                        db_session=db_session,
                        cc_pair_id=cc_pair_id,
@@ -188,7 +182,7 @@ def check_for_pruning(self: Task, *, tenant_id: str | None) -> bool | None:

            key_str = key_bytes.decode("utf-8")
            if key_str.startswith(RedisConnectorPrune.FENCE_PREFIX):
-                with get_session_with_current_tenant() as db_session:
+                with get_session_with_tenant(tenant_id) as db_session:
                    monitor_ccpair_pruning_taskset(tenant_id, key_bytes, r, db_session)
    except SoftTimeLimitExceeded:
        task_logger.info(
@@ -343,7 +337,7 @@ def connector_pruning_generator_task(

    redis_connector = RedisConnector(tenant_id, cc_pair_id)

-    r = get_redis_client()
+    r = get_redis_client(tenant_id=tenant_id)

    # this wait is needed to avoid a race condition where
    # the primary worker sends the task and it is immediately executed
@@ -401,7 +395,7 @@ def connector_pruning_generator_task(
        return None

    try:
-        with get_session_with_current_tenant() as db_session:
+        with get_session_with_tenant(tenant_id) as db_session:
            cc_pair = get_connector_credential_pair(
                db_session=db_session,
                connector_id=connector_id,
@@ -431,7 +425,6 @@ def connector_pruning_generator_task(
                f"cc_pair={cc_pair_id} "
                f"connector_source={cc_pair.connector.source}"
            )
-
            runnable_connector = instantiate_connector(
                db_session,
                cc_pair.connector.source,
@@ -441,11 +434,12 @@ def connector_pruning_generator_task(
            )

            search_settings = get_current_search_settings(db_session)
-            redis_connector.new_index(search_settings.id)
+            redis_connector_index = redis_connector.new_index(search_settings.id)

-            callback = PruneCallback(
+            callback = IndexingCallback(
                0,
                redis_connector,
+                redis_connector_index,
                lock,
                r,
            )
--- a/backend/onyx/background/celery/tasks/shared/tasks.py
+++ b/backend/onyx/background/celery/tasks/shared/tasks.py
@@ -27,7 +27,7 @@ from onyx.db.document import mark_document_as_modified
 from onyx.db.document import mark_document_as_synced
 from onyx.db.document_set import fetch_document_sets_for_document
 from onyx.db.engine import get_all_tenant_ids
-from onyx.db.engine import get_session_with_current_tenant
+from onyx.db.engine import get_session_with_tenant
 from onyx.db.search_settings import get_active_search_settings
 from onyx.document_index.factory import get_default_document_index
 from onyx.document_index.interfaces import VespaDocumentFields
@@ -79,7 +79,7 @@ def document_by_cc_pair_cleanup_task(
    start = time.monotonic()

    try:
-        with get_session_with_current_tenant() as db_session:
+        with get_session_with_tenant(tenant_id) as db_session:
            action = "skip"
            chunks_affected = 0

@@ -105,7 +105,6 @@ def document_by_cc_pair_cleanup_task(
                    tenant_id=tenant_id,
                    chunk_count=chunk_count,
                )
-
                delete_documents_complete__no_commit(
                    db_session=db_session,
                    document_ids=[document_id],
@@ -205,7 +204,7 @@ def document_by_cc_pair_cleanup_task(
                f"Max celery task retries reached. Marking doc as dirty for reconciliation: "
                f"doc={document_id}"
            )
-            with get_session_with_current_tenant() as db_session:
+            with get_session_with_tenant(tenant_id) as db_session:
                # delete the cc pair relationship now and let reconciliation clean it up
                # in vespa
                delete_document_by_connector_credential_pair__no_commit(
--- a/backend/onyx/background/celery/tasks/vespa/tasks.py
+++ b/backend/onyx/background/celery/tasks/vespa/tasks.py
@@ -34,7 +34,7 @@ from onyx.db.document_set import fetch_document_sets
 from onyx.db.document_set import fetch_document_sets_for_document
 from onyx.db.document_set import get_document_set_by_id
 from onyx.db.document_set import mark_document_set_as_synced
-from onyx.db.engine import get_session_with_current_tenant
+from onyx.db.engine import get_session_with_tenant
 from onyx.db.enums import SyncStatus
 from onyx.db.enums import SyncType
 from onyx.db.models import DocumentSet
@@ -78,14 +78,10 @@ logger = setup_logger()
 def check_for_vespa_sync_task(self: Task, *, tenant_id: str | None) -> bool | None:
    """Runs periodically to check if any document needs syncing.
    Generates sets of tasks for Celery if syncing is needed."""
-
-    # Useful for debugging timing issues with reacquisitions. TODO: remove once more generalized logging is in place
-    task_logger.info("check_for_vespa_sync_task started")
-
    time_start = time.monotonic()

-    r = get_redis_client()
-    r_replica = get_redis_replica_client()
+    r = get_redis_client(tenant_id=tenant_id)
+    r_replica = get_redis_replica_client(tenant_id=tenant_id)

    lock_beat: RedisLock = r.lock(
        OnyxRedisLocks.CHECK_VESPA_SYNC_BEAT_LOCK,
@@ -98,7 +94,7 @@ def check_for_vespa_sync_task(self: Task, *, tenant_id: str | None) -> bool | No

    try:
        # 1/3: KICKOFF
-        with get_session_with_current_tenant() as db_session:
+        with get_session_with_tenant(tenant_id) as db_session:
            try_generate_stale_document_sync_tasks(
                self.app, VESPA_SYNC_MAX_TASKS, db_session, r, lock_beat, tenant_id
            )
@@ -106,7 +102,7 @@ def check_for_vespa_sync_task(self: Task, *, tenant_id: str | None) -> bool | No
        # region document set scan
        lock_beat.reacquire()
        document_set_ids: list[int] = []
-        with get_session_with_current_tenant() as db_session:
+        with get_session_with_tenant(tenant_id) as db_session:
            # check if any document sets are not synced
            document_set_info = fetch_document_sets(
                user_id=None, db_session=db_session, include_outdated=True
@@ -117,7 +113,7 @@ def check_for_vespa_sync_task(self: Task, *, tenant_id: str | None) -> bool | No

        for document_set_id in document_set_ids:
            lock_beat.reacquire()
-            with get_session_with_current_tenant() as db_session:
+            with get_session_with_tenant(tenant_id) as db_session:
                try_generate_document_set_sync_tasks(
                    self.app, document_set_id, db_session, r, lock_beat, tenant_id
                )
@@ -136,7 +132,7 @@ def check_for_vespa_sync_task(self: Task, *, tenant_id: str | None) -> bool | No
                pass
            else:
                usergroup_ids: list[int] = []
-                with get_session_with_current_tenant() as db_session:
+                with get_session_with_tenant(tenant_id) as db_session:
                    user_groups = fetch_user_groups(
                        db_session=db_session, only_up_to_date=False
                    )
@@ -146,7 +142,7 @@ def check_for_vespa_sync_task(self: Task, *, tenant_id: str | None) -> bool | No

                for usergroup_id in usergroup_ids:
                    lock_beat.reacquire()
-                    with get_session_with_current_tenant() as db_session:
+                    with get_session_with_tenant(tenant_id) as db_session:
                        try_generate_user_group_sync_tasks(
                            self.app, usergroup_id, db_session, r, lock_beat, tenant_id
                        )
@@ -167,7 +163,7 @@ def check_for_vespa_sync_task(self: Task, *, tenant_id: str | None) -> bool | No
            if key_str == RedisGlobalConnectorCredentialPair.FENCE_KEY:
                monitor_connector_taskset(r)
            elif key_str.startswith(RedisDocumentSet.FENCE_PREFIX):
-                with get_session_with_current_tenant() as db_session:
+                with get_session_with_tenant(tenant_id) as db_session:
                    monitor_document_set_taskset(tenant_id, key_bytes, r, db_session)
            elif key_str.startswith(RedisUserGroup.FENCE_PREFIX):
                monitor_usergroup_taskset = (
@@ -177,7 +173,7 @@ def check_for_vespa_sync_task(self: Task, *, tenant_id: str | None) -> bool | No
                        noop_fallback,
                    )
                )
-                with get_session_with_current_tenant() as db_session:
+                with get_session_with_tenant(tenant_id) as db_session:
                    monitor_usergroup_taskset(tenant_id, key_bytes, r, db_session)

    except SoftTimeLimitExceeded:
@@ -496,21 +492,13 @@ def monitor_document_set_taskset(
            task_logger.info(
                f"Successfully synced document set: document_set={document_set_id}"
            )
-
-        try:
-            update_sync_record_status(
-                db_session=db_session,
-                entity_id=document_set_id,
-                sync_type=SyncType.DOCUMENT_SET,
-                sync_status=SyncStatus.SUCCESS,
-                num_docs_synced=initial_count,
-            )
-        except Exception:
-            task_logger.exception(
-                "update_sync_record_status exceptioned. "
-                f"document_set_id={document_set_id} "
-                "Resetting document set regardless."
-            )
+        update_sync_record_status(
+            db_session=db_session,
+            entity_id=document_set_id,
+            sync_type=SyncType.DOCUMENT_SET,
+            sync_status=SyncStatus.SUCCESS,
+            num_docs_synced=initial_count,
+        )

    rds.reset()

@@ -523,12 +511,12 @@ def monitor_document_set_taskset(
    max_retries=3,
 )
 def vespa_metadata_sync_task(
-    self: Task, document_id: str, *, tenant_id: str | None
+    self: Task, document_id: str, tenant_id: str | None
 ) -> bool:
    start = time.monotonic()

    try:
-        with get_session_with_current_tenant() as db_session:
+        with get_session_with_tenant(tenant_id) as db_session:
            active_search_settings = get_active_search_settings(db_session)
            doc_index = get_default_document_index(
                search_settings=active_search_settings.primary,
--- a/backend/onyx/background/error_logging.py
+++ b/backend/onyx/background/error_logging.py
@@ -1,5 +1,5 @@
 from onyx.db.background_error import create_background_error
-from onyx.db.engine import get_session_with_current_tenant
+from onyx.db.engine import get_session_with_tenant


 def emit_background_error(
@@ -9,5 +9,5 @@ def emit_background_error(
    """Currently just saves a row in the background_errors table.

    In the future, could create notifications based on the severity."""
-    with get_session_with_current_tenant() as db_session:
+    with get_session_with_tenant() as db_session:
        create_background_error(db_session, message, cc_pair_id)
--- a/backend/onyx/background/indexing/checkpointing.py
+++ b/backend/onyx/background/indexing/checkpointing.py
@@ -0,0 +1,80 @@
+"""Experimental functionality related to splitting up indexing
+into a series of checkpoints to better handle intermittent failures
+/ jobs being killed by cloud providers."""
+import datetime
+
+from onyx.configs.app_configs import EXPERIMENTAL_CHECKPOINTING_ENABLED
+from onyx.configs.constants import DocumentSource
+from onyx.connectors.cross_connector_utils.miscellaneous_utils import datetime_to_utc
+
+
+def _2010_dt() -> datetime.datetime:
+    return datetime.datetime(year=2010, month=1, day=1, tzinfo=datetime.timezone.utc)
+
+
+def _2020_dt() -> datetime.datetime:
+    return datetime.datetime(year=2020, month=1, day=1, tzinfo=datetime.timezone.utc)
+
+
+def _default_end_time(
+    last_successful_run: datetime.datetime | None,
+) -> datetime.datetime:
+    """If the last successful run is missing or before 2010, go to the start of 2010.
+    If it is from 2010 up to (not including) 2020, go in 5 year (365 * 5 day)
+    increments, capped at the start of 2020. From 2020 onward, go in 180 day increments.
+
+    For connectors that don't support a `filter_by` and instead rely on `sort_by`
+    for polling, this will cause a massive duplication of fetches. For these
+    connectors, you may want to override this function to return a more reasonable
+    plan (e.g. extending the 2020+ windows to 1 year or higher)."""
+    last_successful_run = (
+        datetime_to_utc(last_successful_run) if last_successful_run else None
+    )
+    if last_successful_run is None or last_successful_run < _2010_dt():
+        return _2010_dt()
+
+    if last_successful_run < _2020_dt():
+        return min(last_successful_run + datetime.timedelta(days=365 * 5), _2020_dt())
+
+    return last_successful_run + datetime.timedelta(days=180)
+
+
+def find_end_time_for_indexing_attempt(
+    last_successful_run: datetime.datetime | None,
+    # source_type can be used to override the default for certain connectors, currently unused
+    source_type: DocumentSource,
+) -> datetime.datetime | None:
+    """Returns the end of the next indexing window, or None to index up to the current time.
+    When the connector has a large period to cover, it is split up into large time segments
+    that become smaller as they approach the present."""
+    # NOTE: source_type is currently ignored; it exists so the default can be overridden per connector
+    end_of_window = _default_end_time(last_successful_run)
+    now = datetime.datetime.now(tz=datetime.timezone.utc)
+    if end_of_window < now:
+        return end_of_window
+
+    # None signals that we should index up to current time
+    return None
+
+
+def get_time_windows_for_index_attempt(
+    last_successful_run: datetime.datetime, source_type: DocumentSource
+) -> list[tuple[datetime.datetime, datetime.datetime]]:
+    if not EXPERIMENTAL_CHECKPOINTING_ENABLED:
+        return [(last_successful_run, datetime.datetime.now(tz=datetime.timezone.utc))]
+
+    time_windows: list[tuple[datetime.datetime, datetime.datetime]] = []
+    start_of_window: datetime.datetime | None = last_successful_run
+    while start_of_window:
+        end_of_window = find_end_time_for_indexing_attempt(
+            last_successful_run=start_of_window, source_type=source_type
+        )
+        time_windows.append(
+            (
+                start_of_window,
+                end_of_window or datetime.datetime.now(tz=datetime.timezone.utc),
+            )
+        )
+        start_of_window = end_of_window
+
+    return time_windows
--- a/backend/onyx/background/indexing/checkpointing_utils.py
+++ b/backend/onyx/background/indexing/checkpointing_utils.py
@@ -1,200 +0,0 @@
-from datetime import datetime
-from datetime import timedelta
-from io import BytesIO
-
-from sqlalchemy import and_
-from sqlalchemy.orm import Session
-
-from onyx.configs.constants import FileOrigin
-from onyx.connectors.models import ConnectorCheckpoint
-from onyx.db.engine import get_db_current_time
-from onyx.db.index_attempt import get_index_attempt
-from onyx.db.index_attempt import get_recent_completed_attempts_for_cc_pair
-from onyx.db.models import IndexAttempt
-from onyx.db.models import IndexingStatus
-from onyx.file_store.file_store import get_default_file_store
-from onyx.utils.logger import setup_logger
-from onyx.utils.object_size_check import deep_getsizeof
-
-
-logger = setup_logger()
-
-_NUM_RECENT_ATTEMPTS_TO_CONSIDER = 20
-_NUM_DOCS_INDEXED_TO_BE_VALID_CHECKPOINT = 100
-
-
-def _build_checkpoint_pointer(index_attempt_id: int) -> str:
-    return f"checkpoint_{index_attempt_id}.json"
-
-
-def save_checkpoint(
-    db_session: Session, index_attempt_id: int, checkpoint: ConnectorCheckpoint
-) -> str:
-    """Save a checkpoint for a given index attempt to the file store"""
-    checkpoint_pointer = _build_checkpoint_pointer(index_attempt_id)
-
-    file_store = get_default_file_store(db_session)
-    file_store.save_file(
-        file_name=checkpoint_pointer,
-        content=BytesIO(checkpoint.model_dump_json().encode()),
-        display_name=checkpoint_pointer,
-        file_origin=FileOrigin.INDEXING_CHECKPOINT,
-        file_type="application/json",
-    )
-
-    index_attempt = get_index_attempt(db_session, index_attempt_id)
-    if not index_attempt:
-        raise RuntimeError(f"Index attempt {index_attempt_id} not found in DB.")
-    index_attempt.checkpoint_pointer = checkpoint_pointer
-    db_session.add(index_attempt)
-    db_session.commit()
-    return checkpoint_pointer
-
-
-def load_checkpoint(
-    db_session: Session, index_attempt_id: int
-) -> ConnectorCheckpoint | None:
-    """Load a checkpoint for a given index attempt from the file store"""
-    checkpoint_pointer = _build_checkpoint_pointer(index_attempt_id)
-    file_store = get_default_file_store(db_session)
-    try:
-        checkpoint_io = file_store.read_file(checkpoint_pointer, mode="rb")
-        checkpoint_data = checkpoint_io.read().decode("utf-8")
-        return ConnectorCheckpoint.model_validate_json(checkpoint_data)
-    except RuntimeError:
-        return None
-
-
-def get_latest_valid_checkpoint(
-    db_session: Session,
-    cc_pair_id: int,
-    search_settings_id: int,
-    window_start: datetime,
-    window_end: datetime,
-) -> ConnectorCheckpoint:
-    """Get the latest valid checkpoint for a given connector credential pair"""
-    checkpoint_candidates = get_recent_completed_attempts_for_cc_pair(
-        cc_pair_id=cc_pair_id,
-        search_settings_id=search_settings_id,
-        db_session=db_session,
-        limit=_NUM_RECENT_ATTEMPTS_TO_CONSIDER,
-    )
-    checkpoint_candidates = [
-        candidate
-        for candidate in checkpoint_candidates
-        if (
-            candidate.poll_range_start == window_start
-            and candidate.poll_range_end == window_end
-            and candidate.status == IndexingStatus.FAILED
-            and candidate.checkpoint_pointer is not None
-            # we want to make sure that the checkpoint is actually useful
-            # if it's only gone through a few docs, it's probably not worth
-            # using. This also avoids weird cases where a connector is basically
-            # non-functional but still "makes progress" by slowly moving the
-            # checkpoint forward run after run
-            and candidate.total_docs_indexed
-            and candidate.total_docs_indexed > _NUM_DOCS_INDEXED_TO_BE_VALID_CHECKPOINT
-        )
-    ]
-
-    # don't keep using checkpoints if we've had a bunch of failed attempts in a row
-    # for now, capped at 10
-    if len(checkpoint_candidates) == _NUM_RECENT_ATTEMPTS_TO_CONSIDER:
-        logger.warning(
-            f"{_NUM_RECENT_ATTEMPTS_TO_CONSIDER} consecutive failed attempts found "
-            f"for cc_pair={cc_pair_id}. Ignoring checkpoint to let the run start "
-            "from scratch."
-        )
-        return ConnectorCheckpoint.build_dummy_checkpoint()
-
-    # assumes latest checkpoint is the furthest along. This only isn't true
-    # if something else has gone wrong.
-    latest_valid_checkpoint_candidate = (
-        checkpoint_candidates[0] if checkpoint_candidates else None
-    )
-
-    checkpoint = ConnectorCheckpoint.build_dummy_checkpoint()
-    if latest_valid_checkpoint_candidate:
-        try:
-            previous_checkpoint = load_checkpoint(
-                db_session=db_session,
-                index_attempt_id=latest_valid_checkpoint_candidate.id,
-            )
-        except Exception:
-            logger.exception(
-                f"Failed to load checkpoint from previous failed attempt with ID "
-                f"{latest_valid_checkpoint_candidate.id}."
-            )
-            previous_checkpoint = None
-
-        if previous_checkpoint is not None:
-            logger.info(
-                f"Using checkpoint from previous failed attempt with ID "
-                f"{latest_valid_checkpoint_candidate.id}. Previous checkpoint: "
-                f"{previous_checkpoint}"
-            )
-            save_checkpoint(
-                db_session=db_session,
-                index_attempt_id=latest_valid_checkpoint_candidate.id,
-                checkpoint=previous_checkpoint,
-            )
-            checkpoint = previous_checkpoint
-
-    return checkpoint
-
-
-def get_index_attempts_with_old_checkpoints(
-    db_session: Session, days_to_keep: int = 7
-) -> list[IndexAttempt]:
-    """Get all index attempts with checkpoints older than the specified number of days.
-
-    Args:
-        db_session: The database session
-        days_to_keep: Number of days to keep checkpoints for (default: 7)
-
-    Returns:
-        Number of checkpoints deleted
-    """
-    cutoff_date = get_db_current_time(db_session) - timedelta(days=days_to_keep)
-
-    # Find all index attempts with checkpoints older than cutoff_date
-    old_attempts = (
-        db_session.query(IndexAttempt)
-        .filter(
-            and_(
-                IndexAttempt.checkpoint_pointer.isnot(None),
-                IndexAttempt.time_created < cutoff_date,
-            )
-        )
-        .all()
-    )
-
-    return old_attempts
-
-
-def cleanup_checkpoint(db_session: Session, index_attempt_id: int) -> None:
-    """Clean up a checkpoint for a given index attempt"""
-    index_attempt = get_index_attempt(db_session, index_attempt_id)
-    if not index_attempt:
-        raise RuntimeError(f"Index attempt {index_attempt_id} not found in DB.")
-
-    if not index_attempt.checkpoint_pointer:
-        return None
-
-    file_store = get_default_file_store(db_session)
-    file_store.delete_file(index_attempt.checkpoint_pointer)
-
-    index_attempt.checkpoint_pointer = None
-    db_session.add(index_attempt)
-    db_session.commit()
-
-    return None
-
-
-def check_checkpoint_size(checkpoint: ConnectorCheckpoint) -> None:
-    """Check if the checkpoint content size exceeds the limit (200MB)"""
-    content_size = deep_getsizeof(checkpoint.checkpoint_content)
-    if content_size > 200_000_000:  # 200MB in bytes
-        raise ValueError(
-            f"Checkpoint content size ({content_size} bytes) exceeds 200MB limit"
-        )
--- a/backend/onyx/background/indexing/job_client.py
+++ b/backend/onyx/background/indexing/job_client.py
@@ -5,8 +5,6 @@ not follow the expected behavior, etc.
 NOTE: cannot use Celery directly due to
 https://github.com/celery/celery/issues/7007#issuecomment-1740139367"""
 import multiprocessing as mp
-import sys
-import traceback
 from collections.abc import Callable
 from dataclasses import dataclass
 from multiprocessing.context import SpawnProcess
@@ -20,16 +18,6 @@ from onyx.utils.logger import setup_logger

 logger = setup_logger()

-
-class SimpleJobException(Exception):
-    """lets us raise an exception that will return a specific error code"""
-
-    def __init__(self, *args: Any, **kwargs: Any) -> None:
-        code: int | None = kwargs.pop("code", None)
-        self.code = code
-        super().__init__(*args, **kwargs)
-
-
 JobStatusType = (
    Literal["error"]
    | Literal["finished"]
@@ -40,10 +28,7 @@ JobStatusType = (


 def _initializer(
-    func: Callable,
-    queue: mp.Queue,
-    args: list | tuple,
-    kwargs: dict[str, Any] | None = None,
+    func: Callable, args: list | tuple, kwargs: dict[str, Any] | None = None
 ) -> Any:
    """Initialize the child process with a fresh SQLAlchemy Engine.

@@ -67,29 +52,13 @@ def _initializer(
    )

    # Proceed with executing the target function
-    try:
-        return func(*args, **kwargs)
-    except SimpleJobException as e:
-        logger.exception("SimpleJob raised a SimpleJobException")
-        error_msg = traceback.format_exc()
-        queue.put(error_msg)  # Send the exception to the parent process
-
-        sys.exit(e.code)  # use the given exit code
-    except Exception:
-        logger.exception("SimpleJob raised an exception")
-        error_msg = traceback.format_exc()
-        queue.put(error_msg)  # Send the exception to the parent process
-
-        sys.exit(255)  # use 255 to indicate a generic exception
+    return func(*args, **kwargs)


 def _run_in_process(
-    func: Callable,
-    queue: mp.Queue,
-    args: list | tuple,
-    kwargs: dict[str, Any] | None = None,
+    func: Callable, args: list | tuple, kwargs: dict[str, Any] | None = None
 ) -> None:
-    _initializer(func, queue, args, kwargs)
+    _initializer(func, args, kwargs)


@dataclass
@@ -98,8 +67,6 @@ class SimpleJob:

    id: int
    process: Optional["SpawnProcess"] = None
-    queue: Optional[mp.Queue] = None
-    _exception: Optional[str] = None

    def cancel(self) -> bool:
        return self.release()
@@ -133,15 +100,9 @@ class SimpleJob:
    def exception(self) -> str:
        """Needed to match the Dask API, but not implemented since we don't currently
        have a way to get back the exception information from the child process."""
-
-        """Retrieve exception from the multiprocessing queue if available."""
-        if self._exception is None and self.queue and not self.queue.empty():
-            self._exception = self.queue.get()  # Get exception from queue
-
-        if self._exception:
-            return self._exception
-
-        return f"Job with ID '{self.id}' did not report an exception."
+        return (
+            f"Job with ID '{self.id}' was killed or encountered an unhandled exception."
+        )


 class SimpleJobClient:
@@ -176,11 +137,8 @@ class SimpleJobClient:
        # this approach allows us to always "spawn" a new process regardless of
        # get_start_method's current setting
        ctx = mp.get_context("spawn")
-        queue = ctx.Queue()
-        process = ctx.Process(
-            target=_run_in_process, args=(func, queue, args), daemon=True
-        )
-        job = SimpleJob(id=job_id, process=process, queue=queue)
+        process = ctx.Process(target=_run_in_process, args=(func, args), daemon=True)
+        job = SimpleJob(id=job_id, process=process)
        process.start()

        self.jobs[job_id] = job
--- a/backend/onyx/background/indexing/memory_tracer.py
+++ b/backend/onyx/background/indexing/memory_tracer.py
@@ -1,87 +0,0 @@
-import tracemalloc
-
-from onyx.utils.logger import setup_logger
-
-logger = setup_logger()
-
-DANSWER_TRACEMALLOC_FRAMES = 10
-
-
-class MemoryTracer:
-    def __init__(self, interval: int = 0, num_print_entries: int = 5):
-        self.interval = interval
-        self.num_print_entries = num_print_entries
-        self.snapshot_first: tracemalloc.Snapshot | None = None
-        self.snapshot_prev: tracemalloc.Snapshot | None = None
-        self.snapshot: tracemalloc.Snapshot | None = None
-        self.counter = 0
-
-    def start(self) -> None:
-        """Start the memory tracer if interval is greater than 0."""
-        if self.interval > 0:
-            logger.debug(f"Memory tracer starting: interval={self.interval}")
-            tracemalloc.start(DANSWER_TRACEMALLOC_FRAMES)
-            self._take_snapshot()
-
-    def stop(self) -> None:
-        """Stop the memory tracer if it's running."""
-        if self.interval > 0:
-            self.log_final_diff()
-            tracemalloc.stop()
-            logger.debug("Memory tracer stopped.")
-
-    def _take_snapshot(self) -> None:
-        """Take a snapshot and update internal snapshot states."""
-        snapshot = tracemalloc.take_snapshot()
-        # Filter out irrelevant frames
-        snapshot = snapshot.filter_traces(
-            (
-                tracemalloc.Filter(False, tracemalloc.__file__),
-                tracemalloc.Filter(False, "<frozen importlib._bootstrap>"),
-                tracemalloc.Filter(False, "<frozen importlib._bootstrap_external>"),
-            )
-        )
-
-        if not self.snapshot_first:
-            self.snapshot_first = snapshot
-
-        if self.snapshot:
-            self.snapshot_prev = self.snapshot
-
-        self.snapshot = snapshot
-
-    def _log_diff(
-        self, current: tracemalloc.Snapshot, previous: tracemalloc.Snapshot
-    ) -> None:
-        """Log the memory difference between two snapshots."""
-        stats = current.compare_to(previous, "traceback")
-        for s in stats[: self.num_print_entries]:
-            logger.debug(f"Tracer diff: {s}")
-            for line in s.traceback.format():
-                logger.debug(f"* {line}")
-
-    def increment_and_maybe_trace(self) -> None:
-        """Increment counter and perform trace if interval is hit."""
-        if self.interval <= 0:
-            return
-
-        self.counter += 1
-        if self.counter % self.interval == 0:
-            logger.debug(
-                f"Running trace comparison for batch {self.counter}. interval={self.interval}"
-            )
-            self._take_snapshot()
-            if self.snapshot and self.snapshot_prev:
-                self._log_diff(self.snapshot, self.snapshot_prev)
-
-    def log_final_diff(self) -> None:
-        """Log the final memory diff between start and end of indexing."""
-        if self.interval <= 0:
-            return
-
-        logger.debug(
-            f"Running trace comparison between start and end of indexing. {self.counter} batches processed."
-        )
-        self._take_snapshot()
-        if self.snapshot and self.snapshot_first:
-            self._log_diff(self.snapshot, self.snapshot_first)
--- a/backend/onyx/background/indexing/models.py
+++ b/backend/onyx/background/indexing/models.py
@@ -1,40 +0,0 @@
-from datetime import datetime
-
-from pydantic import BaseModel
-
-from onyx.db.models import IndexAttemptError
-
-
-class IndexAttemptErrorPydantic(BaseModel):
-    id: int
-    connector_credential_pair_id: int
-
-    document_id: str | None
-    document_link: str | None
-
-    entity_id: str | None
-    failed_time_range_start: datetime | None
-    failed_time_range_end: datetime | None
-
-    failure_message: str
-    is_resolved: bool = False
-
-    time_created: datetime
-
-    index_attempt_id: int
-
-    @classmethod
-    def from_model(cls, model: IndexAttemptError) -> "IndexAttemptErrorPydantic":
-        return cls(
-            id=model.id,
-            connector_credential_pair_id=model.connector_credential_pair_id,
-            document_id=model.document_id,
-            document_link=model.document_link,
-            entity_id=model.entity_id,
-            failed_time_range_start=model.failed_time_range_start,
-            failed_time_range_end=model.failed_time_range_end,
-            failure_message=model.failure_message,
-            is_resolved=model.is_resolved,
-            time_created=model.time_created,
-            index_attempt_id=model.index_attempt_id,
-        )
--- a/backend/onyx/background/indexing/run_indexing.py
+++ b/backend/onyx/background/indexing/run_indexing.py
@@ -1,6 +1,5 @@
 import time
 import traceback
-from collections import defaultdict
 from datetime import datetime
 from datetime import timedelta
 from datetime import timezone
@@ -8,42 +7,32 @@ from datetime import timezone
 from pydantic import BaseModel
 from sqlalchemy.orm import Session

-from onyx.background.indexing.checkpointing_utils import check_checkpoint_size
-from onyx.background.indexing.checkpointing_utils import get_latest_valid_checkpoint
-from onyx.background.indexing.checkpointing_utils import save_checkpoint
-from onyx.background.indexing.memory_tracer import MemoryTracer
-from onyx.configs.app_configs import INDEX_BATCH_SIZE
+from onyx.background.indexing.checkpointing import get_time_windows_for_index_attempt
+from onyx.background.indexing.tracer import OnyxTracer
 from onyx.configs.app_configs import INDEXING_SIZE_WARNING_THRESHOLD
 from onyx.configs.app_configs import INDEXING_TRACER_INTERVAL
-from onyx.configs.app_configs import INTEGRATION_TESTS_MODE
 from onyx.configs.app_configs import LEAVE_CONNECTOR_ACTIVE_ON_INITIALIZATION_FAILURE
 from onyx.configs.app_configs import POLL_CONNECTOR_OFFSET
 from onyx.configs.constants import DocumentSource
 from onyx.configs.constants import MilestoneRecordType
 from onyx.connectors.connector_runner import ConnectorRunner
 from onyx.connectors.factory import instantiate_connector
-from onyx.connectors.interfaces import ConnectorValidationError
-from onyx.connectors.models import ConnectorCheckpoint
-from onyx.connectors.models import ConnectorFailure
 from onyx.connectors.models import Document
 from onyx.connectors.models import IndexAttemptMetadata
 from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
 from onyx.db.connector_credential_pair import get_last_successful_attempt_time
 from onyx.db.connector_credential_pair import update_connector_credential_pair
-from onyx.db.engine import get_session_with_current_tenant
+from onyx.db.engine import get_session_with_tenant
 from onyx.db.enums import ConnectorCredentialPairStatus
-from onyx.db.index_attempt import create_index_attempt_error
 from onyx.db.index_attempt import get_index_attempt
-from onyx.db.index_attempt import get_index_attempt_errors_for_cc_pair
-from onyx.db.index_attempt import get_recent_completed_attempts_for_cc_pair
 from onyx.db.index_attempt import mark_attempt_canceled
 from onyx.db.index_attempt import mark_attempt_failed
 from onyx.db.index_attempt import mark_attempt_partially_succeeded
 from onyx.db.index_attempt import mark_attempt_succeeded
 from onyx.db.index_attempt import transition_attempt_to_in_progress
 from onyx.db.index_attempt import update_docs_indexed
+from onyx.db.models import ConnectorCredentialPair
 from onyx.db.models import IndexAttempt
-from onyx.db.models import IndexAttemptError
 from onyx.db.models import IndexingStatus
 from onyx.db.models import IndexModelStatus
 from onyx.document_index.factory import get_default_document_index
@@ -64,7 +53,6 @@ INDEXING_TRACER_NUM_PRINT_ENTRIES = 5
 def _get_connector_runner(
    db_session: Session,
    attempt: IndexAttempt,
-    batch_size: int,
    start_time: datetime,
    end_time: datetime,
    tenant_id: str | None,
@@ -88,11 +76,6 @@ def _get_connector_runner(
            credential=attempt.connector_credential_pair.credential,
            tenant_id=tenant_id,
        )
-
-        # validate the connector settings
-        if not INTEGRATION_TESTS_MODE:
-            runnable_connector.validate_connector_settings()
-
    except Exception as e:
        logger.exception(f"Unable to instantiate connector due to {e}")

@@ -117,9 +100,7 @@ def _get_connector_runner(
        raise e

    return ConnectorRunner(
-        connector=runnable_connector,
-        batch_size=batch_size,
-        time_range=(start_time, end_time),
+        connector=runnable_connector, time_range=(start_time, end_time)
    )


@@ -178,66 +159,6 @@ class RunIndexingContext(BaseModel):
    search_settings_status: IndexModelStatus


-def _check_connector_and_attempt_status(
-    db_session_temp: Session, ctx: RunIndexingContext, index_attempt_id: int
-) -> None:
-    """
-    Checks the status of the connector credential pair and index attempt.
-    Raises a RuntimeError if any conditions are not met.
-    """
-    cc_pair_loop = get_connector_credential_pair_from_id(
-        db_session_temp,
-        ctx.cc_pair_id,
-    )
-    if not cc_pair_loop:
-        raise RuntimeError(f"CC pair {ctx.cc_pair_id} not found in DB.")
-
-    if (
-        cc_pair_loop.status == ConnectorCredentialPairStatus.PAUSED
-        and ctx.search_settings_status != IndexModelStatus.FUTURE
-    ) or cc_pair_loop.status == ConnectorCredentialPairStatus.DELETING:
-        raise RuntimeError("Connector was disabled mid run")
-
-    index_attempt_loop = get_index_attempt(db_session_temp, index_attempt_id)
-    if not index_attempt_loop:
-        raise RuntimeError(f"Index attempt {index_attempt_id} not found in DB.")
-
-    if index_attempt_loop.status != IndexingStatus.IN_PROGRESS:
-        raise RuntimeError(
-            f"Index Attempt was canceled, status is {index_attempt_loop.status}"
-        )
-
-
-def _check_failure_threshold(
-    total_failures: int,
-    document_count: int,
-    batch_num: int,
-    last_failure: ConnectorFailure | None,
-) -> None:
-    """Check if we've hit the failure threshold and raise an appropriate exception if so.
-
-    We consider the threshold hit if:
-    1. We have more than 3 failures AND
-    2. Failures account for more than 10% of processed documents
-    """
-    failure_ratio = total_failures / (document_count or 1)
-
-    FAILURE_THRESHOLD = 3
-    FAILURE_RATIO_THRESHOLD = 0.1
-    if total_failures > FAILURE_THRESHOLD and failure_ratio > FAILURE_RATIO_THRESHOLD:
-        logger.error(
-            f"Connector run failed with '{total_failures}' errors "
-            f"after '{batch_num}' batches."
-        )
-        if last_failure and last_failure.exception:
-            raise last_failure.exception from last_failure.exception
-
-        raise RuntimeError(
-            f"Connector run encountered too many errors, aborting. "
-            f"Last error: {last_failure}"
-        )
-
-
 def _run_indexing(
    db_session: Session,
    index_attempt_id: int,
@@ -248,10 +169,13 @@ def _run_indexing(
    1. Get documents which are either new or updated from specified application
    2. Embed and index these documents into the chosen datastore (vespa)
    3. Updates Postgres to record the indexed documents + the outcome of this run
-    """
-    start_time = time.monotonic()  # jsut used for logging

-    with get_session_with_current_tenant() as db_session_temp:
+    TODO: do not change index attempt statuses here ... instead, set signals in redis
+    and allow the monitor function to clean them up
+    """
+    start_time = time.time()
+
+    with get_session_with_tenant(tenant_id) as db_session_temp:
        index_attempt_start = get_index_attempt(db_session_temp, index_attempt_id)
        if not index_attempt_start:
            raise ValueError(
@@ -297,46 +221,6 @@ def _run_indexing(
                db_session=db_session_temp,
            )
        )
-        if last_successful_index_time > POLL_CONNECTOR_OFFSET:
-            window_start = datetime.fromtimestamp(
-                last_successful_index_time, tz=timezone.utc
-            ) - timedelta(minutes=POLL_CONNECTOR_OFFSET)
-        else:
-            # don't go into "negative" time if we've never indexed before
-            window_start = datetime.fromtimestamp(0, tz=timezone.utc)
-
-        most_recent_attempt = next(
-            iter(
-                get_recent_completed_attempts_for_cc_pair(
-                    cc_pair_id=ctx.cc_pair_id,
-                    search_settings_id=index_attempt_start.search_settings_id,
-                    db_session=db_session_temp,
-                    limit=1,
-                )
-            ),
-            None,
-        )
-        # if the last attempt failed, try and use the same window. This is necessary
-        # to ensure correctness with checkpointing. If we don't do this, things like
-        # new slack channels could be missed (since existing slack channels are
-        # cached as part of the checkpoint).
-        if (
-            most_recent_attempt
-            and most_recent_attempt.poll_range_end
-            and (
-                most_recent_attempt.status == IndexingStatus.FAILED
-                or most_recent_attempt.status == IndexingStatus.CANCELED
-            )
-        ):
-            window_end = most_recent_attempt.poll_range_end
-        else:
-            window_end = datetime.now(tz=timezone.utc)
-
-        # add start/end now that they have been set
-        index_attempt_start.poll_range_start = window_start
-        index_attempt_start.poll_range_end = window_end
-        db_session_temp.add(index_attempt_start)
-        db_session_temp.commit()

        embedding_model = DefaultIndexingEmbedder.from_db_search_settings(
            search_settings=index_attempt_start.search_settings,
@@ -350,6 +234,7 @@ def _run_indexing(
    )

    indexing_pipeline = build_indexing_pipeline(
+        attempt_id=index_attempt_id,
        embedder=embedding_model,
        document_index=document_index,
        ignore_time_skip=(
@@ -361,73 +246,63 @@ def _run_indexing(
        callback=callback,
    )

-    # Initialize memory tracer. NOTE: won't actually do anything if
-    # `INDEXING_TRACER_INTERVAL` is 0.
-    memory_tracer = MemoryTracer(interval=INDEXING_TRACER_INTERVAL)
-    memory_tracer.start()
+    tracer: OnyxTracer
+    if INDEXING_TRACER_INTERVAL > 0:
+        logger.debug(f"Memory tracer starting: interval={INDEXING_TRACER_INTERVAL}")
+        tracer = OnyxTracer()
+        tracer.start()
+        tracer.snap()

    index_attempt_md = IndexAttemptMetadata(
        connector_id=ctx.connector_id,
        credential_id=ctx.credential_id,
    )

-    total_failures = 0
    batch_num = 0
    net_doc_change = 0
    document_count = 0
    chunk_count = 0
-    try:
-        with get_session_with_current_tenant() as db_session_temp:
-            index_attempt = get_index_attempt(db_session_temp, index_attempt_id)
-            if not index_attempt:
-                raise RuntimeError(f"Index attempt {index_attempt_id} not found in DB.")
+    run_end_dt = None
+    tracer_counter: int

-            connector_runner = _get_connector_runner(
-                db_session=db_session_temp,
-                attempt=index_attempt,
-                batch_size=INDEX_BATCH_SIZE,
-                start_time=window_start,
-                end_time=window_end,
-                tenant_id=tenant_id,
+    for ind, (window_start, window_end) in enumerate(
+        get_time_windows_for_index_attempt(
+            last_successful_run=datetime.fromtimestamp(
+                last_successful_index_time, tz=timezone.utc
+            ),
+            source_type=db_connector.source,
+        )
+    ):
+        cc_pair_loop: ConnectorCredentialPair | None = None
+        index_attempt_loop: IndexAttempt | None = None
+        tracer_counter = 0
+
+        try:
+            window_start = max(
+                window_start - timedelta(minutes=POLL_CONNECTOR_OFFSET),
+                datetime(1970, 1, 1, tzinfo=timezone.utc),
            )

-            # don't use a checkpoint if we're explicitly indexing from
-            # the beginning in order to avoid weird interactions between
-            # checkpointing / failure handling.
-            if index_attempt.from_beginning:
-                checkpoint = ConnectorCheckpoint.build_dummy_checkpoint()
-            else:
-                checkpoint = get_latest_valid_checkpoint(
+            with get_session_with_tenant(tenant_id) as db_session_temp:
+                index_attempt_loop_start = get_index_attempt(
+                    db_session_temp, index_attempt_id
+                )
+                if not index_attempt_loop_start:
+                    raise RuntimeError(
+                        f"Index attempt {index_attempt_id} not found in DB."
+                    )
+
+                connector_runner = _get_connector_runner(
                    db_session=db_session_temp,
-                    cc_pair_id=ctx.cc_pair_id,
-                    search_settings_id=index_attempt.search_settings_id,
-                    window_start=window_start,
-                    window_end=window_end,
+                    attempt=index_attempt_loop_start,
+                    start_time=window_start,
+                    end_time=window_end,
+                    tenant_id=tenant_id,
                )

-            unresolved_errors = get_index_attempt_errors_for_cc_pair(
-                cc_pair_id=ctx.cc_pair_id,
-                unresolved_only=True,
-                db_session=db_session_temp,
-            )
-            doc_id_to_unresolved_errors: dict[
-                str, list[IndexAttemptError]
-            ] = defaultdict(list)
-            for error in unresolved_errors:
-                if error.document_id:
-                    doc_id_to_unresolved_errors[error.document_id].append(error)
-
-            entity_based_unresolved_errors = [
-                error for error in unresolved_errors if error.entity_id
-            ]
-
-        while checkpoint.has_more:
-            logger.info(
-                f"Running '{ctx.source}' connector with checkpoint: {checkpoint}"
-            )
-            for document_batch, failure, next_checkpoint in connector_runner.run(
-                checkpoint
-            ):
+            if INDEXING_TRACER_INTERVAL > 0:
+                tracer.snap()
+            for doc_batch in connector_runner.run():
                # Check if connector is disabled mid run and stop if so unless it's the secondary
                # index being built. We want to populate it even for paused connectors
                # Often paused connectors are sources that aren't updated frequently but the
@@ -437,38 +312,42 @@ def _run_indexing(
                        raise ConnectorStopSignal("Connector stop signal detected")

                # TODO: should we move this into the above callback instead?
-                with get_session_with_current_tenant() as db_session_temp:
-                    # will exception if the connector/index attempt is marked as paused/failed
-                    _check_connector_and_attempt_status(
-                        db_session_temp, ctx, index_attempt_id
+                with get_session_with_tenant(tenant_id) as db_session_temp:
+                    cc_pair_loop = get_connector_credential_pair_from_id(
+                        db_session_temp,
+                        ctx.cc_pair_id,
                    )
+                    if not cc_pair_loop:
+                        raise RuntimeError(f"CC pair {ctx.cc_pair_id} not found in DB.")

-                # save record of any failures at the connector level
-                if failure is not None:
-                    total_failures += 1
-                    with get_session_with_current_tenant() as db_session_temp:
-                        create_index_attempt_error(
-                            index_attempt_id,
-                            ctx.cc_pair_id,
-                            failure,
-                            db_session_temp,
+                    if (
+                        (
+                            cc_pair_loop.status == ConnectorCredentialPairStatus.PAUSED
+                            and ctx.search_settings_status != IndexModelStatus.FUTURE
+                        )
+                        # if it's deleting, we don't care if this is a secondary index
+                        or cc_pair_loop.status == ConnectorCredentialPairStatus.DELETING
+                    ):
+                        # let the `except` block handle this
+                        raise RuntimeError("Connector was disabled mid run")
+
+                    index_attempt_loop = get_index_attempt(
+                        db_session_temp, index_attempt_id
+                    )
+                    if not index_attempt_loop:
+                        raise RuntimeError(
+                            f"Index attempt {index_attempt_id} not found in DB."
                        )

-                    _check_failure_threshold(
-                        total_failures, document_count, batch_num, failure
-                    )
-
-                # save the new checkpoint (if one is provided)
-                if next_checkpoint:
-                    checkpoint = next_checkpoint
-
-                # below is all document processing logic, so if no batch we can just continue
-                if document_batch is None:
-                    continue
+                    if index_attempt_loop.status != IndexingStatus.IN_PROGRESS:
+                        # Likely due to user manually disabling it or model swap
+                        raise RuntimeError(
+                            f"Index Attempt was canceled, status is {index_attempt_loop.status}"
+                        )

                batch_description = []

-                doc_batch_cleaned = strip_null_characters(document_batch)
+                doc_batch_cleaned = strip_null_characters(doc_batch)
                for doc in doc_batch_cleaned:
                    batch_description.append(doc.to_short_descriptor())

@@ -498,51 +377,15 @@ def _run_indexing(
                chunk_count += index_pipeline_result.total_chunks
                document_count += index_pipeline_result.total_docs

-                # resolve errors for documents that were successfully indexed
-                failed_document_ids = [
-                    failure.failed_document.document_id
-                    for failure in index_pipeline_result.failures
-                    if failure.failed_document
-                ]
-                successful_document_ids = [
-                    document.id
-                    for document in document_batch
-                    if document.id not in failed_document_ids
-                ]
-                for document_id in successful_document_ids:
-                    with get_session_with_current_tenant() as db_session_temp:
-                        if document_id in doc_id_to_unresolved_errors:
-                            logger.info(
-                                f"Resolving IndexAttemptError for document '{document_id}'"
-                            )
-                            for error in doc_id_to_unresolved_errors[document_id]:
-                                error.is_resolved = True
-                                db_session_temp.add(error)
-                        db_session_temp.commit()
-
-                # add brand new failures
-                if index_pipeline_result.failures:
-                    total_failures += len(index_pipeline_result.failures)
-                    with get_session_with_current_tenant() as db_session_temp:
-                        for failure in index_pipeline_result.failures:
-                            create_index_attempt_error(
-                                index_attempt_id,
-                                ctx.cc_pair_id,
-                                failure,
-                                db_session_temp,
-                            )
-
-                    _check_failure_threshold(
-                        total_failures,
-                        document_count,
-                        batch_num,
-                        index_pipeline_result.failures[-1],
-                    )
+                # commit transaction so that the `update` below begins
+                # with a brand new transaction. Postgres uses the start
+                # of the transactions when computing `NOW()`, so if we have
+                # a long running transaction, the `time_updated` field will
+                # be inaccurate
+                db_session.commit()

                # This new value is updated every batch, so UI can refresh per batch update
-                with get_session_with_current_tenant() as db_session_temp:
-                    # NOTE: Postgres uses the start of the transactions when computing `NOW()`
-                    # so we need either to commit() or to use a new session
+                with get_session_with_tenant(tenant_id) as db_session_temp:
                    update_docs_indexed(
                        db_session=db_session_temp,
                        index_attempt_id=index_attempt_id,
@@ -554,97 +397,126 @@ def _run_indexing(
                if callback:
                    callback.progress("_run_indexing", len(doc_batch_cleaned))

-                memory_tracer.increment_and_maybe_trace()
+                tracer_counter += 1
+                if (
+                    INDEXING_TRACER_INTERVAL > 0
+                    and tracer_counter % INDEXING_TRACER_INTERVAL == 0
+                ):
+                    logger.debug(
+                        f"Running trace comparison for batch {tracer_counter}. interval={INDEXING_TRACER_INTERVAL}"
+                    )
+                    tracer.snap()
+                    tracer.log_previous_diff(INDEXING_TRACER_NUM_PRINT_ENTRIES)

-            # `make sure the checkpoints aren't getting too large`at some regular interval
-            CHECKPOINT_SIZE_CHECK_INTERVAL = 100
-            if batch_num % CHECKPOINT_SIZE_CHECK_INTERVAL == 0:
-                check_checkpoint_size(checkpoint)
+            run_end_dt = window_end
+            if ctx.is_primary:
+                with get_session_with_tenant(tenant_id) as db_session_temp:
+                    update_connector_credential_pair(
+                        db_session=db_session_temp,
+                        connector_id=ctx.connector_id,
+                        credential_id=ctx.credential_id,
+                        net_docs=net_doc_change,
+                        run_dt=run_end_dt,
+                    )
+        except Exception as e:
+            logger.exception(
+                f"Connector run exceptioned after elapsed time: {time.time() - start_time} seconds"
+            )

-            # save latest checkpoint
-            with get_session_with_current_tenant() as db_session_temp:
-                save_checkpoint(
-                    db_session=db_session_temp,
-                    index_attempt_id=index_attempt_id,
-                    checkpoint=checkpoint,
-                )
+            if isinstance(e, ConnectorStopSignal):
+                with get_session_with_tenant(tenant_id) as db_session_temp:
+                    mark_attempt_canceled(
+                        index_attempt_id,
+                        db_session_temp,
+                        reason=str(e),
+                    )

-    except Exception as e:
-        logger.exception(
-            "Connector run exceptioned after elapsed time: "
-            f"{time.monotonic() - start_time} seconds"
+                    if ctx.is_primary:
+                        update_connector_credential_pair(
+                            db_session=db_session_temp,
+                            connector_id=ctx.connector_id,
+                            credential_id=ctx.credential_id,
+                            net_docs=net_doc_change,
+                        )
+
+                if INDEXING_TRACER_INTERVAL > 0:
+                    tracer.stop()
+                raise e
+            else:
+                # Only mark the attempt as a complete failure if this is the first indexing window.
+                # Otherwise, some progress was made - the next run will not start from the beginning.
+                # In this case, it is not accurate to mark it as a failure. When the next run begins,
+                # if that fails immediately, it will be marked as a failure.
+                #
+                # NOTE: if the connector is manually disabled, we should mark it as a failure regardless
+                # to give better clarity in the UI, as the next run will never happen.
+                if (
+                    ind == 0
+                    or (
+                        cc_pair_loop is not None and not cc_pair_loop.status.is_active()
+                    )
+                    or (
+                        index_attempt_loop is not None
+                        and index_attempt_loop.status != IndexingStatus.IN_PROGRESS
+                    )
+                ):
+                    with get_session_with_tenant(tenant_id) as db_session_temp:
+                        mark_attempt_failed(
+                            index_attempt_id,
+                            db_session_temp,
+                            failure_reason=str(e),
+                            full_exception_trace=traceback.format_exc(),
+                        )
+
+                        if ctx.is_primary:
+                            update_connector_credential_pair(
+                                db_session=db_session_temp,
+                                connector_id=ctx.connector_id,
+                                credential_id=ctx.credential_id,
+                                net_docs=net_doc_change,
+                            )
+
+                    if INDEXING_TRACER_INTERVAL > 0:
+                        tracer.stop()
+                    raise e
+
+            # break => similar to success case. As mentioned above, if the next run fails for the same
+            # reason it will then be marked as a failure
+            break
+
+    if INDEXING_TRACER_INTERVAL > 0:
+        logger.debug(
+            f"Running trace comparison between start and end of indexing. {tracer_counter} batches processed."
        )
-        if isinstance(e, ConnectorValidationError):
-            # On validation errors during indexing, we want to cancel the indexing attempt
-            # and mark the CCPair as invalid. This prevents the connector from being
-            # used in the future until the credentials are updated.
-            with get_session_with_current_tenant() as db_session_temp:
-                mark_attempt_canceled(
-                    index_attempt_id,
-                    db_session_temp,
-                    reason=str(e),
+        tracer.snap()
+        tracer.log_first_diff(INDEXING_TRACER_NUM_PRINT_ENTRIES)
+        tracer.stop()
+        logger.debug("Memory tracer stopped.")
+
+    if (
+        index_attempt_md.num_exceptions > 0
+        and index_attempt_md.num_exceptions >= batch_num
+    ):
+        with get_session_with_tenant(tenant_id) as db_session_temp:
+            mark_attempt_failed(
+                index_attempt_id,
+                db_session_temp,
+                failure_reason="All batches exceptioned.",
+            )
+            if ctx.is_primary:
+                update_connector_credential_pair(
+                    db_session=db_session_temp,
+                    connector_id=ctx.connector_id,
+                    credential_id=ctx.credential_id,
                )
+            raise Exception(
+                f"Connector failed - All batches exceptioned: batches={batch_num}"
+            )

-                if ctx.is_primary:
-                    update_connector_credential_pair(
-                        db_session=db_session_temp,
-                        connector_id=ctx.connector_id,
-                        credential_id=ctx.credential_id,
-                        status=ConnectorCredentialPairStatus.INVALID,
-                    )
-            memory_tracer.stop()
-            raise e
+    elapsed_time = time.time() - start_time

-        elif isinstance(e, ConnectorStopSignal):
-            with get_session_with_current_tenant() as db_session_temp:
-                mark_attempt_canceled(
-                    index_attempt_id,
-                    db_session_temp,
-                    reason=str(e),
-                )
-
-                if ctx.is_primary:
-                    update_connector_credential_pair(
-                        db_session=db_session_temp,
-                        connector_id=ctx.connector_id,
-                        credential_id=ctx.credential_id,
-                        net_docs=net_doc_change,
-                    )
-
-            memory_tracer.stop()
-            raise e
-        else:
-            with get_session_with_current_tenant() as db_session_temp:
-                mark_attempt_failed(
-                    index_attempt_id,
-                    db_session_temp,
-                    failure_reason=str(e),
-                    full_exception_trace=traceback.format_exc(),
-                )
-
-                if ctx.is_primary:
-                    update_connector_credential_pair(
-                        db_session=db_session_temp,
-                        connector_id=ctx.connector_id,
-                        credential_id=ctx.credential_id,
-                        net_docs=net_doc_change,
-                    )
-
-            memory_tracer.stop()
-            raise e
-
-    memory_tracer.stop()
-
-    elapsed_time = time.monotonic() - start_time
-    with get_session_with_current_tenant() as db_session_temp:
-        # resolve entity-based errors
-        for error in entity_based_unresolved_errors:
-            logger.info(f"Resolving IndexAttemptError for entity '{error.entity_id}'")
-            error.is_resolved = True
-            db_session_temp.add(error)
-            db_session_temp.commit()
-
-        if total_failures == 0:
+    with get_session_with_tenant(tenant_id) as db_session_temp:
+        if index_attempt_md.num_exceptions == 0:
            mark_attempt_succeeded(index_attempt_id, db_session_temp)

            create_milestone_and_report(
@@ -663,7 +535,7 @@ def _run_indexing(
            mark_attempt_partially_succeeded(index_attempt_id, db_session_temp)
            logger.info(
                f"Connector completed with some errors: "
-                f"failures={total_failures} "
+                f"exceptions={index_attempt_md.num_exceptions} "
                f"batches={batch_num} "
                f"docs={document_count} "
                f"chunks={chunk_count} "
@@ -675,7 +547,7 @@ def _run_indexing(
                db_session=db_session_temp,
                connector_id=ctx.connector_id,
                credential_id=ctx.credential_id,
-                run_dt=window_end,
+                run_dt=run_end_dt,
            )


@@ -686,43 +558,46 @@ def run_indexing_entrypoint(
    is_ee: bool = False,
    callback: IndexingHeartbeatInterface | None = None,
 ) -> None:
-    """Don't swallow exceptions here ... propagate them up."""
+    try:
+        if is_ee:
+            global_version.set_ee()

-    if is_ee:
-        global_version.set_ee()
-
-    # set the indexing attempt ID so that all log messages from this process
-    # will have it added as a prefix
-    TaskAttemptSingleton.set_cc_and_index_id(
-        index_attempt_id, connector_credential_pair_id
-    )
-    with get_session_with_current_tenant() as db_session:
-        # TODO: remove long running session entirely
-        attempt = transition_attempt_to_in_progress(index_attempt_id, db_session)
-
-        tenant_str = ""
-        if tenant_id is not None:
-            tenant_str = f" for tenant {tenant_id}"
-
-        connector_name = attempt.connector_credential_pair.connector.name
-        connector_config = (
-            attempt.connector_credential_pair.connector.connector_specific_config
+        # set the indexing attempt ID so that all log messages from this process
+        # will have it added as a prefix
+        TaskAttemptSingleton.set_cc_and_index_id(
+            index_attempt_id, connector_credential_pair_id
        )
-        credential_id = attempt.connector_credential_pair.credential_id
+        with get_session_with_tenant(tenant_id) as db_session:
+            # TODO: remove long running session entirely
+            attempt = transition_attempt_to_in_progress(index_attempt_id, db_session)

-    logger.info(
-        f"Indexing starting{tenant_str}: "
-        f"connector='{connector_name}' "
-        f"config='{connector_config}' "
-        f"credentials='{credential_id}'"
-    )
+            tenant_str = ""
+            if tenant_id is not None:
+                tenant_str = f" for tenant {tenant_id}"

-    with get_session_with_current_tenant() as db_session:
-        _run_indexing(db_session, index_attempt_id, tenant_id, callback)
+            connector_name = attempt.connector_credential_pair.connector.name
+            connector_config = (
+                attempt.connector_credential_pair.connector.connector_specific_config
+            )
+            credential_id = attempt.connector_credential_pair.credential_id

-    logger.info(
-        f"Indexing finished{tenant_str}: "
-        f"connector='{connector_name}' "
-        f"config='{connector_config}' "
-        f"credentials='{credential_id}'"
-    )
+        logger.info(
+            f"Indexing starting{tenant_str}: "
+            f"connector='{connector_name}' "
+            f"config='{connector_config}' "
+            f"credentials='{credential_id}'"
+        )
+
+        with get_session_with_tenant(tenant_id) as db_session:
+            _run_indexing(db_session, index_attempt_id, tenant_id, callback)
+
+        logger.info(
+            f"Indexing finished{tenant_str}: "
+            f"connector='{connector_name}' "
+            f"config='{connector_config}' "
+            f"credentials='{credential_id}'"
+        )
+    except Exception as e:
+        logger.exception(
+            f"Indexing job with ID '{index_attempt_id}' for tenant {tenant_id} failed due to {e}"
+        )
--- a/backend/onyx/background/indexing/tracer.py
+++ b/backend/onyx/background/indexing/tracer.py
@@ -0,0 +1,106 @@
+import tracemalloc
+
+from onyx.utils.logger import setup_logger
+
+logger = setup_logger()
+
+# Number of stack frames tracemalloc stores for each allocation traceback.
+DANSWER_TRACEMALLOC_FRAMES = 10
+
+
+class OnyxTracer:
+    """Small wrapper around `tracemalloc` that logs memory snapshots and diffs.
+
+    Holds the first, the previous and the latest snapshot so callers can log
+    what changed since the first `snap()` or since the one before the latest.
+    All output goes to the module logger at debug level.
+    """
+
+    def __init__(self) -> None:
+        # First snapshot ever taken; baseline for `log_first_diff`.
+        self.snapshot_first: tracemalloc.Snapshot | None = None
+        # Snapshot taken just before the latest; baseline for `log_previous_diff`.
+        self.snapshot_prev: tracemalloc.Snapshot | None = None
+        # Most recent snapshot.
+        self.snapshot: tracemalloc.Snapshot | None = None
+
+    def start(self) -> None:
+        """Start tracing allocations, DANSWER_TRACEMALLOC_FRAMES frames deep."""
+        tracemalloc.start(DANSWER_TRACEMALLOC_FRAMES)
+
+    def stop(self) -> None:
+        """Stop tracing. Snapshots already stored on this object are kept."""
+        tracemalloc.stop()
+
+    def snap(self) -> None:
+        """Take a filtered snapshot and store it as the latest one.
+
+        The prior latest snapshot becomes `snapshot_prev`, and the first snapshot
+        taken is remembered in `snapshot_first`. If tracemalloc is not tracing,
+        nothing is recorded, because `tracemalloc.take_snapshot()` would raise
+        RuntimeError and memory diagnostics should not abort the caller.
+        """
+        if not tracemalloc.is_tracing():
+            logger.debug("Tracer snap skipped: tracemalloc is not tracing.")
+            return
+
+        snapshot = tracemalloc.take_snapshot()
+        # Filter out irrelevant frames (e.g., from tracemalloc itself or importlib)
+        snapshot = snapshot.filter_traces(
+            (
+                tracemalloc.Filter(False, tracemalloc.__file__),  # Exclude tracemalloc
+                tracemalloc.Filter(
+                    False, "<frozen importlib._bootstrap>"
+                ),  # Exclude importlib
+                tracemalloc.Filter(
+                    False, "<frozen importlib._bootstrap_external>"
+                ),  # Exclude external importlib
+            )
+        )
+
+        if self.snapshot_first is None:
+            self.snapshot_first = snapshot
+
+        if self.snapshot is not None:
+            self.snapshot_prev = self.snapshot
+
+        self.snapshot = snapshot
+
+    def log_snapshot(self, numEntries: int) -> None:
+        """Log the first `numEntries` traceback statistics of the latest snapshot."""
+        if self.snapshot is None:
+            return
+
+        stats = self.snapshot.statistics("traceback")
+        for s in stats[:numEntries]:
+            logger.debug(f"Tracer snap: {s}")
+            # format() renders frames the same way `log_diff` does
+            for line in s.traceback.format():
+                logger.debug(f"* {line}")
+
+    @staticmethod
+    def log_diff(
+        snap_current: tracemalloc.Snapshot,
+        snap_previous: tracemalloc.Snapshot,
+        numEntries: int,
+    ) -> None:
+        """Log the first `numEntries` traceback diffs of current vs. previous."""
+        stats = snap_current.compare_to(snap_previous, "traceback")
+        for s in stats[:numEntries]:
+            logger.debug(f"Tracer diff: {s}")
+            for line in s.traceback.format():
+                logger.debug(f"* {line}")
+
+    def log_previous_diff(self, numEntries: int) -> None:
+        """Log the diff between the latest snapshot and the one before it."""
+        if self.snapshot is None or self.snapshot_prev is None:
+            return
+
+        OnyxTracer.log_diff(self.snapshot, self.snapshot_prev, numEntries)
+
+    def log_first_diff(self, numEntries: int) -> None:
+        """Log the diff between the latest snapshot and the first one taken."""
+        if self.snapshot is None or self.snapshot_first is None:
+            return
+
+        OnyxTracer.log_diff(self.snapshot, self.snapshot_first, numEntries)
--- a/backend/onyx/chat/answer.py
+++ b/backend/onyx/chat/answer.py
@@ -27,10 +27,8 @@ from onyx.file_store.utils import InMemoryChatFile
 from onyx.llm.interfaces import LLM
 from onyx.tools.force import ForceUseTool
 from onyx.tools.tool import Tool
-from onyx.tools.tool_implementations.search.search_tool import QUERY_FIELD
 from onyx.tools.tool_implementations.search.search_tool import SearchTool
 from onyx.tools.utils import explicit_tool_calling_supported
-from onyx.utils.gpu_utils import gpu_status_request
 from onyx.utils.logger import setup_logger

 logger = setup_logger()
@@ -82,26 +80,6 @@ class Answer:
            and not skip_explicit_tool_calling
        )

-        rerank_settings = search_request.rerank_settings
-
-        using_cloud_reranking = (
-            rerank_settings is not None
-            and rerank_settings.rerank_provider_type is not None
-        )
-        allow_agent_reranking = gpu_status_request() or using_cloud_reranking
-
-        # TODO: this is a hack to force the query to be used for the search tool
-        #       this should be removed once we fully unify graph inputs (i.e.
-        #       remove SearchQuery entirely)
-        if (
-            force_use_tool.force_use
-            and search_tool
-            and force_use_tool.args
-            and force_use_tool.tool_name == search_tool.name
-            and QUERY_FIELD in force_use_tool.args
-        ):
-            search_request.query = force_use_tool.args[QUERY_FIELD]
-
        self.graph_inputs = GraphInputs(
            search_request=search_request,
            prompt_builder=prompt_builder,
@@ -116,6 +94,7 @@ class Answer:
            force_use_tool=force_use_tool,
            using_tool_calling_llm=using_tool_calling_llm,
        )
+        assert db_session, "db_session must be provided for agentic persistence"
        self.graph_persistence = GraphPersistence(
            db_session=db_session,
            chat_session_id=chat_session_id,
@@ -125,7 +104,6 @@ class Answer:
            use_agentic_search=use_agentic_search,
            skip_gen_ai_answer_generation=skip_gen_ai_answer_generation,
            allow_refinement=True,
-            allow_agent_reranking=allow_agent_reranking,
        )
        self.graph_config = GraphConfig(
            inputs=self.graph_inputs,
--- a/backend/onyx/chat/chat_utils.py
+++ b/backend/onyx/chat/chat_utils.py
@@ -190,8 +190,7 @@ def create_chat_chain(
            and previous_message.message_type == MessageType.ASSISTANT
            and mainline_messages
        ):
-            if current_message.refined_answer_improvement:
-                mainline_messages[-1] = current_message
+            mainline_messages[-1] = current_message
        else:
            mainline_messages.append(current_message)

--- a/backend/onyx/chat/models.py
+++ b/backend/onyx/chat/models.py
@@ -142,15 +142,6 @@ class MessageResponseIDInfo(BaseModel):
    reserved_assistant_message_id: int


-class AgentMessageIDInfo(BaseModel):
-    level: int
-    message_id: int
-
-
-class AgenticMessageResponseIDInfo(BaseModel):
-    agentic_message_ids: list[AgentMessageIDInfo]
-
-
 class StreamingError(BaseModel):
    error: str
    stack_trace: str | None = None
--- a/backend/onyx/chat/process_message.py
+++ b/backend/onyx/chat/process_message.py
@@ -7,12 +7,10 @@ from typing import cast

 from sqlalchemy.orm import Session

-from onyx.agents.agent_search.orchestration.nodes.call_tool import ToolCallException
+from onyx.agents.agent_search.orchestration.nodes.tool_call import ToolCallException
 from onyx.chat.answer import Answer
 from onyx.chat.chat_utils import create_chat_chain
 from onyx.chat.chat_utils import create_temporary_persona
-from onyx.chat.models import AgenticMessageResponseIDInfo
-from onyx.chat.models import AgentMessageIDInfo
 from onyx.chat.models import AgentSearchPacket
 from onyx.chat.models import AllCitations
 from onyx.chat.models import AnswerPostInfo
@@ -145,10 +143,9 @@ from onyx.utils.long_term_log import LongTermLogger
 from onyx.utils.telemetry import mt_cloud_telemetry
 from onyx.utils.timing import log_function_time
 from onyx.utils.timing import log_generator_function_time
-from shared_configs.contextvars import get_current_tenant_id
+from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR

 logger = setup_logger()
-ERROR_TYPE_CANCELLED = "cancelled"


 def _translate_citations(
@@ -310,7 +307,6 @@ ChatPacket = (
    | CustomToolResponse
    | MessageSpecificCitations
    | MessageResponseIDInfo
-    | AgenticMessageResponseIDInfo
    | StreamStopInfo
    | AgentSearchPacket
 )
@@ -346,7 +342,7 @@ def stream_chat_message_objects(
    3. [always] A set of streamed LLM tokens or an error anywhere along the line if something fails
    4. [always] Details on the final AI response message that is created
    """
-    tenant_id = get_current_tenant_id()
+    tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
    use_existing_user_message = new_msg_req.use_existing_user_message
    existing_assistant_message_id = new_msg_req.existing_assistant_message_id

@@ -635,7 +631,6 @@ def stream_chat_message_objects(
            db_session=db_session,
            commit=False,
            reserved_message_id=reserved_message_id,
-            is_agentic=new_msg_req.use_agentic_search,
        )

        prompt_override = new_msg_req.prompt_override or chat_session.prompt_override
@@ -1020,7 +1015,7 @@ def stream_chat_message_objects(
                if info.message_specific_citations
                else None
            ),
-            error=ERROR_TYPE_CANCELLED if answer.is_cancelled() else None,
+            error=None,
            tool_call=(
                ToolCall(
                    tool_id=tool_name_to_tool_id[info.tool_result.tool_name],
@@ -1038,7 +1033,6 @@ def stream_chat_message_objects(
        next_level = 1
        prev_message = gen_ai_response_message
        agent_answers = answer.llm_answer_by_level()
-        agentic_message_ids = []
        while next_level in agent_answers:
            next_answer = agent_answers[next_level]
            info = info_by_subq[
@@ -1059,12 +1053,7 @@ def stream_chat_message_objects(
                citations=info.message_specific_citations.citation_map
                if info.message_specific_citations
                else None,
-                error=ERROR_TYPE_CANCELLED if answer.is_cancelled() else None,
                refined_answer_improvement=refined_answer_improvement,
-                is_agentic=True,
-            )
-            agentic_message_ids.append(
-                AgentMessageIDInfo(level=next_level, message_id=next_answer_message.id)
            )
            next_level += 1
            prev_message = next_answer_message
@@ -1072,9 +1061,11 @@ def stream_chat_message_objects(
        logger.debug("Committing messages")
        db_session.commit()  # actually save user / assistant message

-        yield AgenticMessageResponseIDInfo(agentic_message_ids=agentic_message_ids)
+        msg_detail_response = translate_db_message_to_chat_message_detail(
+            gen_ai_response_message
+        )

-        yield translate_db_message_to_chat_message_detail(gen_ai_response_message)
+        yield msg_detail_response
    except Exception as e:
        error_msg = str(e)
        logger.exception(error_msg)
--- a/backend/onyx/configs/agent_configs.py
+++ b/backend/onyx/configs/agent_configs.py
@@ -8,101 +8,14 @@ AGENT_DEFAULT_RERANKING_HITS = 10
 AGENT_DEFAULT_SUB_QUESTION_MAX_CONTEXT_HITS = 8
 AGENT_DEFAULT_NUM_DOCS_FOR_INITIAL_DECOMPOSITION = 3
 AGENT_DEFAULT_NUM_DOCS_FOR_REFINED_DECOMPOSITION = 5
-
-AGENT_DEFAULT_MAX_STREAMED_DOCS_FOR_INITIAL_ANSWER = 25
-AGENT_DEFAULT_MAX_STREAMED_DOCS_FOR_REFINED_ANSWER = 35
-
-
 AGENT_DEFAULT_EXPLORATORY_SEARCH_RESULTS = 5
 AGENT_DEFAULT_MIN_ORIG_QUESTION_DOCS = 3
 AGENT_DEFAULT_MAX_ANSWER_CONTEXT_DOCS = 10
 AGENT_DEFAULT_MAX_STATIC_HISTORY_WORD_LENGTH = 2000

-INITIAL_SEARCH_DECOMPOSITION_ENABLED = True
-ALLOW_REFINEMENT = True
-
-AGENT_DEFAULT_RETRIEVAL_HITS = 15
-AGENT_DEFAULT_RERANKING_HITS = 10
-AGENT_DEFAULT_SUB_QUESTION_MAX_CONTEXT_HITS = 8
-AGENT_DEFAULT_NUM_DOCS_FOR_INITIAL_DECOMPOSITION = 3
-AGENT_DEFAULT_NUM_DOCS_FOR_REFINED_DECOMPOSITION = 5
-AGENT_DEFAULT_EXPLORATORY_SEARCH_RESULTS = 5
-AGENT_DEFAULT_MIN_ORIG_QUESTION_DOCS = 3
-AGENT_DEFAULT_MAX_ANSWER_CONTEXT_DOCS = 10
-AGENT_DEFAULT_MAX_STATIC_HISTORY_WORD_LENGTH = 2000
-
-AGENT_ANSWER_GENERATION_BY_FAST_LLM = (
-    os.environ.get("AGENT_ANSWER_GENERATION_BY_FAST_LLM", "").lower() == "true"
-)
-
-AGENT_RETRIEVAL_STATS = (
-    not os.environ.get("AGENT_RETRIEVAL_STATS") == "False"
-) or True  # default True
-
-
-AGENT_MAX_QUERY_RETRIEVAL_RESULTS = int(
-    os.environ.get("AGENT_MAX_QUERY_RETRIEVAL_RESULTS") or AGENT_DEFAULT_RETRIEVAL_HITS
-)  # 15
-
-AGENT_MAX_QUERY_RETRIEVAL_RESULTS = int(
-    os.environ.get("AGENT_MAX_QUERY_RETRIEVAL_RESULTS") or AGENT_DEFAULT_RETRIEVAL_HITS
-)  # 15
-
-# Reranking agent configs
-# Reranking stats - no influence on flow outside of stats collection
-AGENT_RERANKING_STATS = (
-    not os.environ.get("AGENT_RERANKING_STATS") == "True"
-) or False  # default False
-
-AGENT_MAX_QUERY_RETRIEVAL_RESULTS = int(
-    os.environ.get("AGENT_MAX_QUERY_RETRIEVAL_RESULTS") or AGENT_DEFAULT_RETRIEVAL_HITS
-)  # 15
-
-AGENT_RERANKING_MAX_QUERY_RETRIEVAL_RESULTS = int(
-    os.environ.get("AGENT_RERANKING_MAX_QUERY_RETRIEVAL_RESULTS")
-    or AGENT_DEFAULT_RERANKING_HITS
-)  # 10
-
-AGENT_NUM_DOCS_FOR_DECOMPOSITION = int(
-    os.environ.get("AGENT_NUM_DOCS_FOR_DECOMPOSITION")
-    or AGENT_DEFAULT_NUM_DOCS_FOR_INITIAL_DECOMPOSITION
-)  # 3
-
-AGENT_NUM_DOCS_FOR_REFINED_DECOMPOSITION = int(
-    os.environ.get("AGENT_NUM_DOCS_FOR_REFINED_DECOMPOSITION")
-    or AGENT_DEFAULT_NUM_DOCS_FOR_REFINED_DECOMPOSITION
-)  # 5
-
-AGENT_EXPLORATORY_SEARCH_RESULTS = int(
-    os.environ.get("AGENT_EXPLORATORY_SEARCH_RESULTS")
-    or AGENT_DEFAULT_EXPLORATORY_SEARCH_RESULTS
-)  # 5
-
-AGENT_MIN_ORIG_QUESTION_DOCS = int(
-    os.environ.get("AGENT_MIN_ORIG_QUESTION_DOCS")
-    or AGENT_DEFAULT_MIN_ORIG_QUESTION_DOCS
-)  # 3
-
-AGENT_MAX_ANSWER_CONTEXT_DOCS = int(
-    os.environ.get("AGENT_MAX_ANSWER_CONTEXT_DOCS")
-    or AGENT_DEFAULT_SUB_QUESTION_MAX_CONTEXT_HITS
-)  # 8
-
-
-AGENT_MAX_STATIC_HISTORY_WORD_LENGTH = int(
-    os.environ.get("AGENT_MAX_STATIC_HISTORY_WORD_LENGTH")
-    or AGENT_DEFAULT_MAX_STATIC_HISTORY_WORD_LENGTH
-)  # 2000
-
-AGENT_MAX_STREAMED_DOCS_FOR_INITIAL_ANSWER = int(
-    os.environ.get("AGENT_MAX_STREAMED_DOCS_FOR_INITIAL_ANSWER")
-    or AGENT_DEFAULT_MAX_STREAMED_DOCS_FOR_INITIAL_ANSWER
-)  # 25
-
-AGENT_MAX_STREAMED_DOCS_FOR_REFINED_ANSWER = int(
-    os.environ.get("AGENT_MAX_STREAMED_DOCS_FOR_REFINED_ANSWER")
-    or AGENT_DEFAULT_MAX_STREAMED_DOCS_FOR_REFINED_ANSWER
-)  # 35
+#####
+# Agent Configs
+#####


 AGENT_RETRIEVAL_STATS = (
@@ -164,173 +77,4 @@ AGENT_MAX_STATIC_HISTORY_WORD_LENGTH = int(
    or AGENT_DEFAULT_MAX_STATIC_HISTORY_WORD_LENGTH
 )  # 2000

-
-AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_ENTITY_TERM_EXTRACTION = 10  # in seconds
-AGENT_TIMEOUT_CONNECT_LLM_ENTITY_TERM_EXTRACTION = int(
-    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_ENTITY_TERM_EXTRACTION")
-    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_ENTITY_TERM_EXTRACTION
-)
-
-AGENT_DEFAULT_TIMEOUT_LLM_ENTITY_TERM_EXTRACTION = 30  # in seconds
-AGENT_TIMEOUT_LLM_ENTITY_TERM_EXTRACTION = int(
-    os.environ.get("AGENT_TIMEOUT_LLM_ENTITY_TERM_EXTRACTION")
-    or AGENT_DEFAULT_TIMEOUT_LLM_ENTITY_TERM_EXTRACTION
-)
-
-
-AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_DOCUMENT_VERIFICATION = 3  # in seconds
-AGENT_TIMEOUT_CONNECT_LLM_DOCUMENT_VERIFICATION = int(
-    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_DOCUMENT_VERIFICATION")
-    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_DOCUMENT_VERIFICATION
-)
-
-AGENT_DEFAULT_TIMEOUT_LLM_DOCUMENT_VERIFICATION = 5  # in seconds
-AGENT_TIMEOUT_LLM_DOCUMENT_VERIFICATION = int(
-    os.environ.get("AGENT_TIMEOUT_LLM_DOCUMENT_VERIFICATION")
-    or AGENT_DEFAULT_TIMEOUT_LLM_DOCUMENT_VERIFICATION
-)
-
-
-AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_GENERAL_GENERATION = 5  # in seconds
-AGENT_TIMEOUT_CONNECT_LLM_GENERAL_GENERATION = int(
-    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_GENERAL_GENERATION")
-    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_GENERAL_GENERATION
-)
-
-AGENT_DEFAULT_TIMEOUT_LLM_GENERAL_GENERATION = 30  # in seconds
-AGENT_TIMEOUT_LLM_GENERAL_GENERATION = int(
-    os.environ.get("AGENT_TIMEOUT_LLM_GENERAL_GENERATION")
-    or AGENT_DEFAULT_TIMEOUT_LLM_GENERAL_GENERATION
-)
-
-
-AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_SUBQUESTION_GENERATION = 4  # in seconds
-AGENT_TIMEOUT_CONNECT_LLM_SUBQUESTION_GENERATION = int(
-    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_SUBQUESTION_GENERATION")
-    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_SUBQUESTION_GENERATION
-)
-
-AGENT_DEFAULT_TIMEOUT_LLM_SUBQUESTION_GENERATION = 5  # in seconds
-AGENT_TIMEOUT_LLM_SUBQUESTION_GENERATION = int(
-    os.environ.get("AGENT_TIMEOUT_LLM_SUBQUESTION_GENERATION")
-    or AGENT_DEFAULT_TIMEOUT_LLM_SUBQUESTION_GENERATION
-)
-
-
-AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION = 4  # in seconds
-AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION = int(
-    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION")
-    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_SUBANSWER_GENERATION
-)
-
-AGENT_DEFAULT_TIMEOUT_LLM_SUBANSWER_GENERATION = 30  # in seconds
-AGENT_TIMEOUT_LLM_SUBANSWER_GENERATION = int(
-    os.environ.get("AGENT_TIMEOUT_LLM_SUBANSWER_GENERATION")
-    or AGENT_DEFAULT_TIMEOUT_LLM_SUBANSWER_GENERATION
-)
-
-
-AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION = 5  # in seconds
-AGENT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION = int(
-    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION")
-    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_INITIAL_ANSWER_GENERATION
-)
-
-AGENT_DEFAULT_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION = 25  # in seconds
-AGENT_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION = int(
-    os.environ.get("AGENT_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION")
-    or AGENT_DEFAULT_TIMEOUT_LLM_INITIAL_ANSWER_GENERATION
-)
-
-
-AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION = 5  # in seconds
-AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION = int(
-    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION")
-    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_GENERATION
-)
-
-AGENT_DEFAULT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION = 30  # in seconds
-AGENT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION = int(
-    os.environ.get("AGENT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION")
-    or AGENT_DEFAULT_TIMEOUT_LLM_REFINED_ANSWER_GENERATION
-)
-
-
-AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_SUBANSWER_CHECK = 4  # in seconds
-AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_CHECK = int(
-    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_SUBANSWER_CHECK")
-    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_SUBANSWER_CHECK
-)
-
-AGENT_DEFAULT_TIMEOUT_LLM_SUBANSWER_CHECK = 8  # in seconds
-AGENT_TIMEOUT_LLM_SUBANSWER_CHECK = int(
-    os.environ.get("AGENT_TIMEOUT_LLM_SUBANSWER_CHECK")
-    or AGENT_DEFAULT_TIMEOUT_LLM_SUBANSWER_CHECK
-)
-
-
-AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_REFINED_SUBQUESTION_GENERATION = 4  # in seconds
-AGENT_TIMEOUT_CONNECT_LLM_REFINED_SUBQUESTION_GENERATION = int(
-    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_REFINED_SUBQUESTION_GENERATION")
-    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_REFINED_SUBQUESTION_GENERATION
-)
-
-AGENT_DEFAULT_TIMEOUT_LLM_REFINED_SUBQUESTION_GENERATION = 8  # in seconds
-AGENT_TIMEOUT_LLM_REFINED_SUBQUESTION_GENERATION = int(
-    os.environ.get("AGENT_TIMEOUT_LLM_REFINED_SUBQUESTION_GENERATION")
-    or AGENT_DEFAULT_TIMEOUT_LLM_REFINED_SUBQUESTION_GENERATION
-)
-
-
-AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_QUERY_REWRITING_GENERATION = 2  # in seconds
-AGENT_TIMEOUT_CONNECT_LLM_QUERY_REWRITING_GENERATION = int(
-    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_QUERY_REWRITING_GENERATION")
-    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_QUERY_REWRITING_GENERATION
-)
-
-AGENT_DEFAULT_TIMEOUT_LLM_QUERY_REWRITING_GENERATION = 3  # in seconds
-AGENT_TIMEOUT_LLM_QUERY_REWRITING_GENERATION = int(
-    os.environ.get("AGENT_TIMEOUT_LLM_QUERY_REWRITING_GENERATION")
-    or AGENT_DEFAULT_TIMEOUT_LLM_QUERY_REWRITING_GENERATION
-)
-
-
-AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION = 4  # in seconds
-AGENT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION = int(
-    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION")
-    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_HISTORY_SUMMARY_GENERATION
-)
-
-AGENT_DEFAULT_TIMEOUT_LLM_HISTORY_SUMMARY_GENERATION = 5  # in seconds
-AGENT_TIMEOUT_LLM_HISTORY_SUMMARY_GENERATION = int(
-    os.environ.get("AGENT_TIMEOUT_LLM_HISTORY_SUMMARY_GENERATION")
-    or AGENT_DEFAULT_TIMEOUT_LLM_HISTORY_SUMMARY_GENERATION
-)
-
-
-AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_COMPARE_ANSWERS = 4  # in seconds
-AGENT_TIMEOUT_CONNECT_LLM_COMPARE_ANSWERS = int(
-    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_COMPARE_ANSWERS")
-    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_COMPARE_ANSWERS
-)
-
-AGENT_DEFAULT_TIMEOUT_LLM_COMPARE_ANSWERS = 8  # in seconds
-AGENT_TIMEOUT_LLM_COMPARE_ANSWERS = int(
-    os.environ.get("AGENT_TIMEOUT_LLM_COMPARE_ANSWERS")
-    or AGENT_DEFAULT_TIMEOUT_LLM_COMPARE_ANSWERS
-)
-
-
-AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_VALIDATION = 4  # in seconds
-AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_VALIDATION = int(
-    os.environ.get("AGENT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_VALIDATION")
-    or AGENT_DEFAULT_TIMEOUT_CONNECT_LLM_REFINED_ANSWER_VALIDATION
-)
-
-AGENT_DEFAULT_TIMEOUT_LLM_REFINED_ANSWER_VALIDATION = 8  # in seconds
-AGENT_TIMEOUT_LLM_REFINED_ANSWER_VALIDATION = int(
-    os.environ.get("AGENT_TIMEOUT_LLM_REFINED_ANSWER_VALIDATION")
-    or AGENT_DEFAULT_TIMEOUT_LLM_REFINED_ANSWER_VALIDATION
-)
-
 GRAPH_VERSION_NAME: str = "a"
--- a/backend/onyx/configs/app_configs.py
+++ b/backend/onyx/configs/app_configs.py
@@ -158,7 +158,7 @@ POSTGRES_USER = os.environ.get("POSTGRES_USER") or "postgres"
 POSTGRES_PASSWORD = urllib.parse.quote_plus(
    os.environ.get("POSTGRES_PASSWORD") or "password"
 )
-POSTGRES_HOST = os.environ.get("POSTGRES_HOST") or "127.0.0.1"
+POSTGRES_HOST = os.environ.get("POSTGRES_HOST") or "localhost"
 POSTGRES_PORT = os.environ.get("POSTGRES_PORT") or "5432"
 POSTGRES_DB = os.environ.get("POSTGRES_DB") or "postgres"
 AWS_REGION_NAME = os.environ.get("AWS_REGION_NAME") or "us-east-2"
@@ -169,11 +169,6 @@ POSTGRES_API_SERVER_POOL_SIZE = int(
 POSTGRES_API_SERVER_POOL_OVERFLOW = int(
    os.environ.get("POSTGRES_API_SERVER_POOL_OVERFLOW") or 10
 )
-
-# defaults to False
-# generally should only be used for
-POSTGRES_USE_NULL_POOL = os.environ.get("POSTGRES_USE_NULL_POOL", "").lower() == "true"
-
 # defaults to False
 POSTGRES_POOL_PRE_PING = os.environ.get("POSTGRES_POOL_PRE_PING", "").lower() == "true"

@@ -626,10 +621,6 @@ POD_NAMESPACE = os.environ.get("POD_NAMESPACE")

 DEV_MODE = os.environ.get("DEV_MODE", "").lower() == "true"

-INTEGRATION_TESTS_MODE = os.environ.get("INTEGRATION_TESTS_MODE", "").lower() == "true"
-
-MOCK_CONNECTOR_FILE_PATH = os.environ.get("MOCK_CONNECTOR_FILE_PATH")
-
 TEST_ENV = os.environ.get("TEST_ENV", "").lower() == "true"

 # Set to true to mock LLM responses for testing purposes
--- a/backend/onyx/configs/constants.py
+++ b/backend/onyx/configs/constants.py
@@ -98,18 +98,9 @@ CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT = 120

 CELERY_PRIMARY_WORKER_LOCK_TIMEOUT = 120

-
-# hard timeout applied by the watchdog to the indexing connector run
-# to handle hung connectors
-CELERY_INDEXING_WATCHDOG_CONNECTOR_TIMEOUT = 3 * 60 * 60  # 3 hours (in seconds)
-
-# soft timeout for the lock taken by the indexing connector run
-# allows the lock to eventually expire if the managing code around it dies
+# needs to be long enough to cover the maximum time it takes to download an object
 # if we can get callbacks as object bytes download, we could lower this a lot.
-# CELERY_INDEXING_WATCHDOG_CONNECTOR_TIMEOUT + 15 minutes
-# hard termination should always fire first if the connector is hung
-CELERY_INDEXING_LOCK_TIMEOUT = CELERY_INDEXING_WATCHDOG_CONNECTOR_TIMEOUT + 900
-
+CELERY_INDEXING_LOCK_TIMEOUT = 3 * 60 * 60  # 3 hours (in seconds)

 # how long a task should wait for associated fence to be ready
 CELERY_TASK_WAIT_FOR_FENCE_TIMEOUT = 5 * 60  # 5 min
@@ -134,7 +125,6 @@ class DocumentSource(str, Enum):
    GMAIL = "gmail"
    REQUESTTRACKER = "requesttracker"
    GITHUB = "github"
-    GITBOOK = "gitbook"
    GITLAB = "gitlab"
    GURU = "guru"
    BOOKSTACK = "bookstack"
@@ -174,9 +164,6 @@ class DocumentSource(str, Enum):
    EGNYTE = "egnyte"
    AIRTABLE = "airtable"

-    # Special case just for integration tests
-    MOCK_CONNECTOR = "mock_connector"
-

 DocumentSourceRequiringTenantContext: list[DocumentSource] = [DocumentSource.FILE]

@@ -255,7 +242,6 @@ class FileOrigin(str, Enum):
    CHAT_IMAGE_GEN = "chat_image_gen"
    CONNECTOR = "connector"
    GENERATED_REPORT = "generated_report"
-    INDEXING_CHECKPOINT = "indexing_checkpoint"
    OTHER = "other"


@@ -287,7 +273,6 @@ class OnyxCeleryQueues:
    DOC_PERMISSIONS_UPSERT = "doc_permissions_upsert"
    CONNECTOR_DELETION = "connector_deletion"
    LLM_MODEL_UPDATE = "llm_model_update"
-    CHECKPOINT_CLEANUP = "checkpoint_cleanup"

    # Heavy queue
    CONNECTOR_PRUNING = "connector_pruning"
@@ -307,7 +292,6 @@ class OnyxRedisLocks:
    CHECK_CONNECTOR_DELETION_BEAT_LOCK = "da_lock:check_connector_deletion_beat"
    CHECK_PRUNE_BEAT_LOCK = "da_lock:check_prune_beat"
    CHECK_INDEXING_BEAT_LOCK = "da_lock:check_indexing_beat"
-    CHECK_CHECKPOINT_CLEANUP_BEAT_LOCK = "da_lock:check_checkpoint_cleanup_beat"
    CHECK_CONNECTOR_DOC_PERMISSIONS_SYNC_BEAT_LOCK = (
        "da_lock:check_connector_doc_permissions_sync_beat"
    )
@@ -383,10 +367,6 @@ class OnyxCeleryTask:
    CHECK_FOR_EXTERNAL_GROUP_SYNC = "check_for_external_group_sync"
    CHECK_FOR_LLM_MODEL_UPDATE = "check_for_llm_model_update"

-    # Connector checkpoint cleanup
-    CHECK_FOR_CHECKPOINT_CLEANUP = "check_for_checkpoint_cleanup"
-    CLEANUP_CHECKPOINT = "cleanup_checkpoint"
-
    MONITOR_BACKGROUND_PROCESSES = "monitor_background_processes"
    MONITOR_CELERY_QUEUES = "monitor_celery_queues"

--- a/backend/onyx/connectors/airtable/airtable_connector.py
+++ b/backend/onyx/connectors/airtable/airtable_connector.py
@@ -245,7 +245,7 @@ class AirtableConnector(LoadConnector):
            return [(" ".join(combined) if combined else str(field_info), default_link)]

        if isinstance(field_info, list):
-            return [(str(item), default_link) for item in field_info]
+            return [(item, default_link) for item in field_info]

        return [(str(field_info), default_link)]

@@ -268,7 +268,7 @@ class AirtableConnector(LoadConnector):
        table_id: str,
        view_id: str | None,
        record_id: str,
-    ) -> tuple[list[Section], dict[str, str | list[str]]]:
+    ) -> tuple[list[Section], dict[str, Any]]:
        """
        Process a single Airtable field and return sections or metadata.

@@ -342,7 +342,7 @@ class AirtableConnector(LoadConnector):
        record_id = record["id"]
        fields = record["fields"]
        sections: list[Section] = []
-        metadata: dict[str, str | list[str]] = {}
+        metadata: dict[str, Any] = {}

        # Get primary field value if it exists
        primary_field_value = (
--- a/backend/onyx/connectors/bookstack/client.py
+++ b/backend/onyx/connectors/bookstack/client.py
@@ -5,8 +5,6 @@ import requests

 class BookStackClientRequestFailedError(ConnectionError):
    def __init__(self, status: int, error: str) -> None:
-        self.status_code = status
-        self.error = error
        super().__init__(
            "BookStack Client request failed with status {status}: {error}".format(
                status=status, error=error
--- a/backend/onyx/connectors/bookstack/connector.py
+++ b/backend/onyx/connectors/bookstack/connector.py
@@ -7,12 +7,8 @@ from typing import Any
 from onyx.configs.app_configs import INDEX_BATCH_SIZE
 from onyx.configs.constants import DocumentSource
 from onyx.connectors.bookstack.client import BookStackApiClient
-from onyx.connectors.bookstack.client import BookStackClientRequestFailedError
 from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
-from onyx.connectors.interfaces import ConnectorValidationError
-from onyx.connectors.interfaces import CredentialExpiredError
 from onyx.connectors.interfaces import GenerateDocumentsOutput
-from onyx.connectors.interfaces import InsufficientPermissionsError
 from onyx.connectors.interfaces import LoadConnector
 from onyx.connectors.interfaces import PollConnector
 from onyx.connectors.interfaces import SecondsSinceUnixEpoch
@@ -218,39 +214,3 @@ class BookstackConnector(LoadConnector, PollConnector):
                    break
                else:
                    time.sleep(0.2)
-
-    def validate_connector_settings(self) -> None:
-        """
-        Validate that the BookStack credentials and connector settings are correct.
-        Specifically checks that we can make an authenticated request to BookStack.
-        """
-        if not self.bookstack_client:
-            raise ConnectorMissingCredentialError(
-                "BookStack credentials have not been loaded."
-            )
-
-        try:
-            # Attempt to fetch a small batch of books (arbitrary endpoint) to verify credentials
-            _ = self.bookstack_client.get(
-                "/books", params={"count": "1", "offset": "0"}
-            )
-
-        except BookStackClientRequestFailedError as e:
-            # Check for HTTP status codes
-            if e.status_code == 401:
-                raise CredentialExpiredError(
-                    "Your BookStack credentials appear to be invalid or expired (HTTP 401)."
-                ) from e
-            elif e.status_code == 403:
-                raise InsufficientPermissionsError(
-                    "The configured BookStack token does not have sufficient permissions (HTTP 403)."
-                ) from e
-            else:
-                raise ConnectorValidationError(
-                    f"Unexpected BookStack error (status={e.status_code}): {e}"
-                ) from e
-
-        except Exception as exc:
-            raise ConnectorValidationError(
-                f"Unexpected error while validating BookStack connector settings: {exc}"
-            ) from exc
--- a/backend/onyx/connectors/confluence/onyx_confluence.py
+++ b/backend/onyx/connectors/confluence/onyx_confluence.py
@@ -8,7 +8,6 @@ from typing import TypeVar
 from urllib.parse import quote

 from atlassian import Confluence  # type:ignore
-from pydantic import BaseModel
 from requests import HTTPError

 from onyx.utils.logger import setup_logger
@@ -30,16 +29,6 @@ class ConfluenceRateLimitError(Exception):
    pass


-class ConfluenceUser(BaseModel):
-    user_id: str  # accountId in Cloud, userKey in Server
-    username: str | None  # Confluence Cloud doesn't give usernames
-    display_name: str
-    # Confluence Data Center doesn't give email back by default,
-    # have to fetch it with a different endpoint
-    email: str | None
-    type: str
-
-
 def _handle_http_error(e: HTTPError, attempt: int) -> int:
    MIN_DELAY = 2
    MAX_DELAY = 60
@@ -286,95 +275,21 @@ class OnyxConfluence(Confluence):
        self,
        expand: str | None = None,
        limit: int | None = None,
-    ) -> Iterator[ConfluenceUser]:
+    ) -> Iterator[dict[str, Any]]:
        """
        The search/user endpoint can be used to fetch users.
        It's a separate endpoint from the content/search endpoint used only for users.
        Otherwise it's very similar to the content/search endpoint.
        """
-        if self.cloud:
-            cql = "type=user"
-            url = "rest/api/search/user"
-            expand_string = f"&expand={expand}" if expand else ""
-            url += f"?cql={cql}{expand_string}"
-            for user_result in self._paginate_url(url, limit):
-                # Example response:
-                # {
-                #     'user': {
-                #         'type': 'known',
-                #         'accountId': '712020:35e60fbb-d0f3-4c91-b8c1-f2dd1d69462d',
-                #         'accountType': 'atlassian',
-                #         'email': 'chris@danswer.ai',
-                #         'publicName': 'Chris Weaver',
-                #         'profilePicture': {
-                #             'path': '/wiki/aa-avatar/712020:35e60fbb-d0f3-4c91-b8c1-f2dd1d69462d',
-                #             'width': 48,
-                #             'height': 48,
-                #             'isDefault': False
-                #         },
-                #         'displayName': 'Chris Weaver',
-                #         'isExternalCollaborator': False,
-                #         '_expandable': {
-                #             'operations': '',
-                #             'personalSpace': ''
-                #         },
-                #         '_links': {
-                #             'self': 'https://danswerai.atlassian.net/wiki/rest/api/user?accountId=712020:35e60fbb-d0f3-4c91-b8c1-f2dd1d69462d'
-                #         }
-                #     },
-                #     'title': 'Chris Weaver',
-                #     'excerpt': '',
-                #     'url': '/people/712020:35e60fbb-d0f3-4c91-b8c1-f2dd1d69462d',
-                #     'breadcrumbs': [],
-                #     'entityType': 'user',
-                #     'iconCssClass': 'aui-icon content-type-profile',
-                #     'lastModified': '2025-02-18T04:08:03.579Z',
-                #     'score': 0.0
-                # }
-                user = user_result["user"]
-                yield ConfluenceUser(
-                    user_id=user["accountId"],
-                    username=None,
-                    display_name=user["displayName"],
-                    email=user.get("email"),
-                    type=user["accountType"],
-                )
-        else:
-            # https://developer.atlassian.com/server/confluence/rest/v900/api-group-user/#api-rest-api-user-list-get
-            # ^ is only available on data center deployments
-            # Example response:
-            # [
-            #     {
-            #         'type': 'known',
-            #         'username': 'admin',
-            #         'userKey': '40281082950c5fe901950c61c55d0000',
-            #         'profilePicture': {
-            #             'path': '/images/icons/profilepics/default.svg',
-            #             'width': 48,
-            #             'height': 48,
-            #             'isDefault': True
-            #         },
-            #         'displayName': 'Admin Test',
-            #         '_links': {
-            #             'self': 'http://localhost:8090/rest/api/user?key=40281082950c5fe901950c61c55d0000'
-            #         },
-            #         '_expandable': {
-            #             'status': ''
-            #         }
-            #     }
-            # ]
-            for user in self._paginate_url("rest/api/user/list", limit):
-                yield ConfluenceUser(
-                    user_id=user["userKey"],
-                    username=user["username"],
-                    display_name=user["displayName"],
-                    email=None,
-                    type=user.get("type", "user"),
-                )
+        cql = "type=user"
+        url = "rest/api/search/user" if self.cloud else "rest/api/search"
+        expand_string = f"&expand={expand}" if expand else ""
+        url += f"?cql={cql}{expand_string}"
+        yield from self._paginate_url(url, limit)

    def paginated_groups_by_user_retrieval(
        self,
-        user_id: str,  # accountId in Cloud, userKey in Server
+        user: dict[str, Any],
        limit: int | None = None,
    ) -> Iterator[dict[str, Any]]:
        """
@@ -382,7 +297,7 @@ class OnyxConfluence(Confluence):
        It's a confluence specific endpoint that can be used to fetch groups.
        """
        user_field = "accountId" if self.cloud else "key"
-        user_value = user_id
+        user_value = user["accountId"] if self.cloud else user["userKey"]
        # Server uses userKey (but calls it key during the API call), Cloud uses accountId
        user_query = f"{user_field}={quote(user_value)}"

--- a/backend/onyx/connectors/connector_runner.py
+++ b/backend/onyx/connectors/connector_runner.py
@@ -1,16 +1,11 @@
 import sys
 import time
-from collections.abc import Generator
 from datetime import datetime

 from onyx.connectors.interfaces import BaseConnector
-from onyx.connectors.interfaces import CheckpointConnector
-from onyx.connectors.interfaces import CheckpointOutput
+from onyx.connectors.interfaces import GenerateDocumentsOutput
 from onyx.connectors.interfaces import LoadConnector
 from onyx.connectors.interfaces import PollConnector
-from onyx.connectors.models import ConnectorCheckpoint
-from onyx.connectors.models import ConnectorFailure
-from onyx.connectors.models import Document
 from onyx.utils.logger import setup_logger


@@ -20,139 +15,48 @@ logger = setup_logger()
 TimeRange = tuple[datetime, datetime]


-class CheckpointOutputWrapper:
-    """
-    Wraps a CheckpointOutput generator to give things back in a more digestible format.
-    The connector format is easier for the connector implementor (e.g. it enforces exactly
-    one new checkpoint is returned AND that the checkpoint is at the end), thus the different
-    formats.
-    """
-
-    def __init__(self) -> None:
-        self.next_checkpoint: ConnectorCheckpoint | None = None
-
-    def __call__(
-        self,
-        checkpoint_connector_generator: CheckpointOutput,
-    ) -> Generator[
-        tuple[Document | None, ConnectorFailure | None, ConnectorCheckpoint | None],
-        None,
-        None,
-    ]:
-        # grabs the final return value and stores it in the `next_checkpoint` variable
-        def _inner_wrapper(
-            checkpoint_connector_generator: CheckpointOutput,
-        ) -> CheckpointOutput:
-            self.next_checkpoint = yield from checkpoint_connector_generator
-            return self.next_checkpoint  # not used
-
-        for document_or_failure in _inner_wrapper(checkpoint_connector_generator):
-            if isinstance(document_or_failure, Document):
-                yield document_or_failure, None, None
-            elif isinstance(document_or_failure, ConnectorFailure):
-                yield None, document_or_failure, None
-            else:
-                raise ValueError(
-                    f"Invalid document_or_failure type: {type(document_or_failure)}"
-                )
-
-        if self.next_checkpoint is None:
-            raise RuntimeError(
-                "Checkpoint is None. This should never happen - the connector should always return a checkpoint."
-            )
-
-        yield None, None, self.next_checkpoint
-
-
 class ConnectorRunner:
-    """
-    Handles:
-        - Batching
-        - Additional exception logging
-        - Combining different connector types to a single interface
-    """
-
    def __init__(
        self,
        connector: BaseConnector,
-        batch_size: int,
        time_range: TimeRange | None = None,
+        fail_loudly: bool = False,
    ):
        self.connector = connector
-        self.time_range = time_range
-        self.batch_size = batch_size

-        self.doc_batch: list[Document] = []
+        if isinstance(self.connector, PollConnector):
+            if time_range is None:
+                raise ValueError("time_range is required for PollConnector")

-    def run(
-        self, checkpoint: ConnectorCheckpoint
-    ) -> Generator[
-        tuple[
-            list[Document] | None, ConnectorFailure | None, ConnectorCheckpoint | None
-        ],
-        None,
-        None,
-    ]:
+            self.doc_batch_generator = self.connector.poll_source(
+                time_range[0].timestamp(), time_range[1].timestamp()
+            )
+
+        elif isinstance(self.connector, LoadConnector):
+            if time_range and fail_loudly:
+                raise ValueError(
+                    "time_range specified, but passed in connector is not a PollConnector"
+                )
+
+            self.doc_batch_generator = self.connector.load_from_state()
+
+        else:
+            raise ValueError(f"Invalid connector. type: {type(self.connector)}")
+
+    def run(self) -> GenerateDocumentsOutput:
        """Adds additional exception logging to the connector."""
        try:
-            if isinstance(self.connector, CheckpointConnector):
-                if self.time_range is None:
-                    raise ValueError("time_range is required for CheckpointConnector")
+            start = time.monotonic()
+            for batch in self.doc_batch_generator:
+                # to know how long connector is taking
+                logger.debug(
+                    f"Connector took {time.monotonic() - start} seconds to build a batch."
+                )
+
+                yield batch

                start = time.monotonic()
-                checkpoint_connector_generator = self.connector.load_from_checkpoint(
-                    start=self.time_range[0].timestamp(),
-                    end=self.time_range[1].timestamp(),
-                    checkpoint=checkpoint,
-                )
-                next_checkpoint: ConnectorCheckpoint | None = None
-                # this is guaranteed to always run at least once with next_checkpoint being non-None
-                for document, failure, next_checkpoint in CheckpointOutputWrapper()(
-                    checkpoint_connector_generator
-                ):
-                    if document is not None:
-                        self.doc_batch.append(document)

-                    if failure is not None:
-                        yield None, failure, None
-
-                    if len(self.doc_batch) >= self.batch_size:
-                        yield self.doc_batch, None, None
-                        self.doc_batch = []
-
-                # yield remaining documents
-                if len(self.doc_batch) > 0:
-                    yield self.doc_batch, None, None
-                    self.doc_batch = []
-
-                yield None, None, next_checkpoint
-
-                logger.debug(
-                    f"Connector took {time.monotonic() - start} seconds to get to the next checkpoint."
-                )
-
-            else:
-                finished_checkpoint = ConnectorCheckpoint.build_dummy_checkpoint()
-                finished_checkpoint.has_more = False
-
-                if isinstance(self.connector, PollConnector):
-                    if self.time_range is None:
-                        raise ValueError("time_range is required for PollConnector")
-
-                    for document_batch in self.connector.poll_source(
-                        start=self.time_range[0].timestamp(),
-                        end=self.time_range[1].timestamp(),
-                    ):
-                        yield document_batch, None, None
-
-                    yield None, None, finished_checkpoint
-                elif isinstance(self.connector, LoadConnector):
-                    for document_batch in self.connector.load_from_state():
-                        yield document_batch, None, None
-
-                    yield None, None, finished_checkpoint
-                else:
-                    raise ValueError(f"Invalid connector. type: {type(self.connector)}")
        except Exception:
            exc_type, _, exc_traceback = sys.exc_info()

@@ -172,6 +76,6 @@ class ConnectorRunner:
            )
            logger.error(
                f"Error in connector. type: {exc_type};\n"
-                f"local_vars below -> \n{local_vars_str[:1024]}"
+                f"local_vars below -> \n{local_vars_str}"
            )
            raise
--- a/backend/onyx/connectors/cross_connector_utils/miscellaneous_utils.py
+++ b/backend/onyx/connectors/cross_connector_utils/miscellaneous_utils.py
@@ -1,4 +1,3 @@
-import re
 from collections.abc import Callable
 from collections.abc import Iterator
 from datetime import datetime
@@ -25,22 +24,16 @@ def datetime_to_utc(dt: datetime) -> datetime:


 def time_str_to_utc(datetime_str: str) -> datetime:
-    # Remove all timezone abbreviations in parentheses
-    datetime_str = re.sub(r"\([A-Z]+\)", "", datetime_str).strip()
-
-    # Remove any remaining parentheses and their contents
-    datetime_str = re.sub(r"\(.*?\)", "", datetime_str).strip()
-
    try:
        dt = parse(datetime_str)
    except ValueError:
-        # Fix common format issues (e.g. "0000" => "+0000")
+        # Handle malformed timezone by attempting to fix common format issues
        if "0000" in datetime_str:
-            datetime_str = datetime_str.replace(" 0000", " +0000")
-            dt = parse(datetime_str)
+            # Convert "0000" to "+0000" for proper timezone parsing
+            fixed_dt_str = datetime_str.replace(" 0000", " +0000")
+            dt = parse(fixed_dt_str)
        else:
            raise
-
    return datetime_to_utc(dt)


--- a/backend/onyx/connectors/dropbox/connector.py
+++ b/backend/onyx/connectors/dropbox/connector.py
@@ -4,16 +4,12 @@ from typing import Any

 from dropbox import Dropbox  # type: ignore
 from dropbox.exceptions import ApiError  # type:ignore
-from dropbox.exceptions import AuthError  # type:ignore
 from dropbox.files import FileMetadata  # type:ignore
 from dropbox.files import FolderMetadata  # type:ignore

 from onyx.configs.app_configs import INDEX_BATCH_SIZE
 from onyx.configs.constants import DocumentSource
-from onyx.connectors.interfaces import ConnectorValidationError
-from onyx.connectors.interfaces import CredentialInvalidError
 from onyx.connectors.interfaces import GenerateDocumentsOutput
-from onyx.connectors.interfaces import InsufficientPermissionsError
 from onyx.connectors.interfaces import LoadConnector
 from onyx.connectors.interfaces import PollConnector
 from onyx.connectors.interfaces import SecondsSinceUnixEpoch
@@ -145,29 +141,6 @@ class DropboxConnector(LoadConnector, PollConnector):

        return None

-    def validate_connector_settings(self) -> None:
-        if self.dropbox_client is None:
-            raise ConnectorMissingCredentialError("Dropbox credentials not loaded.")
-
-        try:
-            self.dropbox_client.files_list_folder(path="", limit=1)
-        except AuthError as e:
-            logger.exception("Failed to validate Dropbox credentials")
-            raise CredentialInvalidError(f"Dropbox credential is invalid: {e.error}")
-        except ApiError as e:
-            if (
-                e.error is not None
-                and "insufficient_permissions" in str(e.error).lower()
-            ):
-                raise InsufficientPermissionsError(
-                    "Your Dropbox token does not have sufficient permissions."
-                )
-            raise ConnectorValidationError(
-                f"Unexpected Dropbox error during validation: {e.user_message_text or e}"
-            )
-        except Exception as e:
-            raise Exception(f"Unexpected error during Dropbox settings validation: {e}")
-

 if __name__ == "__main__":
    import os
--- a/Show More
+++ b/Show More