Compare commits

370 Commits

Author SHA1 Message Date
pablonyx
3ce02ccc01 k 2025-02-16 12:35:52 -08:00
pablonyx
d78c8e2e05 k 2025-02-16 12:35:18 -08:00
pablonyx
501ad93153 remove playwright for now 2025-02-16 12:33:53 -08:00
pablonyx
f4686440ae temporarily silence playwright 2025-02-16 12:32:44 -08:00
Chris Weaver
f1fc8ac19b Connector checkpointing (#3876)
* wip checkpointing/continue on failure

more stuff for checkpointing

Basic implementation

FE stuff

More checkpointing/failure handling

rebase

rebase

initial scaffolding for IT

IT to test checkpointing

Cleanup

cleanup

Fix it

Rebase

Add todo

Fix actions IT

Test more

Pagination + fixes + cleanup

Fix IT networking

fix it

* rebase

* Address misc comments

* Address comments

* Remove unused router

* rebase

* Fix mypy

* Fixes

* fix it

* Fix tests

* Add drop index

* Add retries

* reset lock timeout

* Try hard drop of schema

* Add timeout/retries to downgrade

* rebase

* test

* test

* test

* Close all connections

* test closing idle only

* Fix it

* fix

* try using null pool

* Test

* fix

* rebase

* log

* Fix

* apply null pool

* Fix other test

* Fix quality checks

* Test not using the fixture

* Fix ordering

* fix test

* Change pooling behavior
2025-02-16 02:34:39 +00:00
Weves
bc087fc20e Fix ruff 2025-02-15 16:35:15 -08:00
Yuhong Sun
ab8081c36b k 2025-02-15 13:42:43 -08:00
Adam Siemiginowski
f371efc916 Fix Zulip connector schema + links and enable temporal metadata (#4005) 2025-02-15 11:49:41 -08:00
pablonyx
7fd5d31dbe Minor background process log cleanup (#4010) 2025-02-15 11:03:10 -08:00
rkuo-danswer
2829e6715e Feature/propagate exceptions (#3974)
* better propagation of exceptions up the stack

* remove debug testing

* refactor the watchdog more to emit data consistently at the end of the function

* enumerate a lot more terminal statuses

* handle more codes

* improve logging

* handle "-9"

* single line exception logging

* typo/grammar

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-15 04:53:01 +00:00
Weves
bc7b4ec396 Fix typing for metadata 2025-02-14 18:19:37 -08:00
pablonyx
697f8bc1c6 Reduce background errors (#4004) 2025-02-14 17:35:26 -08:00
evan-danswer
3ba65214b8 bump version and fix related issues (#3996) 2025-02-14 19:57:12 +00:00
joachim-danswer
6687d5d499 major Agent Search Updates (#3994) 2025-02-14 19:40:21 +00:00
pablonyx
ec78f78f3c k (#3999) 2025-02-14 02:33:42 +00:00
rkuo-danswer
ed253e469a add nano and vim to base image (#3995)
Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-14 02:27:24 +00:00
pablodanswer
e3aafd95af k 2025-02-13 18:34:05 -08:00
Weves
3a704f1950 Add new vars to github action 2025-02-13 18:33:17 -08:00
Weves
2bf8a7aee5 Misc improvements 2025-02-13 18:33:17 -08:00
Weves
c2f3302aa0 Fix mypy 2025-02-13 18:33:17 -08:00
neo773
7f4d1f27a0 Gitbook connector (#3991)
* add parser

* add tests
2025-02-13 17:58:05 -08:00
pablonyx
b70db15622 Bugfix Vespa Deletion Script (#3998) 2025-02-13 17:26:04 -08:00
pablonyx
e9492ce9ec minor read replica fix (#3997) 2025-02-13 17:11:45 -08:00
pablodanswer
35574369ed update cloud build to use public stripe key 2025-02-13 16:55:56 -08:00
pablonyx
eff433bdc5 Reduce errors in workers (#3962) 2025-02-13 15:59:44 -08:00
pablonyx
3260d793d1 Billing fixes (#3976) 2025-02-13 15:59:10 -08:00
Yuhong Sun
1a7aca06b9 Fix Agent Slowness (#3979) 2025-02-13 15:54:34 -08:00
pablonyx
c6434db7eb Add delete all for tenants in Vespa (#3970) 2025-02-13 14:33:49 -08:00
joachim-danswer
667b9e04c5 updated rerank function arguments (#3988) 2025-02-13 14:13:14 -08:00
rkuo-danswer
29c84d7707 xfail this test (#3992)
Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-13 14:09:15 -08:00
pablonyx
17c915b11b Improved email formatting (#3985)
* prettier emails

* k

* remove mislieading comment

* minor typing
2025-02-13 21:11:57 +00:00
rkuo-danswer
95ca592d6d fix title check (#3993)
Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-13 13:14:55 -08:00
Yuhong Sun
e39a27fd6b Hope this actually skips the model server builds now (#3987) 2025-02-13 11:48:25 -08:00
rkuo-danswer
26d3c952c6 Bugfix/jira connector test 2 (#3986)
* fix jira connector test

* typo fix

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-13 10:21:54 -08:00
rkuo-danswer
53683e2f3c fix jira connector test (#3983)
Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-13 09:41:45 -08:00
rkuo-danswer
0c0113a481 ignore result when using send_task on lightweight tasks (#3978)
* ignore result when using send_task on lightweight tasks

* fix ignore_result

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
Co-authored-by: Richard Kuo <rkuo@rkuo.com>
2025-02-13 03:22:13 -08:00
Chris Weaver
c0f381e471 Add background errors ability (#3982) 2025-02-13 00:44:55 -08:00
rkuo-danswer
5ed83f1148 no thread local locks in callbacks and raise permission sync timeout … (#3977)
* no thread local locks in callbacks and raise permission sync timeout by a lot based on empirical log observations

* more fixes

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-12 22:31:01 -08:00
pablonyx
9db7b67a6c Minor misc ux improvements (#3966)
* minor misc ux

* nit

* k

* quick nit

* k
2025-02-13 04:43:11 +00:00
Yuhong Sun
2850048c6b Jira add key to semantic id (#3981) 2025-02-12 20:04:47 -08:00
rkuo-danswer
61058e5fcd merge monitoring with kickoff tasks (#3953)
* move indexing

* all monitor work moved

* reacquire lock more

* remove monitor task completely

* fix import

* fix pruning finalization

* no multiplier on system/cloud tasks

* monitor queues every 30 seconds in the cloud

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-13 02:35:41 +00:00
Yuhong Sun
c87261cda7 Fix edge case with run functions in parallel 2025-02-12 17:57:39 -08:00
pablonyx
e030b0a6fc Address (#3955) 2025-02-12 13:53:13 -08:00
Yuhong Sun
61136975ad Don't build model server every night (#3973) 2025-02-12 13:08:05 -08:00
Weves
0c74bbf9ed Clean illegal chars in metadata 2025-02-12 11:49:16 -08:00
pablonyx
12b2126e69 Update assistants visibility, minor UX, .. (#3965)
* update assistant logic

* quick nit

* k

* fix "featured" logic

* Small tweaks

* k

---------

Co-authored-by: Weves <chrisweaver101@gmail.com>
2025-02-12 00:43:20 +00:00
Chris Weaver
037943c6ff Support share/view IDs for Airtable (#3967) 2025-02-11 16:19:38 -08:00
pablonyx
f9485b1325 Ensure sidepanel defaults sidebar off (#3844)
* ensure sidepanel defaults sidepanel off

* address comment

* reformat

* initial visible
2025-02-11 22:22:56 +00:00
rkuo-danswer
552a0630fe Merge pull request #3948 from onyx-dot-app/feature/beat_rtvar
refactoring and update multiplier in real time
2025-02-11 14:05:14 -08:00
Richard Kuo (Danswer)
5bf520d8b8 comments 2025-02-11 14:04:49 -08:00
Weves
7dc5a77946 Improve starter message splitting 2025-02-11 11:10:13 -08:00
rkuo-danswer
03abd4a1bc Merge pull request #3938 from onyx-dot-app/feature/model_server_logs
improve gpu detection functions and logging in model server
2025-02-11 09:43:25 -08:00
Richard Kuo (Danswer)
16d6d708f6 update logging 2025-02-11 09:15:39 -08:00
Richard Kuo
9740ed32b5 fix reading redis values as floats 2025-02-10 20:48:55 -08:00
rkuo-danswer
b56877cc2e Bugfix/dedupe ids (#3952)
* dedupe make_private_persona and update test

* add comment

* comments, and just have duplicate user id's for the test instead of modifying edit

* found the magic word

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-11 02:27:55 +00:00
pablodanswer
da5c83a96d k 2025-02-10 17:45:00 -08:00
Weves
818225c60e Fix starter message overflow 2025-02-10 17:17:31 -08:00
Weves
d78a1fe9c6 Fix for red background 2025-02-10 16:36:26 -08:00
Weves
05b3e594b5 Increase timeout for reasoning models + make o1 available by default 2025-02-10 16:11:01 -08:00
Richard Kuo (Danswer)
5a4d007cf9 comments 2025-02-10 15:03:59 -08:00
pablonyx
3b25a2dd84 Ux improvements (#3947)
* black history sidebar

* misc improvements

* minor misc ux improvemnts

* quick nit

* add nits

* quick nit
2025-02-10 12:18:41 -08:00
pablonyx
baee4c5f22 Multi tenant specific error page (#3928)
Multi tenant specific error page
2025-02-10 11:51:29 -08:00
Richard Kuo (Danswer)
5e32f9d922 refactoring and update multiplier in real time 2025-02-10 11:20:38 -08:00
pablonyx
1454e7e07d New ux dark (#3944) 2025-02-09 21:14:32 -08:00
rkuo-danswer
6848337445 add validation for pruning/group sync etc (#3882)
* add validation for pruning

* fix missing class

* get external group sync validation working

* backport fix for pruning check

* fix pruning

* log the payload id

* remove scan_iter from pruning

* missed removed scan_iter, also remove other scan_iters and replace with sscan_iter of the lookup table

* external group sync needs active signal. h

* log the payload id when the task starts

* log the payload id in more places

* use the replica

* increase primary pool and slow down beat

* scale sql pool based on concurrency

* fix concurrency

* add debugging for external group sync and tenant

* remove debugging and fix payload id

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-10 03:12:21 +00:00
pablonyx
519fbd897e Add Dark Mode (#3936)
* k

* intermediate unification

* many changes

* update dark mode configs

* updates

* decent state

* functional

* mostly clean

* updaet model selector

* finalize

* calendar update

* additional styling

* nit

* k

* update colors

* push change

* k

* update

* k

* update

* address additions

* quick nit
2025-02-09 23:09:40 +00:00
evan-danswer
217569104b added context type for when internet search tool is used (#3930) 2025-02-08 20:44:38 -08:00
rkuo-danswer
4c184bb7f0 Bugfix/slack stop 2 (#3916)
* use callback in slim doc functions

* more callbacks

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-08 23:45:41 +00:00
rkuo-danswer
a222fae7c8 Bugfix/beat templates (#3754)
* WIP

* migrate most beat tasks to fan out strategy

* fix kwargs

* migrate EE tasks

* lock on the task_name level

* typo fix

* transform beat tasks for cloud

* cloud multiplier is only for cloud tasks

* bumpity

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-08 06:57:57 +00:00
pablonyx
94788cda53 Update display (#3934)
* update display

* quick nit
2025-02-08 02:07:47 +00:00
Richard Kuo (Danswer)
fb931ee4de fixes 2025-02-07 17:28:17 -08:00
Richard Kuo (Danswer)
bc2c56dfb6 improve gpu detection functions and logging in model server 2025-02-07 16:59:02 -08:00
rkuo-danswer
ae37f01f62 event driven indexing/docset/usergroup triggers (#3918)
* WIP

* trigger indexing immediately when the ccpair is created

* add some logging and indexing trigger to the mock-credential endpoint

* better comments

* fix integration test

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-07 22:53:51 +00:00
pablodanswer
ef31e14518 remove debug logs for integration tests 2025-02-07 10:46:24 -08:00
evan-danswer
9b0cba367e small linear connector improvements (#3929)
* small linear connector improvements

* add todo for url handling
2025-02-07 01:31:49 +00:00
pablonyx
48ac690a70 Multi tenant tests (#3919)
* ensure fail on multi tenant successfully

* attempted fix

* udpate ingration tests

* minor update

* improve

* improve workflow

* fix migrations

* many more logs

* quick fix

* improve

* fix typo

* quick nit

* attempted fix

* very minor clean up
2025-02-07 01:24:00 +00:00
pablodanswer
bfa4fbd691 minor delay 2025-02-06 16:28:38 -08:00
rkuo-danswer
58fdc86d41 fix chromatic save/upload (#3927)
* try adding back some params

* raise timeout

* update chromatic version

* fix typo

* use chromatic imports

* update gitignore

* slim down the config file

* update readme

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-06 22:02:14 +00:00
pablonyx
6ff452a2e1 Update popup + misc standardization (#3906)
* pop

* various minor improvements

* improvement

* finalize

* update
2025-02-06 21:22:06 +00:00
pablonyx
e9b892301b Improvements to Redis + Vespa debugging
Improvements to Redis + Vespa debugging
2025-02-06 13:30:32 -08:00
pablodanswer
a202e2bf9d Improvements to Redis + Vespa debugging 2025-02-06 13:30:06 -08:00
pablonyx
3bc4e0d12f Very minor robustification (#3926)
* very minor robustification

* robust
2025-02-06 19:55:38 +00:00
trial-danswer
2fc41cd5df Helm Chart Fixes (#3900)
* initial commit for helm chart refactoring

* Continue refactoring helm. I was able to use helm to deploy all of the apps to a cluster in aws. The bottleneck was setting up PVC dynamic provisioning.

* use default storage class

* Fix linter errors

* Fix broken helm test

* update

* Helm chart fixes

* remove reference to ebsstorage

* Fix linter errors

---------

Co-authored-by: jpb80 <jordan.buttkevitz@gmail.com>
2025-02-06 10:41:09 -08:00
pablodanswer
8c42ff2ff8 slackbot configuration fix 2025-02-06 09:36:58 -08:00
rkuo-danswer
6ccb3f085a select only doc_id (#3920)
* select only doc_id

* select more doc ids

* fix user group

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-06 07:00:40 +00:00
pablonyx
a0a1b431be Various UX improvements
Various improvements
2025-02-05 21:13:22 -08:00
pablodanswer
f137fc78a6 various UX improvements 2025-02-05 21:12:55 -08:00
pablonyx
396f096dda Allows for Slackbots that do not have search enabled
Allow no search
2025-02-05 19:20:20 -08:00
pablodanswer
e04b2d6ff3 Allows for Slackbots that do not have search enabled 2025-02-05 19:19:50 -08:00
pablonyx
cbd8b094bd Minor misc docset updates
Minor misc docset updates
2025-02-05 19:14:32 -08:00
pablodanswer
5c7487e91f ensure tests pass 2025-02-05 17:02:49 -08:00
pablodanswer
477f8eeb68 minor update 2025-02-05 16:53:04 -08:00
pablodanswer
737e37170d minor updates 2025-02-05 16:53:02 -08:00
Yuhong Sun
c58a7ef819 Slackbot to know its name (#3917) 2025-02-05 16:39:42 -08:00
rkuo-danswer
bd08e6d787 alert if revisions are null or query fails (#3910)
* alert if revisions are null or query fails

* comment

* mypy

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-05 23:45:38 +00:00
rkuo-danswer
47e6192b99 fix bug in validation logic (#3915)
* fix bug in validation logic

* test

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-05 22:49:18 +00:00
pablonyx
d1e9760b92 Enforce Slack Channel Default Config
Enforce Slack Channel Default Config
2025-02-05 14:28:03 -08:00
pablodanswer
7153cb09f1 add default slack channel config 2025-02-05 14:26:26 -08:00
evan-danswer
29f5f4edfa fixed citations when sections selected (#3914)
* removed some dead code and fixed citations when a search request is made with sections selected

* fix black formatting issue
2025-02-05 22:16:07 +00:00
pablonyx
b469a7eff4 Put components in components directory + remove unused shortcut commands (#3909) 2025-02-05 14:29:29 -08:00
pablonyx
78153e5012 Merge pull request #3913 from onyx-dot-app/very_minor_ux
remove unused border
2025-02-05 11:57:41 -08:00
pablodanswer
b1ee1efecb remove minor border issue 2025-02-05 11:57:03 -08:00
Sam Warner
526932a7f6 fix chat image upload double read 2025-02-05 09:52:51 -08:00
Weves
6889152d81 Fix issue causing file connector to fail 2025-02-04 22:19:04 -08:00
pablonyx
4affc259a6 Password reset tenant (#3895)
* nots

* functional

* minor naming cleanup

* nit

* update constant

* k
2025-02-05 03:17:11 +00:00
pablonyx
0ec065f1fb Set GPT 4o as default and add O3 mini (#3899)
* quick update to models

* add reqs

* update version
2025-02-05 03:06:05 +00:00
Weves
8eb4320f76 Support not pausing connectors on initialization failure 2025-02-04 19:32:55 -08:00
Weves
1c12ab31f9 Fix extra __init__ file + allow adding API keys to user groups 2025-02-04 17:21:06 -08:00
Yuhong Sun
49fd76b336 Tool Call Error Display (#3897) 2025-02-04 16:12:50 -08:00
rkuo-danswer
5854b39dd4 Merge pull request #3893 from onyx-dot-app/mypy_random
Mypy random fixes
2025-02-04 16:02:18 -08:00
rkuo-danswer
c0271a948a Merge pull request #3856 from onyx-dot-app/feature/no_scan_iter
lessen usage of scan_iter
2025-02-04 15:57:03 -08:00
Richard Kuo (Danswer)
aff4ee5ebf commented code 2025-02-04 15:56:18 -08:00
Richard Kuo (Danswer)
675d2f3539 Merge branch 'main' of https://github.com/onyx-dot-app/onyx into feature/no_scan_iter 2025-02-04 15:55:42 -08:00
rkuo-danswer
2974b57ef4 Merge pull request #3898 from onyx-dot-app/bugfix/temporary_xfail
xfail test until fixed
2025-02-04 15:54:44 -08:00
Richard Kuo (Danswer)
679bdd5e04 xfail test until fixed 2025-02-04 15:53:45 -08:00
Yuhong Sun
e6cb47fcb8 Prompt 2025-02-04 14:42:18 -08:00
Yuhong Sun
a514818e13 Citations 2025-02-04 14:34:44 -08:00
Yuhong Sun
89021cde90 Citation Prompt 2025-02-04 14:17:23 -08:00
Chris Weaver
32ecc282a2 Update README.md
Fix Cal link in README
2025-02-04 13:11:46 -08:00
Yuhong Sun
59b1d4673f Updating some Prompts (#3894) 2025-02-04 12:23:15 -08:00
pablodanswer
ec0c655c8d misc improvement 2025-02-04 12:06:11 -08:00
pablodanswer
42a0f45a96 update 2025-02-04 12:06:11 -08:00
pablodanswer
125e5eaab1 various mypy improvements 2025-02-04 12:06:10 -08:00
Richard Kuo (Danswer)
f2dab9ba89 Merge branch 'main' of https://github.com/onyx-dot-app/onyx into feature/no_scan_iter 2025-02-04 12:01:57 -08:00
Richard Kuo
02a068a68b multiplier from 8 to 4 2025-02-03 23:59:36 -08:00
evan-danswer
91f0650071 Merge pull request #3749 from onyx-dot-app/agent-search-feature
Agent search
2025-02-03 21:31:46 -08:00
pablodanswer
b97819189b push various minor updates 2025-02-03 21:23:45 -08:00
Evan Lohn
b928201397 fixed rebase issue and some cleanup 2025-02-03 20:49:45 -08:00
Yuhong Sun
b500c914b0 cleanup 2025-02-03 20:10:51 -08:00
Yuhong Sun
4b0d22fae3 prompts 2025-02-03 20:10:51 -08:00
joachim-danswer
b46c09ac6c EL comments 2025-02-03 20:10:51 -08:00
joachim-danswer
3ce8923086 fix for citation update 2025-02-03 20:10:51 -08:00
joachim-danswer
7ac6d3ed50 logging level changes 2025-02-03 20:10:51 -08:00
joachim-danswer
3cd057d7a2 LangGraph comments 2025-02-03 20:10:51 -08:00
joachim-danswer
4834ee6223 new citation format 2025-02-03 20:10:51 -08:00
pablodanswer
cb85be41b1 add proper citation handling 2025-02-03 20:10:51 -08:00
joachim-danswer
eb227c0acc nit update 2025-02-03 20:10:51 -08:00
joachim-danswer
25a57e2292 add title and meta-data to doc 2025-02-03 20:10:51 -08:00
pablodanswer
3f3b04a4ee update width 2025-02-03 20:10:51 -08:00
Evan Lohn
3f6de7968a prompt improvements for wekaer models 2025-02-03 20:10:51 -08:00
pablodanswer
024207e2d9 update 2025-02-03 20:10:51 -08:00
Yuhong Sun
8f7db9212c k 2025-02-03 20:10:51 -08:00
pablodanswer
b1e9e03aa4 nit 2025-02-03 20:10:51 -08:00
pablodanswer
87a53d6d80 quick update 2025-02-03 20:10:51 -08:00
Yuhong Sun
59c65a4192 prompts 2025-02-03 20:10:51 -08:00
pablodanswer
c984c6c7f2 add pro search disable 2025-02-03 20:10:51 -08:00
Yuhong Sun
9a3ce504bc beta 2025-02-03 20:10:51 -08:00
Yuhong Sun
16265d27f5 k 2025-02-03 20:10:51 -08:00
Yuhong Sun
570fe43efb log level changes 2025-02-03 20:10:51 -08:00
Yuhong Sun
506a9f1b94 Yuhong 2025-02-03 20:10:51 -08:00
Yuhong Sun
a067b32467 Partial Prompt Updates (#3880) 2025-02-03 20:10:51 -08:00
pablodanswer
9b6e51b4fe k 2025-02-03 20:10:51 -08:00
joachim-danswer
e23dd0a3fa renames + fix of refined answer generation prompt 2025-02-03 20:10:51 -08:00
Evan Lohn
71304e4228 always persist in agent search 2025-02-03 20:10:51 -08:00
Evan Lohn
2adeaaeded loading object into model instead of json 2025-02-03 20:10:51 -08:00
Evan Lohn
a96728ff4d prompt piece optimizations 2025-02-03 20:10:51 -08:00
pablodanswer
eaffdee0dc broadly fixed minus some issues 2025-02-03 20:10:51 -08:00
pablodanswer
feaa3b653f fix misc issues 2025-02-03 20:10:51 -08:00
joachim-danswer
9438f9df05 removal of sone unused states/models 2025-02-03 20:10:51 -08:00
joachim-danswer
b90e0834a5 major renaming 2025-02-03 20:10:51 -08:00
Evan Lohn
29440f5482 alembic heads, basic citations, search pipeline state 2025-02-03 20:10:51 -08:00
Evan Lohn
5a95a5c9fd large number of PR comments addressed 2025-02-03 20:10:51 -08:00
Evan Lohn
118e8afbef reworked config to have logical structure 2025-02-03 20:10:51 -08:00
joachim-danswer
8342168658 initial variable renaming 2025-02-03 20:10:51 -08:00
joachim-danswer
d5661baf98 history summary fix
- adjusted prompt
 - adjusted citation removal
 - length cutoff by words, not characters
2025-02-03 20:10:51 -08:00
joachim-danswer
95fcc0019c history summary update 2025-02-03 20:10:51 -08:00
joachim-danswer
0ccd83e809 deep_search_a and agent_a_config renaming 2025-02-03 20:10:51 -08:00
joachim-danswer
732861a940 rename of documents to verified_reranked_documents 2025-02-03 20:10:51 -08:00
joachim-danswer
d53dd1e356 cited_docs -> cited_documents 2025-02-03 20:10:51 -08:00
joachim-danswer
1a2760edee improved logging through agent_state plus some default fixes 2025-02-03 20:10:51 -08:00
joachim-danswer
23ae4547ca default values of number of strings and other things 2025-02-03 20:10:51 -08:00
Evan Lohn
385b344a43 addressed TODOs 2025-02-03 20:10:51 -08:00
Evan Lohn
a340529de3 sync streaming impl 2025-02-03 20:10:51 -08:00
joachim-danswer
4a0b2a6c09 additional naming fixes 2025-02-03 20:10:51 -08:00
joachim-danswer
756a1cbf8f answer_refined_question_subgraphs 2025-02-03 20:10:51 -08:00
joachim-danswer
8af4f1da8e more renaming 2025-02-03 20:10:51 -08:00
Evan Lohn
4b82440915 finished rebase and fixed issues 2025-02-03 20:10:51 -08:00
Evan Lohn
bb6d55783e addressing PR comments 2025-02-03 20:10:51 -08:00
Evan Lohn
2b8cd63b34 main nodes renaming 2025-02-03 20:10:51 -08:00
joachim-danswer
b0c3098693 more renaming and consolidation 2025-02-03 20:10:51 -08:00
joachim-danswer
2517aa39b2 more renamings 2025-02-03 20:10:51 -08:00
joachim-danswer
ceaaa05af0 renamings and consolidation of formatting nodes in orig question retrieval 2025-02-03 20:10:51 -08:00
joachim-danswer
3b13380051 k 2025-02-03 20:10:51 -08:00
joachim-danswer
ef6e6f9556 more renaming 2025-02-03 20:10:51 -08:00
joachim-danswer
0a6808c4c1 rename initial_sub_question_creation 2025-02-03 20:10:51 -08:00
Evan Lohn
6442c56d82 remaining small find replace fix 2025-02-03 20:10:51 -08:00
Evan Lohn
e191e514b9 fixed find and replace issue 2025-02-03 20:10:51 -08:00
Evan Lohn
f33a2ffb01 node renaming 2025-02-03 20:10:51 -08:00
joachim-danswer
0578c31522 rename retrieval & consolidate_sub_answers (initial and refinement) 2025-02-03 20:10:51 -08:00
joachim-danswer
8cbdc6d8fe fix for refinement renaming 2025-02-03 20:10:51 -08:00
joachim-danswer
60fb06da4e rename initial_answer_generation pt 2 2025-02-03 20:10:51 -08:00
joachim-danswer
55ed6e2294 rename initial_answer_generation 2025-02-03 20:10:50 -08:00
joachim-danswer
42780d5f97 rename of individual_sub_answer_generation 2025-02-03 20:10:50 -08:00
Evan Lohn
f050d281fd refininement->refinement 2025-02-03 20:10:50 -08:00
joachim-danswer
3ca4d532b4 renamed directories, prompts, and small citation fix 2025-02-03 20:10:50 -08:00
pablodanswer
e3e855c526 potential question fix 2025-02-03 20:10:50 -08:00
pablodanswer
23bf50b90a address doc 2025-02-03 20:10:50 -08:00
Yuhong Sun
c43c2320e7 Tiny nits 2025-02-03 20:10:50 -08:00
Evan Lohn
01e6e9a2ba fixed errors on import 2025-02-03 20:10:50 -08:00
Evan Lohn
bd3b1943c4 WIP PR comments 2025-02-03 20:10:50 -08:00
Evan Lohn
1dbf561db0 fix revision to match internal alembic state 2025-02-03 20:10:50 -08:00
Evan Lohn
a43a6627eb fix revision to match internal alembic state 2025-02-03 20:10:50 -08:00
Evan Lohn
5bff8bc8ce collapsed db migrations post-rebase (added missing file) 2025-02-03 20:10:50 -08:00
Evan Lohn
7879ba6a77 collapsed db migrations post-rebase 2025-02-03 20:10:50 -08:00
pablodanswer
a63b341913 latex update 2025-02-03 20:10:50 -08:00
pablodanswer
c062097b2a post rebase fix 2025-02-03 20:10:50 -08:00
Evan Lohn
48e42af8e7 fix rebase issue 2025-02-03 20:10:50 -08:00
Evan Lohn
6c7f8eaefb first pass at dead code deletion 2025-02-03 20:10:50 -08:00
joachim-danswer
3d99ad7bc4 var initialization 2025-02-03 20:10:50 -08:00
joachim-danswer
8fea571f6e k 2025-02-03 20:10:50 -08:00
joachim-danswer
d70bbcc2ce k 2025-02-03 20:10:50 -08:00
joachim-danswer
73769c6cae k 2025-02-03 20:10:50 -08:00
joachim-danswer
7e98936c58 Enrichment prompts, prompt improvements, dispatch logging & reinsert empty tool response 2025-02-03 20:10:50 -08:00
joachim-danswer
4e17fc06ff variable renaming 2025-02-03 20:10:50 -08:00
joachim-danswer
ff4df6f3bf fix for merge error (#3814) 2025-02-03 20:10:50 -08:00
joachim-danswer
91b929d466 graph directory renamings 2025-02-03 20:10:50 -08:00
joachim-danswer
6bef5ca7a4 persona_prompt improvements 2025-02-03 20:10:50 -08:00
joachim-danswer
4817fa0bd1 average dispatch time collection for sub-answers 2025-02-03 20:10:50 -08:00
joachim-danswer
da4a086398 added total time to logging 2025-02-03 20:10:50 -08:00
joachim-danswer
69e8c5f0fc agent default changes/restructuring 2025-02-03 20:10:50 -08:00
joachim-danswer
12d1186888 increased logging 2025-02-03 20:10:50 -08:00
joachim-danswer
325892a21c cleanup of refined answer generation 2025-02-03 20:10:50 -08:00
joachim-danswer
18d92559b5 application of content limitation ion refined answer as well 2025-02-03 20:10:50 -08:00
joachim-danswer
f2aeeb7b3c Optimizations: docs for context & history
- summarize history if long
- introduced cited_docs from SQ as those must be provided to answer generations
- limit number of docs

TODO: same for refined flow
2025-02-03 20:10:50 -08:00
Evan Lohn
110c9f7e1b nit 2025-02-03 20:10:50 -08:00
Evan Lohn
1a22af4f27 AgentPromptConfig in Answer class 2025-02-03 20:10:50 -08:00
Evan Lohn
efa32a8c04 use reranking settings and persona during preprocessing in reranker 2025-02-03 20:10:50 -08:00
Evan Lohn
9bad12968f removed unused files 2025-02-03 20:10:50 -08:00
Evan Lohn
f1d96343a9 always send search response 2025-02-03 20:10:50 -08:00
Evan Lohn
0496ec3bb8 remove debug 2025-02-03 20:10:50 -08:00
pablodanswer
568f927b9b improve regeneration state 2025-02-03 20:10:50 -08:00
pablodanswer
f842e15d64 nit 2025-02-03 20:10:50 -08:00
pablodanswer
3a07093663 improved timing 2025-02-03 20:10:50 -08:00
Evan Lohn
1fe966d0f7 increased timeout to get rid of asyncio logger errors 2025-02-03 20:10:50 -08:00
joachim-danswer
812172f1bd addressing nits of EL 2025-02-03 20:10:50 -08:00
joachim-danswer
9e9bd440f4 updated answer_comparison prompt + small cleanup 2025-02-03 20:10:50 -08:00
joachim-danswer
7487b15522 refined search + question answering as sub-graphs 2025-02-03 20:10:50 -08:00
joachim-danswer
de5ce8a613 sub-graphs for initial question/search 2025-02-03 20:10:50 -08:00
joachim-danswer
8c9577aa95 refined search + question answering as sub-graphs 2025-02-03 20:10:50 -08:00
pablodanswer
4baf3dc484 minor update 2025-02-03 20:10:50 -08:00
pablodanswer
50ef5115e7 k 2025-02-03 20:10:50 -08:00
pablodanswer
a2247363af update switching logic 2025-02-03 20:10:50 -08:00
pablodanswer
a0af8ee91c fix toggling edge case 2025-02-03 20:10:50 -08:00
pablodanswer
25f6543443 update bool 2025-02-03 20:10:50 -08:00
pablodanswer
d52a0b96ac various improvements 2025-02-03 20:10:50 -08:00
pablodanswer
f14b282f0f quick nit 2025-02-03 20:10:50 -08:00
Evan Lohn
7d494cd65e allowed empty Search Tool for non-agentic search 2025-02-03 20:10:50 -08:00
pablodanswer
139374966f minor update - doc ordering 2025-02-03 20:10:50 -08:00
pablodanswer
bf06710215 k 2025-02-03 20:10:50 -08:00
pablodanswer
d4e0d0db05 quick nit 2025-02-03 20:10:50 -08:00
pablodanswer
f96a3ee29a k 2025-02-03 20:10:50 -08:00
joachim-danswer
3bf6b77319 Replaced additional limit with variable 2025-02-03 20:10:50 -08:00
joachim-danswer
3b3b0c8a87 Addressing EL's comments
- created vars for a couple of agent settings
 - moved agent configs
 - created a search function
2025-02-03 20:10:50 -08:00
joachim-danswer
aa8cb44a33 taking out Extraction for now 2025-02-03 20:10:50 -08:00
joachim-danswer
fc60fd0322 earlier entity extraction & sharper generation prompts 2025-02-03 20:10:50 -08:00
joachim-danswer
46402a97c7 tmp: force agent search 2025-02-03 20:10:50 -08:00
Evan Lohn
5bf6a47948 skip reranking for <=1 doc 2025-02-03 20:10:50 -08:00
Evan Lohn
2d8486bac4 stop infos when done streaming answers 2025-02-03 20:10:50 -08:00
Evan Lohn
eea6f2749a make field nullable 2025-02-03 20:10:50 -08:00
Evan Lohn
5e9b2e41ae persisting refined answer improvement 2025-02-03 20:10:50 -08:00
Evan Lohn
2bbe20edc3 address JR comments 2025-02-03 20:10:50 -08:00
Evan Lohn
db2004542e fixed chat tests 2025-02-03 20:10:50 -08:00
Evan Lohn
ddbfc65ad0 implemented top-level tool calling + force search 2025-02-03 20:10:50 -08:00
Evan Lohn
982040c792 WIP, but working basic search using initial tool choice node 2025-02-03 20:10:50 -08:00
pablodanswer
4b0a4a2741 k 2025-02-03 20:10:50 -08:00
pablodanswer
28ba01b361 updated + functional 2025-02-03 20:10:50 -08:00
pablodanswer
d32d1c6079 update- reorg 2025-02-03 20:10:50 -08:00
pablodanswer
dd494d2daa k 2025-02-03 20:10:50 -08:00
pablodanswer
eb6dbf49a1 build fix 2025-02-03 20:10:50 -08:00
joachim-danswer
e5fa411092 EL comments addressed 2025-02-03 20:10:50 -08:00
joachim-danswer
1ced8924b3 loser verification prompt 2025-02-03 20:10:50 -08:00
joachim-danswer
3c3900fac6 turning off initial search pre route decision 2025-02-03 20:10:50 -08:00
joachim-danswer
3b298e19bc change of sub-question answer if no docs recovered 2025-02-03 20:10:50 -08:00
joachim-danswer
71eafe04a8 various fixes from Yuhong's list 2025-02-03 20:10:50 -08:00
Yuhong Sun
80d248e02d Copy changes 2025-02-03 20:10:50 -08:00
Evan Lohn
2032fb10da removed print statements, fixed pass through handling 2025-02-03 20:10:50 -08:00
Evan Lohn
ca1f176c61 fixed basic flow citations and second test 2025-02-03 20:10:50 -08:00
Evan Lohn
3ced9bc28b fix for early cancellation test; solves issue with tasks being destroyed while pending 2025-02-03 20:10:50 -08:00
pablodanswer
deea9c8c3c add agent search frontend 2025-02-03 20:10:47 -08:00
Evan Lohn
4e47c81ed8 fix alembic history 2025-02-03 20:07:57 -08:00
joachim-danswer
00cee71c18 streaming + saving of search docs of no verified ones available
- sub-questions only
2025-02-03 20:07:57 -08:00
Evan Lohn
470c4d15dd reworked history messages in agent config 2025-02-03 20:07:57 -08:00
Evan Lohn
50bacc03b3 missed files from prev commit 2025-02-03 20:07:57 -08:00
Evan Lohn
dd260140b2 basic search restructure: WIP on fixing tests 2025-02-03 20:07:57 -08:00
joachim-danswer
8aa82be12a prompts that even further motivates to cite docs over sub-q's 2025-02-03 20:07:57 -08:00
joachim-danswer
b7f9e431a5 pydantic for LangGraph + changed ERT extraction flow 2025-02-03 20:07:57 -08:00
joachim-danswer
b9bd2ea4e2 history added to agent flow 2025-02-03 20:07:57 -08:00
pablodanswer
e4c93bed8b minor fixes to branch 2025-02-03 20:07:57 -08:00
Evan Lohn
4fd6e36c2f second clean commit 2025-02-03 20:07:57 -08:00
trial-danswer
715359c120 Helm chart refactoring (#3797)
* initial commit for helm chart refactoring

* Continue refactoring helm. I was able to use helm to deploy all of the apps to a cluster in aws. The bottleneck was setting up PVC dynamic provisioning.

* use default storage class

* Fix linter errors

* Fix broken helm test

---------

Co-authored-by: jpb80 <jordan.buttkevitz@gmail.com>
2025-02-03 10:56:07 -08:00
Richard Kuo (Danswer)
6f018d75ee use replica, remove some commented code 2025-02-03 10:10:05 -08:00
Richard Kuo (Danswer)
fd947aadea slow down to 8 again 2025-02-03 00:32:23 -08:00
Weves
e061ba2b93 another airtable fix 2025-02-02 20:58:24 -08:00
Weves
87bccc13cc Handle expiring attachments 2025-02-02 12:02:44 -08:00
Richard Kuo (Danswer)
3a950721b9 get rid of some more scan_iter 2025-02-02 01:14:10 -08:00
Weves
569639eb90 Improved attachment handling 2025-02-01 23:07:01 -08:00
pablodanswer
68cb1f3409 ensure tests don't run temporarily 2025-02-01 17:31:44 -08:00
pablonyx
11da0d9889 Add user specific chat session temperature (#3867)
* add user specific chat session temperature

* kbetter typing

* update
2025-02-01 17:29:58 -08:00
pablodanswer
6a7e2a8036 temporarily disable chat tests 2025-02-01 14:15:16 -08:00
pablodanswer
035f83c464 ensure tests pass (temporary dragging disabled) 2025-02-01 12:58:03 -08:00
pablonyx
3c34ddcc4f E2e assistant tests (#3869)
* adding llm override logic

* update

* general cleanup

* fix various tests

* rm

* update

* update

* better comments

* k

* k

* update to pass tests

* clarify content

* improve timeout
2025-02-01 20:05:53 +00:00
Richard Kuo (Danswer)
bbee2865e9 Merge branch 'main' of https://github.com/onyx-dot-app/onyx into feature/no_scan_iter 2025-02-01 10:46:38 -08:00
pablonyx
a82cac5361 Ensure anonymous users can give feedback
Ensure anonymous users can give feedback
2025-02-01 10:36:14 -08:00
pablodanswer
83e5cb2d2f tested 2025-01-31 16:40:37 -08:00
Chris Weaver
a5d2f0d9ac Fix airtable connector w/ mt cloud + move telem logic to match new st… (#3868)
* Fix airtable connector w/ mt cloud + move telem logic to match new standard

* Address Greptile comment

* Small fixes/improvements

* Revert back monitoring frequency

* Small monitoring fix
2025-01-31 16:29:04 -08:00
Richard Kuo (Danswer)
d3cf18160e lower CLOUD_BEAT_SCHEDULE_MULTIPLIER to 4 2025-01-31 16:13:13 -08:00
Richard Kuo (Danswer)
618e4addd8 better signal names 2025-01-31 13:25:27 -08:00
Richard Kuo (Danswer)
69f16cc972 dont add to the lookup table if it already exists 2025-01-31 13:23:52 -08:00
Richard Kuo (Danswer)
2676d40065 mereging 2025-01-31 12:14:24 -08:00
Richard Kuo (Danswer)
b64545c7c7 build a lookup table every so often to handle cloud migration 2025-01-31 12:12:52 -08:00
Weves
7bc8554e01 Airtable fix 2025-01-31 10:42:27 -08:00
Richard Kuo (Danswer)
5232aeacad Merge branch 'main' of https://github.com/onyx-dot-app/onyx into feature/no_scan_iter
# Conflicts:
#	backend/onyx/background/celery/tasks/vespa/tasks.py
#	backend/onyx/redis/redis_connector_doc_perm_sync.py
2025-01-31 10:38:10 -08:00
rkuo-danswer
261150e81a Validate permission locks (#3799)
* WIP for external group sync lock fixes

* prototyping permissions validation

* validate permission sync tasks in celery

* mypy

* cleanup and wire off external group sync checks for now

* add active key to reset

* improve logging

* reset on payload format change

* return False on exception

* missed a return

* add count of tasks scanned

* add comment

* better logging

* add return

* more return

* catch payload exceptions

* code review fixes

* push to restart test

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-01-31 17:33:07 +00:00
pablonyx
3e0d24a3f6 Update foreign key migration
Update foreign key migration
2025-01-31 08:45:19 -08:00
pablodanswer
ffe8ac168f update foreign key migration 2025-01-31 08:42:28 -08:00
pablonyx
17b280e59e Remove cloud_kubes from public repo
Remove `cloud_kubes` from public repo
2025-01-30 19:19:09 -08:00
pablonyx
5edba4a7f3 Foreign key input prompts
Foreign key input prompts
2025-01-30 19:18:49 -08:00
pablodanswer
d842fed37e foreign key updates 2025-01-30 19:17:32 -08:00
Weves
14981162fd Pin shapely version 2025-01-30 18:02:35 -08:00
Chris Weaver
288daa4e90 Add more airtable logging (#3862)
* Add more airtable logging

* Add multithreading

* Remove empty comment
2025-01-30 17:33:42 -08:00
Richard Kuo (Danswer)
30e8fb12e4 remove commented code 2025-01-30 15:34:00 -08:00
Richard Kuo (Danswer)
d8578bc1cb first full cut 2025-01-30 15:21:52 -08:00
pablonyx
5e21dc6cb3 Optimize /persona query (#3859)
* k

* delete

* k
2025-01-30 23:20:19 +00:00
Weves
39b3a503b4 Add more group sync logging 2025-01-30 14:42:14 -08:00
pablonyx
a70d472b5c Update e2e frontend tests (#3843)
* fix input prompts

* assistant ordering validation

* k

* Revert "fix input prompts"

This reverts commit a4b577bdd7.

* fix alembic

* foreign key updates

* Revert "foreign key updates"

This reverts commit fe17795a037f831790d69229e1067ccb5aab5bd9.

* improve e2e tests

* fix admin
2025-01-30 20:15:29 +00:00
devin-ai-integration[bot]
0ed2886ad0 Can't create starter messages for existing assistants. (#3825)
* fix: move starter messages out of advanced options for better visibility

Co-Authored-By: Chris Weaver <chris@onyx.app>

* fix: ensure starter message input field is visible in edit flow

Co-Authored-By: Chris Weaver <chris@onyx.app>

* chore: fix prettier formatting

Co-Authored-By: Chris Weaver <chris@onyx.app>

* chore: fix prettier formatting for starter messages description

Co-Authored-By: Chris Weaver <chris@onyx.app>

* chore: fix prettier formatting for starter messages initialization

Co-Authored-By: Chris Weaver <chris@onyx.app>

* fix: prevent unintended deletion of second message in StarterMessagesList

Co-Authored-By: Chris Weaver <chris@onyx.app>

* Fix empty starter messages

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: Chris Weaver <chris@onyx.app>
Co-authored-by: Weves <chrisweaver101@gmail.com>
2025-01-30 10:26:54 -08:00
pablodanswer
6b31e2f622 remove cloud_kubes from public repo 2025-01-30 09:52:57 -08:00
hagen-danswer
aabf8a99bc Fixed SharePoint connector polling (#3834)
* Fixed SharePoint connector polling

* finish

* fix sharepoint connector
2025-01-30 17:43:11 +00:00
Richard Kuo (Danswer)
7ccfe85ee5 WIP 2025-01-29 22:52:21 -08:00
Chris Weaver
95701db1bd Add more sync records + fix small bug in monitoring task causing deletion metrics to never be emitted (#3837)
Double check we don't double-emit + fix pruning metric

Add log

Fix comment

rename
2025-01-29 18:03:49 -08:00
rkuo-danswer
24105254ac fix race condition with permission sync and fences (#3841)
* fix race condition with permission sync and fences

* comments

* set the fence

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-01-29 23:40:44 +00:00
rkuo-danswer
4fe99d05fd add timings for syncing (#3798)
* add timings for syncing

* add more logging

* more debugging

* refactor multipass/db check out of VespaIndex

* circular imports?

* more debugging

* add logs

* various improvements

* additional logs to narrow down issue

* use global httpx pool for the main vespa flows in celery. Use in more places eventually.

* cleanup debug logging, etc

* remove debug logging

* this should use the secondary index

* mypy

* missed some logging

* review fixes

* refactor get_default_document_index to use search settings

* more missed logging

* fix circular refs

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
Co-authored-by: pablodanswer <pablo@danswer.ai>
2025-01-29 23:24:44 +00:00
pablonyx
d35f93b233 k (#3838) 2025-01-29 22:39:48 +00:00
hagen-danswer
766b0f35df Lowercase all user emails (#3830) 2025-01-29 19:09:06 +00:00
evan-danswer
a0470a96eb removed logic to search first message, fixed query override (#3812) 2025-01-29 19:02:29 +00:00
devin-ai-integration[bot]
b82123563b Fix Unicode sanitization for Vespa document indexing (#3831)
* Add support for filtering 0xFDD0-0xFDEF Unicode range

- Update remove_invalid_unicode_chars to handle 0xFDD0-0xFDEF range
- Add comprehensive test cases for Unicode character sanitization
- Fix issue with illegal code point 0xFDDB in Vespa indexing

Co-Authored-By: Chris Weaver <chris@onyx.app>

* Remove unused pytest import

Co-Authored-By: Chris Weaver <chris@onyx.app>

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: Chris Weaver <chris@onyx.app>
2025-01-29 18:32:00 +00:00
rkuo-danswer
787e25cd78 Merge pull request #3823 from onyx-dot-app/bugfix/sharepoint_app_init
app should be initialized once per connector
2025-01-28 23:55:09 -08:00
pablonyx
c6375f8abf Tool id constants (#3827)
* tool id constants

* clarification
2025-01-29 06:33:31 +00:00
Richard Kuo (Danswer)
58e5deba01 Merge branch 'main' of https://github.com/onyx-dot-app/onyx into bugfix/sharepoint_app_init
# Conflicts:
#	backend/onyx/connectors/sharepoint/connector.py
2025-01-28 21:11:13 -08:00
Chris Weaver
028e877342 Sharepoint fixes (#3826)
* Sharepoint connector fixes

* Refactor sharepoint to be better

* Improve env variable naming

* Fix

* Add new secrets

* Fix unstructured failure
2025-01-28 20:06:09 -08:00
Richard Kuo (Danswer)
47bff2b6a9 missed init 2025-01-28 19:11:38 -08:00
Richard Kuo (Danswer)
1502bcea12 do teams too 2025-01-28 19:03:54 -08:00
pablonyx
2701f83634 llm provider re-org (#3810)
* nit

* clean up logic

* update
2025-01-29 02:44:50 +00:00
pablonyx
601037abb5 Customer love (#3813)
* additional logs

* disable gdrive oauth

* Revert "additional ogs"

This reverts commit 1bd7f9d433.
2025-01-28 17:42:28 -08:00
devin-ai-integration[bot]
7e9b12403a Allow Slack workflow messages when respond_to_bots is enabled (#3819)
* Allow workflow 'bot_message' subtype when respond_to_bots is enabled

Co-Authored-By: Chris Weaver <chris@onyx.app>

* refactor: consolidate bot message checks to avoid redundant code

Co-Authored-By: Chris Weaver <chris@onyx.app>

* style: fix black formatting

Co-Authored-By: Chris Weaver <chris@onyx.app>

* Remove unnecessary call

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: Chris Weaver <chris@onyx.app>
Co-authored-by: Weves <chrisweaver101@gmail.com>
2025-01-28 17:29:23 -08:00
devin-ai-integration[bot]
d903e5912a feat: add option to treat all non-attachment fields as metadata in Airtable connector (#3817)
* feat: add option to treat all non-attachment fields as metadata in Airtable connector

- Added new UI option 'treat_all_non_attachment_fields_as_metadata'
- Updated backend logic to support treating all fields except attachments as metadata
- Added tests for both default and all-metadata behaviors

Co-Authored-By: Chris Weaver <chris@onyx.app>

* fix: handle missing environment variables gracefully in airtable tests

Co-Authored-By: Chris Weaver <chris@onyx.app>

* fix: clean up test file and handle environment variables properly

Co-Authored-By: Chris Weaver <chris@onyx.app>

* fix: add missing test fixture and fix formatting

Co-Authored-By: Chris Weaver <chris@onyx.app>

* chore: fix black formatting

Co-Authored-By: Chris Weaver <chris@onyx.app>

* fix: add type annotation for metadata dict in airtable tests

Co-Authored-By: Chris Weaver <chris@onyx.app>

* fix: add type annotation for mock_get_api_key fixture

Co-Authored-By: Chris Weaver <chris@onyx.app>

* fix: update Generator import to use collections.abc

Co-Authored-By: Chris Weaver <chris@onyx.app>

* refactor: make treat_all_non_attachment_fields_as_metadata a direct required parameter

- Move parameter from connector_config to direct class parameter
- Place parameter right under table_name_or_id argument
- Make parameter required in UI with no default value
- Update tests to use new parameter structure

Co-Authored-By: Chris Weaver <chris@onyx.app>

* chore: fix black formatting

Co-Authored-By: Chris Weaver <chris@onyx.app>

* chore: rename _METADATA_FIELD_TYPES to DEFAULT_METADATA_FIELD_TYPES and clarify usage

Co-Authored-By: Chris Weaver <chris@onyx.app>

* chore: fix black formatting in docstring

Co-Authored-By: Chris Weaver <chris@onyx.app>

* test: make airtable tests fail loudly on missing env vars

Co-Authored-By: Chris Weaver <chris@onyx.app>

* style: fix black formatting in test file

Co-Authored-By: Chris Weaver <chris@onyx.app>

* style: add required newline between test functions

Co-Authored-By: Chris Weaver <chris@onyx.app>

* test: update error message pattern in parameter validation test

Co-Authored-By: Chris Weaver <chris@onyx.app>

* style: fix black formatting in test file

Co-Authored-By: Chris Weaver <chris@onyx.app>

* test: fix error message pattern in parameter validation test

Co-Authored-By: Chris Weaver <chris@onyx.app>

* style: fix line length in test file

Co-Authored-By: Chris Weaver <chris@onyx.app>

* test: simplify error message pattern in parameter validation test

Co-Authored-By: Chris Weaver <chris@onyx.app>

* test: add type validation test for treat_all_non_attachment_fields_as_metadata

Co-Authored-By: Chris Weaver <chris@onyx.app>

* fix: add missing required parameter in test

Co-Authored-By: Chris Weaver <chris@onyx.app>

* fix: remove parameter from test to properly validate it is required

Co-Authored-By: Chris Weaver <chris@onyx.app>

* fix: add type validation for treat_all_non_attachment_fields_as_metadata parameter

Co-Authored-By: Chris Weaver <chris@onyx.app>

* style: fix black formatting in airtable_connector.py

Co-Authored-By: Chris Weaver <chris@onyx.app>

* fix: update type validation test to handle mypy errors

Co-Authored-By: Chris Weaver <chris@onyx.app>

* fix: specify mypy ignore type for call-arg

Co-Authored-By: Chris Weaver <chris@onyx.app>

* Also handle rows w/o sections

* style: fix black formatting in test assertion

Co-Authored-By: Chris Weaver <chris@onyx.app>

* add TODO

* Remove unnecessary check

* Fix test

* Do not break existing airtable connectors

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: Chris Weaver <chris@onyx.app>
Co-authored-by: Weves <chrisweaver101@gmail.com>
2025-01-28 17:28:32 -08:00
pablonyx
d2aea63573 Merge pull request #3824 from onyx-dot-app/naming
Fix search tool name
2025-01-28 16:57:02 -08:00
pablodanswer
57b4639709 fix name 2025-01-28 16:52:00 -08:00
Richard Kuo (Danswer)
1308b6cbe8 app should be initialized once per connector 2025-01-28 15:55:52 -08:00
rkuo-danswer
98abd7d3fa Merge pull request #3821 from onyx-dot-app/bugfix/google_drive_test_fix
don't duplicate test module names
2025-01-28 15:29:55 -08:00
Richard Kuo (Danswer)
e4180cefba don't duplicate test module names 2025-01-28 15:24:05 -08:00
skylares
f67b5356fa Create google drive e2e test (#3635)
* Create e2e google drive test

* Drive sync issue

* Add endpoints for group syncing

* google e2e fixes/improvements and add xfail to zendesk tests

* mypy errors

* Key change

* Small changes

* Merged main to fix group sync issue

* Update test_permission_sync.py

* Update google_drive_api_utils.py

* Update test_zendesk_connector.py

---------

Co-authored-by: hagen-danswer <hagen@danswer.ai>
2025-01-28 14:12:57 -08:00
pablonyx
9bdb581220 Update slack configs (#3776)
* update

* fix build
2025-01-28 21:10:09 +00:00
pablonyx
42d6d935ae continue on internal error (#3728) 2025-01-28 20:19:07 +00:00
pablonyx
8d62b992ef Double check all chat accessible dependencies (#3801)
* double check all chat accessible dependencies

* k

* k

* k

* k

* k

* k
2025-01-28 17:38:32 +00:00
pablonyx
2ad86aa9a6 Unstructured fix (#3809)
* fix v1

* temporary patch for pdfs

* nit
2025-01-28 16:46:27 +00:00
pablonyx
74a472ece7 Remove checkmark
Remove checkmark
2025-01-27 22:38:22 -08:00
pablodanswer
b2ce848b53 add fix 2025-01-27 21:54:20 -08:00
pablonyx
519ec20d05 Feedback (#3800)
* k

* k:wq

* update user auth

* update
2025-01-28 03:13:21 +00:00
pablodanswer
3b1e26d0d4 remove checkmark 2025-01-27 19:12:49 -08:00
pablonyx
118d2b52e6 Improvements for web build (#3786)
* k

* improvements for web build
2025-01-27 20:40:06 +00:00
pablonyx
e625884702 Chat Touchups (#3775) 2025-01-27 12:30:43 -08:00
rkuo-danswer
fa78f50fe3 Bugfix/celery ignore result (#3770)
* try using a redis replica in some areas

* harden up replica usage

* ignore results

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-01-27 08:53:01 +00:00
Yuhong Sun
05ab94945b Fix Sharepoint Folder Parsing (#3791) 2025-01-26 16:45:24 -08:00
Yuhong Sun
7a64a25ff4 Fix Confluence Missing Labels (#3788) 2025-01-26 14:05:02 -08:00
pablonyx
7f10494bbe Better vespa interface (#3781)
* k

* much cleaner vespa util class

* log

* typing

* improvement

* improve
2025-01-26 21:22:44 +00:00
pablodanswer
f2d4024783 improve base page latency 2025-01-26 11:44:34 -08:00
pablonyx
70795a4047 Sync status improvements (#3782)
* minor improvments / clarity

* additional comment for clarity

* typing

* quick updates to monitoring

* connector deletion

* quick nit

* fix typing

* update values

* quick nit

* functioning

* improvements to monitoring

* update

* minutes -> seconds
2025-01-26 17:35:26 +00:00
rkuo-danswer
d8a17a7238 try using a redis replica in some areas (#3748)
* try using a redis replica in some areas

* harden up replica usage

* comment

* slow down cloud dispatch temporarily

* add ignored syncing list back

* raise multiplier to 8

* comment out per tenant code (no longer used by fanout)

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-01-26 03:48:25 +00:00
Yuhong Sun
cbf98c0128 Fix Seeding Link for Support Use Case (#3784) 2025-01-25 19:39:36 -08:00
726 changed files with 34830 additions and 10024 deletions


@@ -65,8 +65,10 @@ jobs:
 NEXT_PUBLIC_POSTHOG_KEY=${{ secrets.POSTHOG_KEY }}
 NEXT_PUBLIC_POSTHOG_HOST=${{ secrets.POSTHOG_HOST }}
 NEXT_PUBLIC_SENTRY_DSN=${{ secrets.SENTRY_DSN }}
+NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY=${{ secrets.STRIPE_PUBLISHABLE_KEY }}
 NEXT_PUBLIC_GTM_ENABLED=true
 NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=true
+NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=true
 NODE_OPTIONS=--max-old-space-size=8192
 # needed due to weird interactions with the builds for different platforms
 no-cache: true


@@ -12,7 +12,32 @@ env:
 BUILDKIT_PROGRESS: plain
 jobs:
+# 1) Preliminary job to check if the changed files are relevant
+check_model_server_changes:
+runs-on: ubuntu-latest
+outputs:
+changed: ${{ steps.check.outputs.changed }}
+steps:
+- name: Checkout code
+uses: actions/checkout@v4
+- name: Check if relevant files changed
+id: check
+run: |
+# Default to "false"
+echo "changed=false" >> $GITHUB_OUTPUT
+# Compare the previous commit (github.event.before) to the current one (github.sha)
+# If any file in backend/model_server/** or backend/Dockerfile.model_server is changed,
+# set changed=true
+if git diff --name-only ${{ github.event.before }} ${{ github.sha }} \
+| grep -E '^backend/model_server/|^backend/Dockerfile.model_server'; then
+echo "changed=true" >> $GITHUB_OUTPUT
+fi
 build-amd64:
+needs: [check_model_server_changes]
+if: needs.check_model_server_changes.outputs.changed == 'true'
 runs-on:
 [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}-amd64"]
 steps:
@@ -52,6 +77,8 @@
 provenance: false
 build-arm64:
+needs: [check_model_server_changes]
+if: needs.check_model_server_changes.outputs.changed == 'true'
 runs-on:
 [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}-arm64"]
 steps:
@@ -91,7 +118,8 @@
 provenance: false
 merge-and-scan:
-needs: [build-amd64, build-arm64]
+needs: [build-amd64, build-arm64, check_model_server_changes]
+if: needs.check_model_server_changes.outputs.changed == 'true'
 runs-on: ubuntu-latest
 steps:
 - name: Login to Docker Hub


@@ -21,10 +21,10 @@ jobs:
 - name: Set up Helm
 uses: azure/setup-helm@v4.2.0
 with:
-version: v3.14.4
+version: v3.17.0
 - name: Set up chart-testing
-uses: helm/chart-testing-action@v2.6.1
+uses: helm/chart-testing-action@v2.7.0
 # even though we specify chart-dirs in ct.yaml, it isn't used by ct for the list-changed command...
 - name: Run chart-testing (list-changed)
@@ -37,22 +37,6 @@ jobs:
 echo "changed=true" >> "$GITHUB_OUTPUT"
 fi
-# rkuo: I don't think we need python?
-# - name: Set up Python
-# uses: actions/setup-python@v5
-# with:
-# python-version: '3.11'
-# cache: 'pip'
-# cache-dependency-path: |
-# backend/requirements/default.txt
-# backend/requirements/dev.txt
-# backend/requirements/model_server.txt
-# - run: |
-# python -m pip install --upgrade pip
-# pip install --retries 5 --timeout 30 -r backend/requirements/default.txt
-# pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt
-# pip install --retries 5 --timeout 30 -r backend/requirements/model_server.txt
 # lint all charts if any changes were detected
 - name: Run chart-testing (lint)
 if: steps.list-changed.outputs.changed == 'true'
@@ -62,7 +46,7 @@ jobs:
 - name: Create kind cluster
 if: steps.list-changed.outputs.changed == 'true'
-uses: helm/kind-action@v1.10.0
+uses: helm/kind-action@v1.12.0
 - name: Run chart-testing (install)
 if: steps.list-changed.outputs.changed == 'true'


@@ -94,23 +94,27 @@ jobs:
cd deployment/docker_compose
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
MULTI_TENANT=true \
AUTH_TYPE=basic \
AUTH_TYPE=cloud \
REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
IMAGE_TAG=test \
docker compose -f docker-compose.dev.yml -p danswer-stack up -d
DEV_MODE=true \
docker compose -f docker-compose.multitenant-dev.yml -p onyx-stack up -d
id: start_docker_multi_tenant
# In practice, `cloud` Auth type would require OAUTH credentials to be set.
- name: Run Multi-Tenant Integration Tests
run: |
echo "Waiting for 3 minutes to ensure API server is ready..."
sleep 180
echo "Running integration tests..."
docker run --rm --network danswer-stack_default \
docker run --rm --network onyx-stack_default \
--name test-runner \
-e POSTGRES_HOST=relational_db \
-e POSTGRES_USER=postgres \
-e POSTGRES_PASSWORD=password \
-e POSTGRES_DB=postgres \
-e POSTGRES_USE_NULL_POOL=true \
-e VESPA_HOST=index \
-e REDIS_HOST=cache \
-e API_SERVER_HOST=api_server \
@@ -119,6 +123,10 @@ jobs:
-e TEST_WEB_HOSTNAME=test-runner \
-e AUTH_TYPE=cloud \
-e MULTI_TENANT=true \
-e REQUIRE_EMAIL_VERIFICATION=false \
-e DISABLE_TELEMETRY=true \
-e IMAGE_TAG=test \
-e DEV_MODE=true \
onyxdotapp/onyx-integration:test \
/app/tests/integration/multitenant_tests
continue-on-error: true
@@ -126,34 +134,37 @@ jobs:
- name: Check multi-tenant test results
run: |
if [ ${{ steps.run_tests.outcome }} == 'failure' ]; then
echo "Integration tests failed. Exiting with error."
if [ ${{ steps.run_multitenant_tests.outcome }} == 'failure' ]; then
echo "Multi-tenant integration tests failed. Exiting with error."
exit 1
else
echo "All integration tests passed successfully."
echo "All multi-tenant integration tests passed successfully."
fi
- name: Stop multi-tenant Docker containers
run: |
cd deployment/docker_compose
docker compose -f docker-compose.dev.yml -p danswer-stack down -v
docker compose -f docker-compose.multitenant-dev.yml -p onyx-stack down -v
# NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections
- name: Start Docker containers
run: |
cd deployment/docker_compose
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
AUTH_TYPE=basic \
POSTGRES_POOL_PRE_PING=true \
POSTGRES_USE_NULL_POOL=true \
REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
IMAGE_TAG=test \
docker compose -f docker-compose.dev.yml -p danswer-stack up -d
docker compose -f docker-compose.dev.yml -p onyx-stack up -d
id: start_docker
- name: Wait for service to be ready
run: |
echo "Starting wait-for-service script..."
docker logs -f danswer-stack-api_server-1 &
docker logs -f onyx-stack-api_server-1 &
start_time=$(date +%s)
timeout=300 # 5 minutes in seconds
@@ -183,15 +194,24 @@ jobs:
done
echo "Finished waiting for service."
- name: Start Mock Services
run: |
cd backend/tests/integration/mock_services
docker compose -f docker-compose.mock-it-services.yml \
-p mock-it-services-stack up -d
# NOTE: Use pre-ping/null to reduce flakiness due to dropped connections
- name: Run Standard Integration Tests
run: |
echo "Running integration tests..."
docker run --rm --network danswer-stack_default \
docker run --rm --network onyx-stack_default \
--name test-runner \
-e POSTGRES_HOST=relational_db \
-e POSTGRES_USER=postgres \
-e POSTGRES_PASSWORD=password \
-e POSTGRES_DB=postgres \
-e POSTGRES_POOL_PRE_PING=true \
-e POSTGRES_USE_NULL_POOL=true \
-e VESPA_HOST=index \
-e REDIS_HOST=cache \
-e API_SERVER_HOST=api_server \
@@ -201,6 +221,8 @@ jobs:
-e CONFLUENCE_USER_NAME=${CONFLUENCE_USER_NAME} \
-e CONFLUENCE_ACCESS_TOKEN=${CONFLUENCE_ACCESS_TOKEN} \
-e TEST_WEB_HOSTNAME=test-runner \
-e MOCK_CONNECTOR_SERVER_HOST=mock_connector_server \
-e MOCK_CONNECTOR_SERVER_PORT=8001 \
onyxdotapp/onyx-integration:test \
/app/tests/integration/tests \
/app/tests/integration/connector_job_tests
@@ -216,27 +238,30 @@ jobs:
echo "All integration tests passed successfully."
fi
# save before stopping the containers so the logs can be captured
- name: Save Docker logs
if: success() || failure()
# ------------------------------------------------------------
# Always gather logs BEFORE "down":
- name: Dump API server logs
if: always()
run: |
cd deployment/docker_compose
docker compose -f docker-compose.dev.yml -p danswer-stack logs > docker-compose.log
mv docker-compose.log ${{ github.workspace }}/docker-compose.log
docker compose -f docker-compose.dev.yml -p onyx-stack logs --no-color api_server > $GITHUB_WORKSPACE/api_server.log || true
- name: Stop Docker containers
- name: Dump all-container logs (optional)
if: always()
run: |
cd deployment/docker_compose
docker compose -f docker-compose.dev.yml -p danswer-stack down -v
docker compose -f docker-compose.dev.yml -p onyx-stack logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
- name: Upload logs
if: success() || failure()
if: always()
uses: actions/upload-artifact@v4
with:
name: docker-logs
name: docker-all-logs
path: ${{ github.workspace }}/docker-compose.log
# ------------------------------------------------------------
- name: Stop Docker containers
if: always()
run: |
cd deployment/docker_compose
docker compose -f docker-compose.dev.yml -p danswer-stack down -v
docker compose -f docker-compose.dev.yml -p onyx-stack down -v
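The POSTGRES_POOL_PRE_PING / POSTGRES_USE_NULL_POOL flags referenced in the NOTE comments above are a standard SQLAlchemy flakiness mitigation. A minimal sketch of how such flags typically map to engine options — the exact wiring inside the backend is assumed here, not shown in this diff:

import os

from sqlalchemy import create_engine
from sqlalchemy.pool import NullPool

pool_pre_ping = os.environ.get("POSTGRES_POOL_PRE_PING", "false").lower() == "true"
use_null_pool = os.environ.get("POSTGRES_USE_NULL_POOL", "false").lower() == "true"

engine = create_engine(
    "postgresql://postgres:password@relational_db:5432/postgres",
    # pre-ping issues a cheap liveness check before reusing a pooled connection
    pool_pre_ping=pool_pre_ping,
    # NullPool opens a fresh connection per checkout: slower, but immune to
    # the dropped-idle-connection flakiness the NOTE comments call out
    poolclass=NullPool if use_null_pool else None,
)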

View File

@@ -1,6 +1,6 @@
name: Run Chromatic Tests
name: Run Playwright Tests
concurrency:
group: Run-Chromatic-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
group: Run-Playwright-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
cancel-in-progress: true
on: push
@@ -8,6 +8,8 @@ on: push
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
GEN_AI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
MOCK_LLM_RESPONSE: true
jobs:
playwright-tests:
@@ -196,43 +198,47 @@ jobs:
cd deployment/docker_compose
docker compose -f docker-compose.dev.yml -p danswer-stack down -v
chromatic-tests:
name: Chromatic Tests
# NOTE: Chromatic UI diff testing is currently disabled.
# We are using Playwright for local and CI testing without visual regression checks.
# Chromatic may be reintroduced in the future for UI diff testing if needed.
needs: playwright-tests
runs-on:
[
runs-on,
runner=32cpu-linux-x64,
disk=large,
"run-id=${{ github.run_id }}",
]
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0
# chromatic-tests:
# name: Chromatic Tests
- name: Setup node
uses: actions/setup-node@v4
with:
node-version: 22
# needs: playwright-tests
# runs-on:
# [
# runs-on,
# runner=32cpu-linux-x64,
# disk=large,
# "run-id=${{ github.run_id }}",
# ]
# steps:
# - name: Checkout code
# uses: actions/checkout@v4
# with:
# fetch-depth: 0
- name: Install node dependencies
working-directory: ./web
run: npm ci
# - name: Setup node
# uses: actions/setup-node@v4
# with:
# node-version: 22
- name: Download Playwright test results
uses: actions/download-artifact@v4
with:
name: test-results
path: ./web/test-results
# - name: Install node dependencies
# working-directory: ./web
# run: npm ci
- name: Run Chromatic
uses: chromaui/action@latest
with:
playwright: true
projectToken: ${{ secrets.CHROMATIC_PROJECT_TOKEN }}
workingDir: ./web
env:
CHROMATIC_ARCHIVE_LOCATION: ./test-results
# - name: Download Playwright test results
# uses: actions/download-artifact@v4
# with:
# name: test-results
# path: ./web/test-results
# - name: Run Chromatic
# uses: chromaui/action@latest
# with:
# playwright: true
# projectToken: ${{ secrets.CHROMATIC_PROJECT_TOKEN }}
# workingDir: ./web
# env:
# CHROMATIC_ARCHIVE_LOCATION: ./test-results

View File

@@ -39,6 +39,15 @@ env:
AIRTABLE_TEST_TABLE_ID: ${{ secrets.AIRTABLE_TEST_TABLE_ID }}
AIRTABLE_TEST_TABLE_NAME: ${{ secrets.AIRTABLE_TEST_TABLE_NAME }}
AIRTABLE_ACCESS_TOKEN: ${{ secrets.AIRTABLE_ACCESS_TOKEN }}
# Sharepoint
SHAREPOINT_CLIENT_ID: ${{ secrets.SHAREPOINT_CLIENT_ID }}
SHAREPOINT_CLIENT_SECRET: ${{ secrets.SHAREPOINT_CLIENT_SECRET }}
SHAREPOINT_CLIENT_DIRECTORY_ID: ${{ secrets.SHAREPOINT_CLIENT_DIRECTORY_ID }}
SHAREPOINT_SITE: ${{ secrets.SHAREPOINT_SITE }}
# Gitbook
GITBOOK_SPACE_ID: ${{ secrets.GITBOOK_SPACE_ID }}
GITBOOK_API_KEY: ${{ secrets.GITBOOK_API_KEY }}
jobs:
connectors-check:
# See https://runs-on.com/runners/linux/

.gitignore
View File

@@ -7,4 +7,6 @@
.vscode/
*.sw?
/backend/tests/regression/answer_quality/search_test_config.yaml
/web/test-results/
/web/test-results/
backend/onyx/agent_search/main/test_data.json
backend/tests/regression/answer_quality/test_data.json

View File

@@ -52,3 +52,9 @@ BING_API_KEY=<REPLACE THIS>
# Enable the full set of Danswer Enterprise Edition features
# NOTE: DO NOT ENABLE THIS UNLESS YOU HAVE A PAID ENTERPRISE LICENSE (or if you are using this for local testing/development)
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=False
# Agent Search configs # TODO: Remove or give these proper names
AGENT_RETRIEVAL_STATS=False # Note: This setting will incur substantial re-ranking effort
AGENT_RERANKING_STATS=True
AGENT_MAX_QUERY_RETRIEVAL_RESULTS=20
AGENT_RERANKING_MAX_QUERY_RETRIEVAL_RESULTS=20

View File

@@ -205,7 +205,7 @@
"--loglevel=INFO",
"--hostname=light@%n",
"-Q",
"vespa_metadata_sync,connector_deletion,doc_permissions_upsert",
"vespa_metadata_sync,connector_deletion,doc_permissions_upsert,checkpoint_cleanup",
],
"presentation": {
"group": "2",

View File

@@ -124,7 +124,7 @@ There are two editions of Onyx:
To try the Onyx Enterprise Edition:
1. Checkout our [Cloud product](https://cloud.onyx.app/signup).
2. For self-hosting, contact us at [founders@onyx.app](mailto:founders@onyx.app) or book a call with us on our [Cal](https://cal.com/team/danswer/founders).
2. For self-hosting, contact us at [founders@onyx.app](mailto:founders@onyx.app) or book a call with us on our [Cal](https://cal.com/team/onyx/founders).
## 💡 Contributing
@@ -133,3 +133,4 @@ Looking to contribute? Please check out the [Contribution Guide](CONTRIBUTING.md
## ⭐Star History
[![Star History Chart](https://api.star-history.com/svg?repos=onyx-dot-app/onyx&type=Date)](https://star-history.com/#onyx-dot-app/onyx&Date)

View File

@@ -35,7 +35,9 @@ RUN apt-get update && \
libuuid1=2.38.1-5+deb12u1 \
libxmlsec1-dev \
pkg-config \
gcc && \
gcc \
nano \
vim && \
rm -rf /var/lib/apt/lists/* && \
apt-get clean
@@ -101,7 +103,8 @@ COPY ./alembic_tenants /app/alembic_tenants
COPY ./alembic.ini /app/alembic.ini
COPY supervisord.conf /usr/etc/supervisord.conf
# Escape hatch
# Escape hatch scripts
COPY ./scripts/debugging /app/scripts/debugging
COPY ./scripts/force_delete_connector_by_id.py /app/scripts/force_delete_connector_by_id.py
# Put logo in assets

View File

@@ -0,0 +1,32 @@
"""set built in to default
Revision ID: 2cdeff6d8c93
Revises: f5437cc136c5
Create Date: 2025-02-11 14:57:51.308775
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "2cdeff6d8c93"
down_revision = "f5437cc136c5"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Prior to this migration / point in the codebase history,
# built in personas were implicitly treated as default personas (with no option to change this)
# This migration makes that explicit
op.execute(
"""
UPDATE persona
SET is_default_persona = TRUE
WHERE builtin_persona = TRUE
"""
)
def downgrade() -> None:
pass

View File

@@ -0,0 +1,36 @@
"""add chat session specific temperature override
Revision ID: 2f80c6a2550f
Revises: 33ea50e88f24
Create Date: 2025-01-31 10:30:27.289646
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "2f80c6a2550f"
down_revision = "33ea50e88f24"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"chat_session", sa.Column("temperature_override", sa.Float(), nullable=True)
)
op.add_column(
"user",
sa.Column(
"temperature_override_enabled",
sa.Boolean(),
nullable=False,
server_default=sa.false(),
),
)
def downgrade() -> None:
op.drop_column("chat_session", "temperature_override")
op.drop_column("user", "temperature_override_enabled")

View File

@@ -0,0 +1,80 @@
"""foreign key input prompts
Revision ID: 33ea50e88f24
Revises: a6df6b88ef81
Create Date: 2025-01-29 10:54:22.141765
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "33ea50e88f24"
down_revision = "a6df6b88ef81"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Safely drop constraints if exists
op.execute(
"""
ALTER TABLE inputprompt__user
DROP CONSTRAINT IF EXISTS inputprompt__user_input_prompt_id_fkey
"""
)
op.execute(
"""
ALTER TABLE inputprompt__user
DROP CONSTRAINT IF EXISTS inputprompt__user_user_id_fkey
"""
)
# Recreate with ON DELETE CASCADE
op.create_foreign_key(
"inputprompt__user_input_prompt_id_fkey",
"inputprompt__user",
"inputprompt",
["input_prompt_id"],
["id"],
ondelete="CASCADE",
)
op.create_foreign_key(
"inputprompt__user_user_id_fkey",
"inputprompt__user",
"user",
["user_id"],
["id"],
ondelete="CASCADE",
)
def downgrade() -> None:
# Drop the new FKs with ondelete
op.drop_constraint(
"inputprompt__user_input_prompt_id_fkey",
"inputprompt__user",
type_="foreignkey",
)
op.drop_constraint(
"inputprompt__user_user_id_fkey",
"inputprompt__user",
type_="foreignkey",
)
# Recreate them without cascading
op.create_foreign_key(
"inputprompt__user_input_prompt_id_fkey",
"inputprompt__user",
"inputprompt",
["input_prompt_id"],
["id"],
)
op.create_foreign_key(
"inputprompt__user_user_id_fkey",
"inputprompt__user",
"user",
["user_id"],
["id"],
)
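A minimal illustration of what recreating these foreign keys with ON DELETE CASCADE changes (connection URL and prompt id are hypothetical):

from sqlalchemy import create_engine, text

engine = create_engine("postgresql://postgres:password@localhost:5432/postgres")
with engine.begin() as conn:
    # With ON DELETE CASCADE, deleting a prompt also deletes its
    # inputprompt__user join rows. Before this migration, this DELETE would
    # fail with a foreign key violation until the join rows were removed first.
    conn.execute(text("DELETE FROM inputprompt WHERE id = :id"), {"id": 42})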

View File

@@ -0,0 +1,37 @@
"""lowercase_user_emails
Revision ID: 4d58345da04a
Revises: f1ca58b2f2ec
Create Date: 2025-01-29 07:48:46.784041
"""
from alembic import op
from sqlalchemy.sql import text
# revision identifiers, used by Alembic.
revision = "4d58345da04a"
down_revision = "f1ca58b2f2ec"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Get database connection
connection = op.get_bind()
# Update all user emails to lowercase
connection.execute(
text(
"""
UPDATE "user"
SET email = LOWER(email)
WHERE email != LOWER(email)
"""
)
)
def downgrade() -> None:
# Cannot restore original case of emails
pass
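A sketch of the lookup this normalization enables; the helper and query are illustrative, not taken from the codebase:

from sqlalchemy import text

def get_user_id_by_email(connection, email: str) -> int | None:
    # Stored emails are lowercase after this migration, so lowering the input
    # gives an exact, index-friendly match without LOWER() on the column.
    row = connection.execute(
        text('SELECT id FROM "user" WHERE email = :email'),
        {"email": email.lower()},
    ).fetchone()
    return row[0] if row else None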

View File

@@ -5,7 +5,6 @@ Revises: 47e5bef3a1d7
Create Date: 2024-11-06 13:15:53.302644
"""
import logging
from typing import cast
from alembic import op
import sqlalchemy as sa
@@ -20,13 +19,8 @@ down_revision = "47e5bef3a1d7"
branch_labels: None = None
depends_on: None = None
# Configure logging
logger = logging.getLogger("alembic.runtime.migration")
logger.setLevel(logging.INFO)
def upgrade() -> None:
logger.info(f"{revision}: create_table: slack_bot")
# Create new slack_bot table
op.create_table(
"slack_bot",
@@ -63,7 +57,6 @@ def upgrade() -> None:
)
# Handle existing Slack bot tokens first
logger.info(f"{revision}: Checking for existing Slack bot.")
bot_token = None
app_token = None
first_row_id = None
@@ -71,15 +64,12 @@ def upgrade() -> None:
try:
tokens = cast(dict, get_kv_store().load("slack_bot_tokens_config_key"))
except Exception:
logger.warning("No existing Slack bot tokens found.")
tokens = {}
bot_token = tokens.get("bot_token")
app_token = tokens.get("app_token")
if bot_token and app_token:
logger.info(f"{revision}: Found bot and app tokens.")
session = Session(bind=op.get_bind())
new_slack_bot = SlackBot(
name="Slack Bot (Migrated)",
@@ -170,10 +160,9 @@ def upgrade() -> None:
# Clean up old tokens if they existed
try:
if bot_token and app_token:
logger.info(f"{revision}: Removing old bot and app tokens.")
get_kv_store().delete("slack_bot_tokens_config_key")
except Exception:
logger.warning("tried to delete tokens in dynamic config but failed")
pass
# Rename the table
op.rename_table(
"slack_bot_config__standard_answer_category",
@@ -190,8 +179,6 @@ def upgrade() -> None:
# Drop the table with CASCADE to handle dependent objects
op.execute("DROP TABLE slack_bot_config CASCADE")
logger.info(f"{revision}: Migration complete.")
def downgrade() -> None:
# Recreate the old slack_bot_config table
@@ -273,7 +260,7 @@ def downgrade() -> None:
}
get_kv_store().store("slack_bot_tokens_config_key", tokens)
except Exception:
logger.warning("Failed to save tokens back to KV store")
pass
# Drop the new tables in reverse order
op.drop_table("slack_channel_config")

View File

@@ -0,0 +1,107 @@
"""agent_tracking
Revision ID: 98a5008d8711
Revises: 2f80c6a2550f
Create Date: 2025-01-29 17:00:00.000001
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from sqlalchemy.dialects.postgresql import UUID
# revision identifiers, used by Alembic.
revision = "98a5008d8711"
down_revision = "2f80c6a2550f"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.create_table(
"agent__search_metrics",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=True),
sa.Column("persona_id", sa.Integer(), nullable=True),
sa.Column("agent_type", sa.String(), nullable=False),
sa.Column("start_time", sa.DateTime(timezone=True), nullable=False),
sa.Column("base_duration_s", sa.Float(), nullable=False),
sa.Column("full_duration_s", sa.Float(), nullable=False),
sa.Column("base_metrics", postgresql.JSONB(), nullable=True),
sa.Column("refined_metrics", postgresql.JSONB(), nullable=True),
sa.Column("all_metrics", postgresql.JSONB(), nullable=True),
sa.ForeignKeyConstraint(
["persona_id"],
["persona.id"],
),
sa.ForeignKeyConstraint(["user_id"], ["user.id"], ondelete="CASCADE"),
sa.PrimaryKeyConstraint("id"),
)
# Create sub_question table
op.create_table(
"agent__sub_question",
sa.Column("id", sa.Integer, primary_key=True),
sa.Column("primary_question_id", sa.Integer, sa.ForeignKey("chat_message.id")),
sa.Column(
"chat_session_id", UUID(as_uuid=True), sa.ForeignKey("chat_session.id")
),
sa.Column("sub_question", sa.Text),
sa.Column(
"time_created", sa.DateTime(timezone=True), server_default=sa.func.now()
),
sa.Column("sub_answer", sa.Text),
sa.Column("sub_question_doc_results", postgresql.JSONB(), nullable=True),
sa.Column("level", sa.Integer(), nullable=False),
sa.Column("level_question_num", sa.Integer(), nullable=False),
)
# Create sub_query table
op.create_table(
"agent__sub_query",
sa.Column("id", sa.Integer, primary_key=True),
sa.Column(
"parent_question_id", sa.Integer, sa.ForeignKey("agent__sub_question.id")
),
sa.Column(
"chat_session_id", UUID(as_uuid=True), sa.ForeignKey("chat_session.id")
),
sa.Column("sub_query", sa.Text),
sa.Column(
"time_created", sa.DateTime(timezone=True), server_default=sa.func.now()
),
)
# Create sub_query__search_doc association table
op.create_table(
"agent__sub_query__search_doc",
sa.Column(
"sub_query_id",
sa.Integer,
sa.ForeignKey("agent__sub_query.id"),
primary_key=True,
),
sa.Column(
"search_doc_id",
sa.Integer,
sa.ForeignKey("search_doc.id"),
primary_key=True,
),
)
op.add_column(
"chat_message",
sa.Column(
"refined_answer_improvement",
sa.Boolean(),
nullable=True,
),
)
def downgrade() -> None:
op.drop_column("chat_message", "refined_answer_improvement")
op.drop_table("agent__sub_query__search_doc")
op.drop_table("agent__sub_query")
op.drop_table("agent__sub_question")
op.drop_table("agent__search_metrics")

View File

@@ -0,0 +1,29 @@
"""remove recent assistants
Revision ID: a6df6b88ef81
Revises: 4d58345da04a
Create Date: 2025-01-29 10:25:52.790407
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "a6df6b88ef81"
down_revision = "4d58345da04a"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.drop_column("user", "recent_assistants")
def downgrade() -> None:
op.add_column(
"user",
sa.Column(
"recent_assistants", postgresql.JSONB(), server_default="[]", nullable=False
),
)

View File

@@ -0,0 +1,124 @@
"""Add checkpointing/failure handling
Revision ID: b7a7eee5aa15
Revises: f39c5794c10a
Create Date: 2025-01-24 15:17:36.763172
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = "b7a7eee5aa15"
down_revision = "f39c5794c10a"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.add_column(
"index_attempt",
sa.Column("checkpoint_pointer", sa.String(), nullable=True),
)
op.add_column(
"index_attempt",
sa.Column("poll_range_start", sa.DateTime(timezone=True), nullable=True),
)
op.add_column(
"index_attempt",
sa.Column("poll_range_end", sa.DateTime(timezone=True), nullable=True),
)
op.create_index(
"ix_index_attempt_cc_pair_settings_poll",
"index_attempt",
[
"connector_credential_pair_id",
"search_settings_id",
"status",
sa.text("time_updated DESC"),
],
)
# Drop the old IndexAttemptError table
op.drop_index("index_attempt_id", table_name="index_attempt_errors")
op.drop_table("index_attempt_errors")
# Create the new version of the table
op.create_table(
"index_attempt_errors",
sa.Column("id", sa.Integer(), primary_key=True),
sa.Column("index_attempt_id", sa.Integer(), nullable=False),
sa.Column("connector_credential_pair_id", sa.Integer(), nullable=False),
sa.Column("document_id", sa.String(), nullable=True),
sa.Column("document_link", sa.String(), nullable=True),
sa.Column("entity_id", sa.String(), nullable=True),
sa.Column("failed_time_range_start", sa.DateTime(timezone=True), nullable=True),
sa.Column("failed_time_range_end", sa.DateTime(timezone=True), nullable=True),
sa.Column("failure_message", sa.Text(), nullable=False),
sa.Column("is_resolved", sa.Boolean(), nullable=False, default=False),
sa.Column(
"time_created",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.ForeignKeyConstraint(
["index_attempt_id"],
["index_attempt.id"],
),
sa.ForeignKeyConstraint(
["connector_credential_pair_id"],
["connector_credential_pair.id"],
),
)
def downgrade() -> None:
op.execute("SET lock_timeout = '5s'")
# try a few times to drop the table, this has been observed to fail due to other locks
# blocking the drop
NUM_TRIES = 10
for i in range(NUM_TRIES):
try:
op.drop_table("index_attempt_errors")
break
except Exception as e:
if i == NUM_TRIES - 1:
raise e
print(f"Error dropping table: {e}. Retrying...")
op.execute("SET lock_timeout = DEFAULT")
# Recreate the old IndexAttemptError table
op.create_table(
"index_attempt_errors",
sa.Column("id", sa.Integer(), primary_key=True),
sa.Column("index_attempt_id", sa.Integer(), nullable=True),
sa.Column("batch", sa.Integer(), nullable=True),
sa.Column("doc_summaries", postgresql.JSONB(), nullable=False),
sa.Column("error_msg", sa.Text(), nullable=True),
sa.Column("traceback", sa.Text(), nullable=True),
sa.Column(
"time_created",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
),
sa.ForeignKeyConstraint(
["index_attempt_id"],
["index_attempt.id"],
),
)
op.create_index(
"index_attempt_id",
"index_attempt_errors",
["time_created"],
)
op.drop_index("ix_index_attempt_cc_pair_settings_poll")
op.drop_column("index_attempt", "checkpoint_pointer")
op.drop_column("index_attempt", "poll_range_start")
op.drop_column("index_attempt", "poll_range_end")

View File

@@ -0,0 +1,80 @@
"""add default slack channel config
Revision ID: eaa3b5593925
Revises: 98a5008d8711
Create Date: 2025-02-03 18:07:56.552526
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "eaa3b5593925"
down_revision = "98a5008d8711"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Add is_default column
op.add_column(
"slack_channel_config",
sa.Column("is_default", sa.Boolean(), nullable=False, server_default="false"),
)
op.create_index(
"ix_slack_channel_config_slack_bot_id_default",
"slack_channel_config",
["slack_bot_id", "is_default"],
unique=True,
postgresql_where=sa.text("is_default IS TRUE"),
)
# Create default channel configs for existing slack bots without one
conn = op.get_bind()
slack_bots = conn.execute(sa.text("SELECT id FROM slack_bot")).fetchall()
for slack_bot in slack_bots:
slack_bot_id = slack_bot[0]
existing_default = conn.execute(
sa.text(
"SELECT id FROM slack_channel_config WHERE slack_bot_id = :bot_id AND is_default = TRUE"
),
{"bot_id": slack_bot_id},
).fetchone()
if not existing_default:
conn.execute(
sa.text(
"""
INSERT INTO slack_channel_config (
slack_bot_id, persona_id, channel_config, enable_auto_filters, is_default
) VALUES (
:bot_id, NULL,
'{"channel_name": null, '
'"respond_member_group_list": [], '
'"answer_filters": [], '
'"follow_up_tags": [], '
'"respond_tag_only": true}',
FALSE, TRUE
)
"""
),
{"bot_id": slack_bot_id},
)
def downgrade() -> None:
# Delete default slack channel configs
conn = op.get_bind()
conn.execute(sa.text("DELETE FROM slack_channel_config WHERE is_default = TRUE"))
# Remove index
op.drop_index(
"ix_slack_channel_config_slack_bot_id_default",
table_name="slack_channel_config",
)
# Remove is_default column
op.drop_column("slack_channel_config", "is_default")

View File

@@ -0,0 +1,40 @@
"""Add background errors table
Revision ID: f39c5794c10a
Revises: 2cdeff6d8c93
Create Date: 2025-02-12 17:11:14.527876
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "f39c5794c10a"
down_revision = "2cdeff6d8c93"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.create_table(
"background_error",
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("message", sa.String(), nullable=False),
sa.Column(
"time_created",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=False,
),
sa.Column("cc_pair_id", sa.Integer(), nullable=True),
sa.PrimaryKeyConstraint("id"),
sa.ForeignKeyConstraint(
["cc_pair_id"],
["connector_credential_pair.id"],
ondelete="CASCADE",
),
)
def downgrade() -> None:
op.drop_table("background_error")

View File

@@ -0,0 +1,53 @@
"""delete non-search assistants
Revision ID: f5437cc136c5
Revises: eaa3b5593925
Create Date: 2025-02-04 16:17:15.677256
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "f5437cc136c5"
down_revision = "eaa3b5593925"
branch_labels = None
depends_on = None
def upgrade() -> None:
pass
def downgrade() -> None:
# Fix: split the statements into multiple op.execute() calls
op.execute(
"""
WITH personas_without_search AS (
SELECT p.id
FROM persona p
LEFT JOIN persona__tool pt ON p.id = pt.persona_id
LEFT JOIN tool t ON pt.tool_id = t.id
GROUP BY p.id
HAVING COUNT(CASE WHEN t.in_code_tool_id = 'run_search' THEN 1 END) = 0
)
UPDATE slack_channel_config
SET persona_id = NULL
WHERE is_default = TRUE AND persona_id IN (SELECT id FROM personas_without_search)
"""
)
op.execute(
"""
WITH personas_without_search AS (
SELECT p.id
FROM persona p
LEFT JOIN persona__tool pt ON p.id = pt.persona_id
LEFT JOIN tool t ON pt.tool_id = t.id
GROUP BY p.id
HAVING COUNT(CASE WHEN t.in_code_tool_id = 'run_search' THEN 1 END) = 0
)
DELETE FROM slack_channel_config
WHERE is_default = FALSE AND persona_id IN (SELECT id FROM personas_without_search)
"""
)

View File

@@ -32,6 +32,7 @@ def perform_ttl_management_task(
@celery_app.task(
name="check_ttl_management_task",
ignore_result=True,
soft_time_limit=JOB_TIMEOUT,
)
def check_ttl_management_task(*, tenant_id: str | None) -> None:
@@ -56,6 +57,7 @@ def check_ttl_management_task(*, tenant_id: str | None) -> None:
@celery_app.task(
name="autogenerate_usage_report_task",
ignore_result=True,
soft_time_limit=JOB_TIMEOUT,
)
def autogenerate_usage_report_task(*, tenant_id: str | None) -> None:

View File

@@ -1,44 +1,46 @@
from datetime import timedelta
from typing import Any
from onyx.background.celery.tasks.beat_schedule import (
beat_cloud_tasks as base_beat_system_tasks,
)
from onyx.background.celery.tasks.beat_schedule import BEAT_EXPIRES_DEFAULT
from onyx.background.celery.tasks.beat_schedule import (
cloud_tasks_to_schedule as base_cloud_tasks_to_schedule,
beat_task_templates as base_beat_task_templates,
)
from onyx.background.celery.tasks.beat_schedule import generate_cloud_tasks
from onyx.background.celery.tasks.beat_schedule import (
tasks_to_schedule as base_tasks_to_schedule,
get_tasks_to_schedule as base_get_tasks_to_schedule,
)
from onyx.configs.constants import ONYX_CLOUD_CELERY_TASK_PREFIX
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryTask
from shared_configs.configs import MULTI_TENANT
ee_cloud_tasks_to_schedule = [
{
"name": f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_autogenerate-usage-report",
"task": OnyxCeleryTask.CLOUD_BEAT_TASK_GENERATOR,
"schedule": timedelta(days=30),
"options": {
"priority": OnyxCeleryPriority.HIGHEST,
"expires": BEAT_EXPIRES_DEFAULT,
ee_beat_system_tasks: list[dict] = []
ee_beat_task_templates: list[dict] = []
ee_beat_task_templates.extend(
[
{
"name": "autogenerate-usage-report",
"task": OnyxCeleryTask.AUTOGENERATE_USAGE_REPORT_TASK,
"schedule": timedelta(days=30),
"options": {
"priority": OnyxCeleryPriority.MEDIUM,
"expires": BEAT_EXPIRES_DEFAULT,
},
},
"kwargs": {
"task_name": OnyxCeleryTask.AUTOGENERATE_USAGE_REPORT_TASK,
{
"name": "check-ttl-management",
"task": OnyxCeleryTask.CHECK_TTL_MANAGEMENT_TASK,
"schedule": timedelta(hours=1),
"options": {
"priority": OnyxCeleryPriority.MEDIUM,
"expires": BEAT_EXPIRES_DEFAULT,
},
},
},
{
"name": f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_check-ttl-management",
"task": OnyxCeleryTask.CLOUD_BEAT_TASK_GENERATOR,
"schedule": timedelta(hours=1),
"options": {
"priority": OnyxCeleryPriority.HIGHEST,
"expires": BEAT_EXPIRES_DEFAULT,
},
"kwargs": {
"task_name": OnyxCeleryTask.CHECK_TTL_MANAGEMENT_TASK,
},
},
]
]
)
ee_tasks_to_schedule: list[dict] = []
@@ -65,9 +67,14 @@ if not MULTI_TENANT:
]
def get_cloud_tasks_to_schedule() -> list[dict[str, Any]]:
return ee_cloud_tasks_to_schedule + base_cloud_tasks_to_schedule
def get_cloud_tasks_to_schedule(beat_multiplier: float) -> list[dict[str, Any]]:
beat_system_tasks = ee_beat_system_tasks + base_beat_system_tasks
beat_task_templates = ee_beat_task_templates + base_beat_task_templates
cloud_tasks = generate_cloud_tasks(
beat_system_tasks, beat_task_templates, beat_multiplier
)
return cloud_tasks
def get_tasks_to_schedule() -> list[dict[str, Any]]:
return ee_tasks_to_schedule + base_tasks_to_schedule
return ee_tasks_to_schedule + base_get_tasks_to_schedule()

View File

@@ -77,3 +77,5 @@ POSTHOG_HOST = os.environ.get("POSTHOG_HOST") or "https://us.i.posthog.com"
HUBSPOT_TRACKING_URL = os.environ.get("HUBSPOT_TRACKING_URL")
ANONYMOUS_USER_COOKIE_NAME = "onyx_anonymous_user"
GATED_TENANTS_KEY = "gated_tenants"

View File

@@ -2,8 +2,11 @@ from uuid import UUID
from sqlalchemy.orm import Session
from onyx.configs.constants import NotificationType
from onyx.db.models import Persona__User
from onyx.db.models import Persona__UserGroup
from onyx.db.notification import create_notification
from onyx.server.features.persona.models import PersonaSharedNotificationData
def make_persona_private(
@@ -12,6 +15,9 @@ def make_persona_private(
group_ids: list[int] | None,
db_session: Session,
) -> None:
"""NOTE(rkuo): This function batches all updates into a single commit. If we don't
dedupe the inputs, the commit will exception."""
db_session.query(Persona__User).filter(
Persona__User.persona_id == persona_id
).delete(synchronize_session="fetch")
@@ -20,11 +26,22 @@ def make_persona_private(
).delete(synchronize_session="fetch")
if user_ids:
for user_uuid in user_ids:
db_session.add(Persona__User(persona_id=persona_id, user_id=user_uuid))
user_ids_set = set(user_ids)
for user_id in user_ids_set:
db_session.add(Persona__User(persona_id=persona_id, user_id=user_id))
create_notification(
user_id=user_id,
notif_type=NotificationType.PERSONA_SHARED,
db_session=db_session,
additional_data=PersonaSharedNotificationData(
persona_id=persona_id,
).model_dump(),
)
if group_ids:
for group_id in group_ids:
group_ids_set = set(group_ids)
for group_id in group_ids_set:
db_session.add(
Persona__UserGroup(persona_id=persona_id, user_group_id=group_id)
)

View File

@@ -218,14 +218,14 @@ def fetch_user_groups_for_user(
return db_session.scalars(stmt).all()
def construct_document_select_by_usergroup(
def construct_document_id_select_by_usergroup(
user_group_id: int,
) -> Select:
"""This returns a statement that should be executed using
.yield_per() to minimize overhead. The primary consumers of this function
are background processing task generators."""
stmt = (
select(Document)
select(Document.id)
.join(
DocumentByConnectorCredentialPair,
Document.id == DocumentByConnectorCredentialPair.id,

View File

@@ -13,6 +13,7 @@ from onyx.connectors.confluence.onyx_confluence import OnyxConfluence
from onyx.connectors.confluence.utils import get_user_email_from_username__server
from onyx.connectors.models import SlimDocument
from onyx.db.models import ConnectorCredentialPair
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.utils.logger import setup_logger
logger = setup_logger()
@@ -257,6 +258,7 @@ def _fetch_all_page_restrictions(
slim_docs: list[SlimDocument],
space_permissions_by_space_key: dict[str, ExternalAccess],
is_cloud: bool,
callback: IndexingHeartbeatInterface | None,
) -> list[DocExternalAccess]:
"""
For all pages, if a page has restrictions, then use those restrictions.
@@ -265,6 +267,12 @@ def _fetch_all_page_restrictions(
document_restrictions: list[DocExternalAccess] = []
for slim_doc in slim_docs:
if callback:
if callback.should_stop():
raise RuntimeError("confluence_doc_sync: Stop signal detected")
callback.progress("confluence_doc_sync:fetch_all_page_restrictions", 1)
if slim_doc.perm_sync_data is None:
raise ValueError(
f"No permission sync data found for document {slim_doc.id}"
@@ -334,7 +342,7 @@ def _fetch_all_page_restrictions(
def confluence_doc_sync(
cc_pair: ConnectorCredentialPair,
cc_pair: ConnectorCredentialPair, callback: IndexingHeartbeatInterface | None
) -> list[DocExternalAccess]:
"""
Adds the external permissions to the documents in postgres
@@ -357,8 +365,16 @@ def confluence_doc_sync(
slim_docs = []
logger.debug("Fetching all slim documents from confluence")
for doc_batch in confluence_connector.retrieve_all_slim_documents():
for doc_batch in confluence_connector.retrieve_all_slim_documents(
callback=callback
):
logger.debug(f"Got {len(doc_batch)} slim documents from confluence")
if callback:
if callback.should_stop():
raise RuntimeError("confluence_doc_sync: Stop signal detected")
callback.progress("confluence_doc_sync", 1)
slim_docs.extend(doc_batch)
logger.debug("Fetching all page restrictions for space")
@@ -367,4 +383,5 @@ def confluence_doc_sync(
slim_docs=slim_docs,
space_permissions_by_space_key=space_permissions_by_space_key,
is_cloud=is_cloud,
callback=callback,
)

View File

@@ -1,5 +1,6 @@
from ee.onyx.db.external_perm import ExternalUserGroup
from ee.onyx.external_permissions.confluence.constants import ALL_CONF_EMAILS_GROUP_NAME
from onyx.background.error_logging import emit_background_error
from onyx.connectors.confluence.onyx_confluence import build_confluence_client
from onyx.connectors.confluence.onyx_confluence import OnyxConfluence
from onyx.connectors.confluence.utils import get_user_email_from_username__server
@@ -10,14 +11,19 @@ logger = setup_logger()
def _build_group_member_email_map(
confluence_client: OnyxConfluence,
confluence_client: OnyxConfluence, cc_pair_id: int
) -> dict[str, set[str]]:
group_member_emails: dict[str, set[str]] = {}
for user_result in confluence_client.paginated_cql_user_retrieval():
logger.debug(f"Processing groups for user: {user_result}")
user = user_result.get("user", {})
if not user:
logger.warning(f"user result missing user field: {user_result}")
msg = f"user result missing user field: {user_result}"
emit_background_error(msg, cc_pair_id=cc_pair_id)
logger.error(msg)
continue
email = user.get("email")
if not email:
# This field is only present in Confluence Server
@@ -30,13 +36,32 @@ def _build_group_member_email_map(
)
if not email:
# If we still don't have an email, skip this user
logger.warning(f"user result missing email field: {user_result}")
msg = f"user result missing email field: {user_result}"
if user.get("type") == "app":
logger.warning(msg)
else:
emit_background_error(msg, cc_pair_id=cc_pair_id)
logger.error(msg)
continue
all_users_groups: set[str] = set()
for group in confluence_client.paginated_groups_by_user_retrieval(user):
# group name uniqueness is enforced by Confluence, so we can use it as a group ID
group_id = group["name"]
group_member_emails.setdefault(group_id, set()).add(email)
all_users_groups.add(group_id)
if not all_users_groups:
msg = f"No groups found for user with email: {email}"
emit_background_error(msg, cc_pair_id=cc_pair_id)
logger.error(msg)
else:
logger.debug(f"Found groups {all_users_groups} for user with email {email}")
if not group_member_emails:
msg = "No groups found for any users."
emit_background_error(msg, cc_pair_id=cc_pair_id)
logger.error(msg)
return group_member_emails
@@ -52,6 +77,7 @@ def confluence_group_sync(
group_member_email_map = _build_group_member_email_map(
confluence_client=confluence_client,
cc_pair_id=cc_pair.id,
)
onyx_groups: list[ExternalUserGroup] = []
all_found_emails = set()

View File

@@ -6,6 +6,7 @@ from onyx.access.models import ExternalAccess
from onyx.connectors.gmail.connector import GmailConnector
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.db.models import ConnectorCredentialPair
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.utils.logger import setup_logger
logger = setup_logger()
@@ -14,6 +15,7 @@ logger = setup_logger()
def _get_slim_doc_generator(
cc_pair: ConnectorCredentialPair,
gmail_connector: GmailConnector,
callback: IndexingHeartbeatInterface | None = None,
) -> GenerateSlimDocumentOutput:
current_time = datetime.now(timezone.utc)
start_time = (
@@ -23,12 +25,14 @@ def _get_slim_doc_generator(
)
return gmail_connector.retrieve_all_slim_documents(
start=start_time, end=current_time.timestamp()
start=start_time,
end=current_time.timestamp(),
callback=callback,
)
def gmail_doc_sync(
cc_pair: ConnectorCredentialPair,
cc_pair: ConnectorCredentialPair, callback: IndexingHeartbeatInterface | None
) -> list[DocExternalAccess]:
"""
Adds the external permissions to the documents in postgres
@@ -39,11 +43,19 @@ def gmail_doc_sync(
gmail_connector = GmailConnector(**cc_pair.connector.connector_specific_config)
gmail_connector.load_credentials(cc_pair.credential.credential_json)
slim_doc_generator = _get_slim_doc_generator(cc_pair, gmail_connector)
slim_doc_generator = _get_slim_doc_generator(
cc_pair, gmail_connector, callback=callback
)
document_external_access: list[DocExternalAccess] = []
for slim_doc_batch in slim_doc_generator:
for slim_doc in slim_doc_batch:
if callback:
if callback.should_stop():
raise RuntimeError("gmail_doc_sync: Stop signal detected")
callback.progress("gmail_doc_sync", 1)
if slim_doc.perm_sync_data is None:
logger.warning(f"No permissions found for document {slim_doc.id}")
continue

View File

@@ -10,6 +10,7 @@ from onyx.connectors.google_utils.resources import get_drive_service
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.models import SlimDocument
from onyx.db.models import ConnectorCredentialPair
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.utils.logger import setup_logger
logger = setup_logger()
@@ -20,6 +21,7 @@ _PERMISSION_ID_PERMISSION_MAP: dict[str, dict[str, Any]] = {}
def _get_slim_doc_generator(
cc_pair: ConnectorCredentialPair,
google_drive_connector: GoogleDriveConnector,
callback: IndexingHeartbeatInterface | None = None,
) -> GenerateSlimDocumentOutput:
current_time = datetime.now(timezone.utc)
start_time = (
@@ -29,7 +31,9 @@ def _get_slim_doc_generator(
)
return google_drive_connector.retrieve_all_slim_documents(
start=start_time, end=current_time.timestamp()
start=start_time,
end=current_time.timestamp(),
callback=callback,
)
@@ -42,24 +46,22 @@ def _fetch_permissions_for_permission_ids(
if not permission_info or not doc_id:
return []
# Check cache first for all permission IDs
permissions = [
_PERMISSION_ID_PERMISSION_MAP[pid]
for pid in permission_ids
if pid in _PERMISSION_ID_PERMISSION_MAP
]
# If we found all permissions in cache, return them
if len(permissions) == len(permission_ids):
return permissions
owner_email = permission_info.get("owner_email")
drive_service = get_drive_service(
creds=google_drive_connector.creds,
user_email=(owner_email or google_drive_connector.primary_admin_email),
)
# Otherwise, fetch all permissions and update cache
fetched_permissions = execute_paginated_retrieval(
retrieval_function=drive_service.permissions().list,
list_key="permissions",
@@ -69,7 +71,6 @@ def _fetch_permissions_for_permission_ids(
)
permissions_for_doc_id = []
# Update cache and return all permissions
for permission in fetched_permissions:
permissions_for_doc_id.append(permission)
_PERMISSION_ID_PERMISSION_MAP[permission["id"]] = permission
@@ -131,7 +132,7 @@ def _get_permissions_from_slim_doc(
def gdrive_doc_sync(
cc_pair: ConnectorCredentialPair,
cc_pair: ConnectorCredentialPair, callback: IndexingHeartbeatInterface | None
) -> list[DocExternalAccess]:
"""
Adds the external permissions to the documents in postgres
@@ -149,6 +150,12 @@ def gdrive_doc_sync(
document_external_accesses = []
for slim_doc_batch in slim_doc_generator:
for slim_doc in slim_doc_batch:
if callback:
if callback.should_stop():
raise RuntimeError("gdrive_doc_sync: Stop signal detected")
callback.progress("gdrive_doc_sync", 1)
ext_access = _get_permissions_from_slim_doc(
google_drive_connector=google_drive_connector,
slim_doc=slim_doc,

View File

@@ -5,8 +5,9 @@ from onyx.access.models import DocExternalAccess
from onyx.access.models import ExternalAccess
from onyx.connectors.slack.connector import get_channels
from onyx.connectors.slack.connector import make_paginated_slack_api_call_w_retries
from onyx.connectors.slack.connector import SlackPollConnector
from onyx.connectors.slack.connector import SlackConnector
from onyx.db.models import ConnectorCredentialPair
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
from onyx.utils.logger import setup_logger
@@ -14,12 +15,12 @@ logger = setup_logger()
def _get_slack_document_ids_and_channels(
cc_pair: ConnectorCredentialPair,
cc_pair: ConnectorCredentialPair, callback: IndexingHeartbeatInterface | None
) -> dict[str, list[str]]:
slack_connector = SlackPollConnector(**cc_pair.connector.connector_specific_config)
slack_connector = SlackConnector(**cc_pair.connector.connector_specific_config)
slack_connector.load_credentials(cc_pair.credential.credential_json)
slim_doc_generator = slack_connector.retrieve_all_slim_documents()
slim_doc_generator = slack_connector.retrieve_all_slim_documents(callback=callback)
channel_doc_map: dict[str, list[str]] = {}
for doc_metadata_batch in slim_doc_generator:
@@ -31,6 +32,14 @@ def _get_slack_document_ids_and_channels(
channel_doc_map[channel_id] = []
channel_doc_map[channel_id].append(doc_metadata.id)
if callback:
if callback.should_stop():
raise RuntimeError(
"_get_slack_document_ids_and_channels: Stop signal detected"
)
callback.progress("_get_slack_document_ids_and_channels", 1)
return channel_doc_map
@@ -114,7 +123,7 @@ def _fetch_channel_permissions(
def slack_doc_sync(
cc_pair: ConnectorCredentialPair,
cc_pair: ConnectorCredentialPair, callback: IndexingHeartbeatInterface | None
) -> list[DocExternalAccess]:
"""
Adds the external permissions to the documents in postgres
@@ -127,7 +136,7 @@ def slack_doc_sync(
)
user_id_to_email_map = fetch_user_id_to_email_map(slack_client)
channel_doc_map = _get_slack_document_ids_and_channels(
cc_pair=cc_pair,
cc_pair=cc_pair, callback=callback
)
workspace_permissions = _fetch_workspace_permissions(
user_id_to_email_map=user_id_to_email_map,

View File

@@ -15,11 +15,13 @@ from ee.onyx.external_permissions.slack.doc_sync import slack_doc_sync
from onyx.access.models import DocExternalAccess
from onyx.configs.constants import DocumentSource
from onyx.db.models import ConnectorCredentialPair
from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
# Defining the input/output types for the sync functions
DocSyncFuncType = Callable[
[
ConnectorCredentialPair,
IndexingHeartbeatInterface | None,
],
list[DocExternalAccess],
]

View File

@@ -80,7 +80,7 @@ def oneoff_standard_answers(
def _handle_standard_answers(
message_info: SlackMessageInfo,
receiver_ids: list[str] | None,
slack_channel_config: SlackChannelConfig | None,
slack_channel_config: SlackChannelConfig,
prompt: Prompt | None,
logger: OnyxLoggingAdapter,
client: WebClient,
@@ -94,13 +94,10 @@ def _handle_standard_answers(
Returns True if standard answers are found to match the user's message and therefore,
we still need to respond to the users.
"""
# if no channel config, then no standard answers are configured
if not slack_channel_config:
return False
slack_thread_id = message_info.thread_to_respond
configured_standard_answer_categories = (
slack_channel_config.standard_answer_categories if slack_channel_config else []
slack_channel_config.standard_answer_categories
)
configured_standard_answers = set(
[

View File

@@ -10,6 +10,7 @@ from fastapi import Response
from ee.onyx.auth.users import decode_anonymous_user_jwt_token
from ee.onyx.configs.app_configs import ANONYMOUS_USER_COOKIE_NAME
from onyx.auth.api_key import extract_tenant_from_api_key_header
from onyx.configs.constants import TENANT_ID_COOKIE_NAME
from onyx.db.engine import is_valid_schema_name
from onyx.redis.redis_pool import retrieve_auth_token_data_from_redis
from shared_configs.configs import MULTI_TENANT
@@ -43,6 +44,7 @@ async def _get_tenant_id_from_request(
Attempt to extract tenant_id from:
1) The API key header
2) The Redis-based token (stored in Cookie: fastapiusersauth)
3) Reset token cookie
Fallback: POSTGRES_DEFAULT_SCHEMA
"""
# Check for API key
@@ -62,6 +64,7 @@ async def _get_tenant_id_from_request(
try:
# Look up token data in Redis
token_data = await retrieve_auth_token_data_from_redis(request)
if not token_data:
@@ -85,8 +88,18 @@ async def _get_tenant_id_from_request(
if not is_valid_schema_name(tenant_id):
raise HTTPException(status_code=400, detail="Invalid tenant ID format")
return tenant_id
except Exception as e:
logger.error(f"Unexpected error in _get_tenant_id_from_request: {str(e)}")
raise HTTPException(status_code=500, detail="Internal server error")
finally:
if tenant_id:
return tenant_id
# As a final step, check for explicit tenant_id cookie
tenant_id_cookie = request.cookies.get(TENANT_ID_COOKIE_NAME)
if tenant_id_cookie and is_valid_schema_name(tenant_id_cookie):
return tenant_id_cookie
# If we've reached this point, return the default schema
return POSTGRES_DEFAULT_SCHEMA

View File

@@ -286,6 +286,7 @@ def prepare_authorization_request(
oauth_state = (
base64.urlsafe_b64encode(oauth_uuid.bytes).rstrip(b"=").decode("utf-8")
)
session: str
if connector == DocumentSource.SLACK:
oauth_url = SlackOAuth.generate_oauth_url(oauth_state)
@@ -554,6 +555,7 @@ def handle_google_drive_oauth_callback(
)
session_json = session_json_bytes.decode("utf-8")
session: GoogleDriveOAuth.OAuthSession
try:
session = GoogleDriveOAuth.parse_session(session_json)

View File

@@ -179,6 +179,7 @@ def handle_simplified_chat_message(
chunks_below=0,
full_doc=chat_message_req.full_doc,
structured_response_format=chat_message_req.structured_response_format,
use_agentic_search=chat_message_req.use_agentic_search,
)
packets = stream_chat_message_objects(
@@ -301,6 +302,7 @@ def handle_send_message_simple_with_history(
chunks_below=0,
full_doc=req.full_doc,
structured_response_format=req.structured_response_format,
use_agentic_search=req.use_agentic_search,
)
packets = stream_chat_message_objects(

View File

@@ -57,6 +57,9 @@ class BasicCreateChatMessageRequest(ChunkContext):
# https://platform.openai.com/docs/guides/structured-outputs/introduction
structured_response_format: dict | None = None
# If True, uses agentic search instead of basic search
use_agentic_search: bool = False
class BasicCreateChatMessageWithHistoryRequest(ChunkContext):
# Last element is the new query. All previous elements are historical context
@@ -71,6 +74,8 @@ class BasicCreateChatMessageWithHistoryRequest(ChunkContext):
# only works if using an OpenAI model. See the following for more details:
# https://platform.openai.com/docs/guides/structured-outputs/introduction
structured_response_format: dict | None = None
# If True, uses agentic search instead of basic search
use_agentic_search: bool = False
class SimpleDoc(BaseModel):
@@ -120,9 +125,12 @@ class OneShotQARequest(ChunkContext):
# will also disable Thread-based Rewording if specified
query_override: str | None = None
# If True, skips generative an AI response to the search query
# If True, skips generating an AI response to the search query
skip_gen_ai_answer_generation: bool = False
# If True, uses agentic search instead of basic search
use_agentic_search: bool = False
@model_validator(mode="after")
def check_persona_fields(self) -> "OneShotQARequest":
if self.persona_override_config is None and self.persona_id is None:

View File

@@ -83,6 +83,7 @@ def handle_search_request(
user=user,
llm=llm,
fast_llm=fast_llm,
skip_query_analysis=False,
db_session=db_session,
bypass_acl=False,
)
@@ -196,6 +197,8 @@ def get_answer_stream(
retrieval_details=query_request.retrieval_options,
rerank_settings=query_request.rerank_settings,
db_session=db_session,
use_agentic_search=query_request.use_agentic_search,
skip_gen_ai_answer_generation=query_request.skip_gen_ai_answer_generation,
)
packets = stream_chat_message_objects(

View File

@@ -18,11 +18,16 @@ from ee.onyx.server.tenants.anonymous_user_path import (
from ee.onyx.server.tenants.anonymous_user_path import modify_anonymous_user_path
from ee.onyx.server.tenants.anonymous_user_path import validate_anonymous_user_path
from ee.onyx.server.tenants.billing import fetch_billing_information
from ee.onyx.server.tenants.billing import fetch_stripe_checkout_session
from ee.onyx.server.tenants.billing import fetch_tenant_stripe_information
from ee.onyx.server.tenants.models import AnonymousUserPath
from ee.onyx.server.tenants.models import BillingInformation
from ee.onyx.server.tenants.models import ImpersonateRequest
from ee.onyx.server.tenants.models import ProductGatingRequest
from ee.onyx.server.tenants.models import ProductGatingResponse
from ee.onyx.server.tenants.models import SubscriptionSessionResponse
from ee.onyx.server.tenants.models import SubscriptionStatusResponse
from ee.onyx.server.tenants.product_gating import store_product_gating
from ee.onyx.server.tenants.provisioning import delete_user_from_control_plane
from ee.onyx.server.tenants.user_mapping import get_tenant_id_for_email
from ee.onyx.server.tenants.user_mapping import remove_all_users_from_tenant
@@ -34,16 +39,14 @@ from onyx.auth.users import get_redis_strategy
from onyx.auth.users import optional_user
from onyx.auth.users import User
from onyx.configs.app_configs import WEB_DOMAIN
from onyx.configs.constants import FASTAPI_USERS_AUTH_COOKIE_NAME
from onyx.db.auth import get_user_count
from onyx.db.engine import get_current_tenant_id
from onyx.db.engine import get_session
from onyx.db.engine import get_session_with_tenant
from onyx.db.notification import create_notification
from onyx.db.users import delete_user_from_db
from onyx.db.users import get_user_by_email
from onyx.server.manage.models import UserByEmail
from onyx.server.settings.store import load_settings
from onyx.server.settings.store import store_settings
from onyx.utils.logger import setup_logger
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
@@ -111,6 +114,7 @@ async def login_as_anonymous_user(
token = generate_anonymous_user_jwt_token(tenant_id)
response = Response()
response.delete_cookie(FASTAPI_USERS_AUTH_COOKIE_NAME)
response.set_cookie(
key=ANONYMOUS_USER_COOKIE_NAME,
value=token,
@@ -124,37 +128,29 @@ async def login_as_anonymous_user(
@router.post("/product-gating")
def gate_product(
product_gating_request: ProductGatingRequest, _: None = Depends(control_plane_dep)
) -> None:
) -> ProductGatingResponse:
"""
Gating the product means that the product is not available to the tenant.
They will be directed to the billing page.
We gate the product when
1) User has ended free trial without adding payment method
2) User's card has declined
We gate the product when their subscription has ended.
"""
tenant_id = product_gating_request.tenant_id
token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
try:
store_product_gating(
product_gating_request.tenant_id, product_gating_request.application_status
)
return ProductGatingResponse(updated=True, error=None)
settings = load_settings()
settings.product_gating = product_gating_request.product_gating
store_settings(settings)
if product_gating_request.notification:
with get_session_with_tenant(tenant_id) as db_session:
create_notification(None, product_gating_request.notification, db_session)
if token is not None:
CURRENT_TENANT_ID_CONTEXTVAR.reset(token)
except Exception as e:
logger.exception("Failed to gate product")
return ProductGatingResponse(updated=False, error=str(e))
@router.get("/billing-information", response_model=BillingInformation)
@router.get("/billing-information")
async def billing_information(
_: User = Depends(current_admin_user),
) -> BillingInformation:
) -> BillingInformation | SubscriptionStatusResponse:
logger.info("Fetching billing information")
return BillingInformation(
**fetch_billing_information(CURRENT_TENANT_ID_CONTEXTVAR.get())
)
return fetch_billing_information(CURRENT_TENANT_ID_CONTEXTVAR.get())
@router.post("/create-customer-portal-session")
@@ -167,9 +163,10 @@ async def create_customer_portal_session(_: User = Depends(current_admin_user))
if not stripe_customer_id:
raise HTTPException(status_code=400, detail="Stripe customer ID not found")
logger.info(stripe_customer_id)
portal_session = stripe.billing_portal.Session.create(
customer=stripe_customer_id,
return_url=f"{WEB_DOMAIN}/admin/cloud-settings",
return_url=f"{WEB_DOMAIN}/admin/billing",
)
logger.info(portal_session)
return {"url": portal_session.url}
@@ -178,6 +175,20 @@ async def create_customer_portal_session(_: User = Depends(current_admin_user))
raise HTTPException(status_code=500, detail=str(e))
@router.post("/create-subscription-session")
async def create_subscription_session(
_: User = Depends(current_admin_user),
) -> SubscriptionSessionResponse:
try:
tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
session_id = fetch_stripe_checkout_session(tenant_id)
return SubscriptionSessionResponse(sessionId=session_id)
except Exception as e:
logger.exception("Failed to create resubscription session")
raise HTTPException(status_code=500, detail=str(e))
@router.post("/impersonate")
async def impersonate_user(
impersonate_request: ImpersonateRequest,

View File

@@ -6,6 +6,7 @@ import stripe
from ee.onyx.configs.app_configs import STRIPE_PRICE_ID
from ee.onyx.configs.app_configs import STRIPE_SECRET_KEY
from ee.onyx.server.tenants.access import generate_data_plane_token
from ee.onyx.server.tenants.models import BillingInformation
from onyx.configs.app_configs import CONTROL_PLANE_API_BASE_URL
from onyx.utils.logger import setup_logger
@@ -14,6 +15,19 @@ stripe.api_key = STRIPE_SECRET_KEY
logger = setup_logger()
def fetch_stripe_checkout_session(tenant_id: str) -> str:
token = generate_data_plane_token()
headers = {
"Authorization": f"Bearer {token}",
"Content-Type": "application/json",
}
url = f"{CONTROL_PLANE_API_BASE_URL}/create-checkout-session"
params = {"tenant_id": tenant_id}
response = requests.post(url, headers=headers, params=params)
response.raise_for_status()
return response.json()["sessionId"]
def fetch_tenant_stripe_information(tenant_id: str) -> dict:
token = generate_data_plane_token()
headers = {
@@ -27,7 +41,7 @@ def fetch_tenant_stripe_information(tenant_id: str) -> dict:
return response.json()
def fetch_billing_information(tenant_id: str) -> dict:
def fetch_billing_information(tenant_id: str) -> BillingInformation:
logger.info("Fetching billing information")
token = generate_data_plane_token()
headers = {
@@ -38,7 +52,7 @@ def fetch_billing_information(tenant_id: str) -> dict:
params = {"tenant_id": tenant_id}
response = requests.get(url, headers=headers, params=params)
response.raise_for_status()
billing_info = response.json()
billing_info = BillingInformation(**response.json())
return billing_info

View File

@@ -1,7 +1,8 @@
from datetime import datetime
from pydantic import BaseModel
from onyx.configs.constants import NotificationType
from onyx.server.settings.models import GatingType
from onyx.server.settings.models import ApplicationStatus
class CheckoutSessionCreationRequest(BaseModel):
@@ -15,15 +16,24 @@ class CreateTenantRequest(BaseModel):
class ProductGatingRequest(BaseModel):
tenant_id: str
product_gating: GatingType
notification: NotificationType | None = None
application_status: ApplicationStatus
class SubscriptionStatusResponse(BaseModel):
subscribed: bool
class BillingInformation(BaseModel):
stripe_subscription_id: str
status: str
current_period_start: datetime
current_period_end: datetime
number_of_seats: int
cancel_at_period_end: bool
canceled_at: datetime | None
trial_start: datetime | None
trial_end: datetime | None
seats: int
subscription_status: str
billing_start: str
billing_end: str
payment_method_enabled: bool
@@ -48,3 +58,12 @@ class TenantDeletionPayload(BaseModel):
class AnonymousUserPath(BaseModel):
anonymous_user_path: str | None
class ProductGatingResponse(BaseModel):
updated: bool
error: str | None
class SubscriptionSessionResponse(BaseModel):
sessionId: str

View File

@@ -0,0 +1,51 @@
from typing import cast
from ee.onyx.configs.app_configs import GATED_TENANTS_KEY
from onyx.configs.constants import ONYX_CLOUD_TENANT_ID
from onyx.redis.redis_pool import get_redis_client
from onyx.redis.redis_pool import get_redis_replica_client
from onyx.server.settings.models import ApplicationStatus
from onyx.server.settings.store import load_settings
from onyx.server.settings.store import store_settings
from onyx.setup import setup_logger
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
logger = setup_logger()
def update_tenant_gating(tenant_id: str, status: ApplicationStatus) -> None:
redis_client = get_redis_client(tenant_id=ONYX_CLOUD_TENANT_ID)
# Store the full status
status_key = f"tenant:{tenant_id}:status"
redis_client.set(status_key, status.value)
# Maintain the GATED_ACCESS set
if status == ApplicationStatus.GATED_ACCESS:
redis_client.sadd(GATED_TENANTS_KEY, tenant_id)
else:
redis_client.srem(GATED_TENANTS_KEY, tenant_id)
def store_product_gating(tenant_id: str, application_status: ApplicationStatus) -> None:
try:
token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
settings = load_settings()
settings.application_status = application_status
store_settings(settings)
# Store gated tenant information in Redis
update_tenant_gating(tenant_id, application_status)
if token is not None:
CURRENT_TENANT_ID_CONTEXTVAR.reset(token)
except Exception:
logger.exception("Failed to gate product")
raise
def get_gated_tenants() -> set[str]:
redis_client = get_redis_replica_client(tenant_id=ONYX_CLOUD_TENANT_ID)
return cast(set[str], redis_client.smembers(GATED_TENANTS_KEY))
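A sketch of how the gated set might be consumed on the request path; the function, status code, and module path are illustrative assumptions, not actual wiring from this diff:

from fastapi import HTTPException

from ee.onyx.server.tenants.product_gating import get_gated_tenants  # path assumed

def enforce_not_gated(tenant_id: str) -> None:
    # Membership in GATED_TENANTS_KEY (maintained by update_tenant_gating
    # above) means the tenant should see the billing page, not the product.
    if tenant_id in get_gated_tenants():
        raise HTTPException(status_code=402, detail="Subscription has ended")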

View File

@@ -24,6 +24,7 @@ from ee.onyx.server.tenants.user_mapping import get_tenant_id_for_email
from ee.onyx.server.tenants.user_mapping import user_owns_a_tenant
from onyx.auth.users import exceptions
from onyx.configs.app_configs import CONTROL_PLANE_API_BASE_URL
from onyx.configs.app_configs import DEV_MODE
from onyx.configs.constants import MilestoneRecordType
from onyx.db.engine import get_session_with_tenant
from onyx.db.engine import get_sqlalchemy_engine
@@ -85,7 +86,8 @@ async def create_tenant(email: str, referral_source: str | None = None) -> str:
# Provision tenant on data plane
await provision_tenant(tenant_id, email)
# Notify control plane
await notify_control_plane(tenant_id, email, referral_source)
if not DEV_MODE:
await notify_control_plane(tenant_id, email, referral_source)
except Exception as e:
logger.error(f"Tenant provisioning failed: {e}")
await rollback_tenant_provisioning(tenant_id)

View File

@@ -58,6 +58,7 @@ class UserGroup(BaseModel):
credential=CredentialSnapshot.from_credential_db_model(
cc_pair_relationship.cc_pair.credential
),
access_type=cc_pair_relationship.cc_pair.access_type,
)
for cc_pair_relationship in user_group_model.cc_pair_relationships
if cc_pair_relationship.is_current

View File

@@ -28,3 +28,9 @@ class EmbeddingModelTextType:
@staticmethod
def get_type(provider: EmbeddingProvider, text_type: EmbedTextType) -> str:
return EmbeddingModelTextType.PROVIDER_TEXT_TYPE_MAP[provider][text_type]
class GPUStatus:
CUDA = "cuda"
MAC_MPS = "mps"
NONE = "none"

View File

@@ -12,6 +12,7 @@ import voyageai # type: ignore
from cohere import AsyncClient as CohereAsyncClient
from fastapi import APIRouter
from fastapi import HTTPException
from fastapi import Request
from google.oauth2 import service_account # type: ignore
from litellm import aembedding
from litellm.exceptions import RateLimitError
@@ -320,6 +321,7 @@ async def embed_text(
prefix: str | None,
api_url: str | None,
api_version: str | None,
gpu_type: str = "UNKNOWN",
) -> list[Embedding]:
if not all(texts):
logger.error("Empty strings provided for embedding")
@@ -373,8 +375,11 @@ async def embed_text(
elapsed = time.monotonic() - start
logger.info(
f"Successfully embedded {len(texts)} texts with {total_chars} total characters "
f"with provider {provider_type} in {elapsed:.2f}"
f"event=embedding_provider "
f"texts={len(texts)} "
f"chars={total_chars} "
f"provider={provider_type} "
f"elapsed={elapsed:.2f}"
)
elif model_name is not None:
logger.info(
@@ -403,6 +408,14 @@ async def embed_text(
f"Successfully embedded {len(texts)} texts with {total_chars} total characters "
f"with local model {model_name} in {elapsed:.2f}"
)
logger.info(
f"event=embedding_model "
f"texts={len(texts)} "
f"chars={total_chars} "
f"model={model_name} "
f"gpu={gpu_type} "
f"elapsed={elapsed:.2f}"
)
else:
logger.error("Neither model name nor provider specified for embedding")
raise ValueError(
@@ -455,8 +468,15 @@ async def litellm_rerank(
@router.post("/bi-encoder-embed")
- async def process_embed_request(
+ async def route_bi_encoder_embed(
+     request: Request,
      embed_request: EmbedRequest,
  ) -> EmbedResponse:
+     return await process_embed_request(embed_request, request.app.state.gpu_type)
+
+ async def process_embed_request(
+     embed_request: EmbedRequest, gpu_type: str = "UNKNOWN"
+ ) -> EmbedResponse:
if not embed_request.texts:
raise HTTPException(status_code=400, detail="No texts to be embedded")
@@ -484,6 +504,7 @@ async def process_embed_request(
api_url=embed_request.api_url,
api_version=embed_request.api_version,
prefix=prefix,
gpu_type=gpu_type,
)
return EmbedResponse(embeddings=embeddings)
except RateLimitError as e:

View File

@@ -16,6 +16,7 @@ from model_server.custom_models import router as custom_models_router
from model_server.custom_models import warm_up_intent_model
from model_server.encoders import router as encoders_router
from model_server.management_endpoints import router as management_router
from model_server.utils import get_gpu_type
from onyx import __version__
from onyx.utils.logger import setup_logger
from shared_configs.configs import INDEXING_ONLY
@@ -58,12 +59,10 @@ def _move_files_recursively(source: Path, dest: Path, overwrite: bool = False) -
@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncGenerator:
- if torch.cuda.is_available():
-     logger.notice("CUDA GPU is available")
- elif torch.backends.mps.is_available():
-     logger.notice("Mac MPS is available")
- else:
-     logger.notice("GPU is not available, using CPU")
+ gpu_type = get_gpu_type()
+ logger.notice(f"Torch GPU Detection: gpu_type={gpu_type}")
+ app.state.gpu_type = gpu_type
if TEMP_HF_CACHE_PATH.is_dir():
logger.notice("Moving contents of temp_huggingface to huggingface cache.")

View File

@@ -1,7 +1,9 @@
import torch
from fastapi import APIRouter
from fastapi import Response
from model_server.constants import GPUStatus
from model_server.utils import get_gpu_type
router = APIRouter(prefix="/api")
@@ -11,10 +13,7 @@ async def healthcheck() -> Response:
@router.get("/gpu-status")
- async def gpu_status() -> dict[str, bool | str]:
-     if torch.cuda.is_available():
-         return {"gpu_available": True, "type": "cuda"}
-     elif torch.backends.mps.is_available():
-         return {"gpu_available": True, "type": "mps"}
-     else:
-         return {"gpu_available": False, "type": "none"}
+ async def route_gpu_status() -> dict[str, bool | str]:
+     gpu_type = get_gpu_type()
+     gpu_available = gpu_type != GPUStatus.NONE
+     return {"gpu_available": gpu_available, "type": gpu_type}

View File

@@ -8,6 +8,9 @@ from typing import Any
from typing import cast
from typing import TypeVar
import torch
from model_server.constants import GPUStatus
from onyx.utils.logger import setup_logger
logger = setup_logger()
@@ -58,3 +61,12 @@ def simple_log_function_time(
return cast(F, wrapped_sync_func)
return decorator
def get_gpu_type() -> str:
if torch.cuda.is_available():
return GPUStatus.CUDA
if torch.backends.mps.is_available():
return GPUStatus.MAC_MPS
return GPUStatus.NONE
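
Because get_gpu_type only consults torch's two detection calls, it can be unit-tested by stubbing them; a sketch using pytest's monkeypatch fixture (test names are illustrative, and get_gpu_type / GPUStatus are assumed in scope):

def test_get_gpu_type_prefers_cuda(monkeypatch) -> None:
    # CUDA is checked first, so it wins even if MPS is also available.
    monkeypatch.setattr(torch.cuda, "is_available", lambda: True)
    monkeypatch.setattr(torch.backends.mps, "is_available", lambda: True)
    assert get_gpu_type() == GPUStatus.CUDA

def test_get_gpu_type_falls_back_to_none(monkeypatch) -> None:
    monkeypatch.setattr(torch.cuda, "is_available", lambda: False)
    monkeypatch.setattr(torch.backends.mps, "is_available", lambda: False)
    assert get_gpu_type() == GPUStatus.NONE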

View File

@@ -0,0 +1,97 @@
from langgraph.graph import END
from langgraph.graph import START
from langgraph.graph import StateGraph
from onyx.agents.agent_search.basic.states import BasicInput
from onyx.agents.agent_search.basic.states import BasicOutput
from onyx.agents.agent_search.basic.states import BasicState
from onyx.agents.agent_search.orchestration.nodes.basic_use_tool_response import (
basic_use_tool_response,
)
from onyx.agents.agent_search.orchestration.nodes.llm_tool_choice import llm_tool_choice
from onyx.agents.agent_search.orchestration.nodes.prepare_tool_input import (
prepare_tool_input,
)
from onyx.agents.agent_search.orchestration.nodes.tool_call import tool_call
from onyx.utils.logger import setup_logger
logger = setup_logger()
def basic_graph_builder() -> StateGraph:
graph = StateGraph(
state_schema=BasicState,
input=BasicInput,
output=BasicOutput,
)
### Add nodes ###
graph.add_node(
node="prepare_tool_input",
action=prepare_tool_input,
)
graph.add_node(
node="llm_tool_choice",
action=llm_tool_choice,
)
graph.add_node(
node="tool_call",
action=tool_call,
)
graph.add_node(
node="basic_use_tool_response",
action=basic_use_tool_response,
)
### Add edges ###
graph.add_edge(start_key=START, end_key="prepare_tool_input")
graph.add_edge(start_key="prepare_tool_input", end_key="llm_tool_choice")
graph.add_conditional_edges("llm_tool_choice", should_continue, ["tool_call", END])
graph.add_edge(
start_key="tool_call",
end_key="basic_use_tool_response",
)
graph.add_edge(
start_key="basic_use_tool_response",
end_key=END,
)
return graph
def should_continue(state: BasicState) -> str:
return (
# If there are no tool calls, basic graph already streamed the answer
END
if state.tool_choice is None
else "tool_call"
)
if __name__ == "__main__":
from onyx.db.engine import get_session_context_manager
from onyx.context.search.models import SearchRequest
from onyx.llm.factory import get_default_llms
from onyx.agents.agent_search.shared_graph_utils.utils import get_test_config
graph = basic_graph_builder()
compiled_graph = graph.compile()
input = BasicInput(unused=True)
primary_llm, fast_llm = get_default_llms()
with get_session_context_manager() as db_session:
config, _ = get_test_config(
db_session=db_session,
primary_llm=primary_llm,
fast_llm=fast_llm,
search_request=SearchRequest(query="How does onyx use FastAPI?"),
)
compiled_graph.invoke(input, config={"metadata": {"config": config}})

View File

@@ -0,0 +1,35 @@
from typing import TypedDict
from langchain_core.messages import AIMessageChunk
from pydantic import BaseModel
from onyx.agents.agent_search.orchestration.states import ToolCallUpdate
from onyx.agents.agent_search.orchestration.states import ToolChoiceInput
from onyx.agents.agent_search.orchestration.states import ToolChoiceUpdate
# States contain values that change over the course of graph execution,
# Config is for values that are set at the start and never change.
# If you are using a value from the config and realize it needs to change,
# you should add it to the state and use/update the version in the state.
## Graph Input State
class BasicInput(BaseModel):
# Langgraph needs a nonempty input, but we pass in all static
# data through a RunnableConfig.
unused: bool = True
## Graph Output State
class BasicOutput(TypedDict):
tool_call_chunk: AIMessageChunk
## Graph State
class BasicState(
BasicInput,
ToolChoiceInput,
ToolCallUpdate,
ToolChoiceUpdate,
):
pass

View File

@@ -0,0 +1,64 @@
from collections.abc import Iterator
from typing import cast
from langchain_core.messages import AIMessageChunk
from langchain_core.messages import BaseMessage
from langgraph.types import StreamWriter
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import LlmDoc
from onyx.chat.models import OnyxContext
from onyx.chat.stream_processing.answer_response_handler import AnswerResponseHandler
from onyx.chat.stream_processing.answer_response_handler import CitationResponseHandler
from onyx.chat.stream_processing.answer_response_handler import (
PassThroughAnswerResponseHandler,
)
from onyx.chat.stream_processing.utils import map_document_id_order
from onyx.utils.logger import setup_logger
logger = setup_logger()
def process_llm_stream(
messages: Iterator[BaseMessage],
should_stream_answer: bool,
writer: StreamWriter,
final_search_results: list[LlmDoc] | None = None,
displayed_search_results: list[OnyxContext] | list[LlmDoc] | None = None,
) -> AIMessageChunk:
tool_call_chunk = AIMessageChunk(content="")
if final_search_results and displayed_search_results:
answer_handler: AnswerResponseHandler = CitationResponseHandler(
context_docs=final_search_results,
final_doc_id_to_rank_map=map_document_id_order(final_search_results),
display_doc_id_to_rank_map=map_document_id_order(displayed_search_results),
)
else:
answer_handler = PassThroughAnswerResponseHandler()
full_answer = ""
# This stream will be the llm answer if no tool is chosen. When a tool is chosen,
# the stream will contain AIMessageChunks with tool call information.
for message in messages:
answer_piece = message.content
if not isinstance(answer_piece, str):
# this is only used for logging, so fine to
# just add the string representation
answer_piece = str(answer_piece)
full_answer += answer_piece
if isinstance(message, AIMessageChunk) and (
message.tool_call_chunks or message.tool_calls
):
tool_call_chunk += message # type: ignore
elif should_stream_answer:
for response_part in answer_handler.handle_response_part(message, []):
write_custom_event(
"basic_response",
response_part,
writer,
)
logger.debug(f"Full answer: {full_answer}")
return cast(AIMessageChunk, tool_call_chunk)
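
A minimal sketch of driving process_llm_stream with a hand-built chunk iterator and a no-op writer; with no search results supplied, the pass-through handler is used and no tool-call chunks accumulate:

chunks = iter([AIMessageChunk(content="Hello, "), AIMessageChunk(content="world.")])
tool_chunk = process_llm_stream(
    messages=chunks,
    should_stream_answer=False,  # skip emitting stream events in this sketch
    writer=lambda _: None,
)
assert tool_chunk.tool_call_chunks == []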

View File

@@ -0,0 +1,20 @@
from operator import add
from typing import Annotated
from pydantic import BaseModel
class CoreState(BaseModel):
"""
This is the core state that is shared across the main graph.
"""
log_messages: Annotated[list[str], add] = []
class SubgraphCoreState(BaseModel):
"""
This is the core state that is shared across all subgraphs.
"""
log_messages: Annotated[list[str], add] = []

View File

@@ -0,0 +1,31 @@
from collections.abc import Hashable
from datetime import datetime
from langgraph.types import Send
from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer.states import (
SubQuestionAnsweringInput,
)
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states import (
ExpandedRetrievalInput,
)
from onyx.utils.logger import setup_logger
logger = setup_logger()
def send_to_expanded_retrieval(state: SubQuestionAnsweringInput) -> Send | Hashable:
"""
LangGraph edge to send a sub-question to the expanded retrieval.
"""
edge_start_time = datetime.now()
return Send(
"initial_sub_question_expanded_retrieval",
ExpandedRetrievalInput(
question=state.question,
base_search=False,
sub_question_id=state.question_id,
log_messages=[f"{edge_start_time} -- Sending to expanded retrieval"],
),
)

View File

@@ -0,0 +1,137 @@
from langgraph.graph import END
from langgraph.graph import START
from langgraph.graph import StateGraph
from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer.edges import (
send_to_expanded_retrieval,
)
from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer.nodes.check_sub_answer import (
check_sub_answer,
)
from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer.nodes.format_sub_answer import (
format_sub_answer,
)
from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer.nodes.generate_sub_answer import (
generate_sub_answer,
)
from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer.nodes.ingest_retrieved_documents import (
ingest_retrieved_documents,
)
from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer.states import (
AnswerQuestionOutput,
)
from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer.states import (
AnswerQuestionState,
)
from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer.states import (
SubQuestionAnsweringInput,
)
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.graph_builder import (
expanded_retrieval_graph_builder,
)
from onyx.agents.agent_search.shared_graph_utils.utils import get_test_config
from onyx.utils.logger import setup_logger
logger = setup_logger()
def answer_query_graph_builder() -> StateGraph:
"""
LangGraph sub-graph builder for the initial individual sub-answer generation.
"""
graph = StateGraph(
state_schema=AnswerQuestionState,
input=SubQuestionAnsweringInput,
output=AnswerQuestionOutput,
)
### Add nodes ###
# The sub-graph that executes the expanded retrieval process for a sub-question
expanded_retrieval = expanded_retrieval_graph_builder().compile()
graph.add_node(
node="initial_sub_question_expanded_retrieval",
action=expanded_retrieval,
)
# The node that ingests the retrieved documents and puts them into the proper
# state keys.
graph.add_node(
node="ingest_retrieval",
action=ingest_retrieved_documents,
)
# The node that generates the sub-answer
graph.add_node(
node="generate_sub_answer",
action=generate_sub_answer,
)
# The node that checks the sub-answer
graph.add_node(
node="answer_check",
action=check_sub_answer,
)
# The node that formats the sub-answer for the following initial answer generation
graph.add_node(
node="format_answer",
action=format_sub_answer,
)
### Add edges ###
graph.add_conditional_edges(
source=START,
path=send_to_expanded_retrieval,
path_map=["initial_sub_question_expanded_retrieval"],
)
graph.add_edge(
start_key="initial_sub_question_expanded_retrieval",
end_key="ingest_retrieval",
)
graph.add_edge(
start_key="ingest_retrieval",
end_key="generate_sub_answer",
)
graph.add_edge(
start_key="generate_sub_answer",
end_key="answer_check",
)
graph.add_edge(
start_key="answer_check",
end_key="format_answer",
)
graph.add_edge(
start_key="format_answer",
end_key=END,
)
return graph
if __name__ == "__main__":
from onyx.db.engine import get_session_context_manager
from onyx.llm.factory import get_default_llms
from onyx.context.search.models import SearchRequest
graph = answer_query_graph_builder()
compiled_graph = graph.compile()
primary_llm, fast_llm = get_default_llms()
search_request = SearchRequest(
query="what can you do with onyx or danswer?",
)
with get_session_context_manager() as db_session:
graph_config, search_tool = get_test_config(
db_session, primary_llm, fast_llm, search_request
)
inputs = SubQuestionAnsweringInput(
question="what can you do with onyx?",
question_id="0_0",
log_messages=[],
)
for thing in compiled_graph.stream(
input=inputs,
config={"configurable": {"config": graph_config}},
):
logger.debug(thing)

View File

@@ -0,0 +1,130 @@
from datetime import datetime
from typing import cast
from langchain_core.messages import BaseMessage
from langchain_core.messages import HumanMessage
from langchain_core.runnables.config import RunnableConfig
from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer.states import (
AnswerQuestionState,
)
from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer.states import (
SubQuestionAnswerCheckUpdate,
)
from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
binary_string_test,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_RATELIMIT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_TIMEOUT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_POSITIVE_VALUE_STR,
)
from onyx.agents.agent_search.shared_graph_utils.constants import AgentLLMErrorType
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id
from onyx.configs.agent_configs import AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import SUB_ANSWER_CHECK_PROMPT
from onyx.prompts.agent_search import UNKNOWN_ANSWER
from onyx.utils.logger import setup_logger
from onyx.utils.timing import log_function_time
logger = setup_logger()
_llm_node_error_strings = LLMNodeErrorStrings(
timeout="LLM Timeout Error. The sub-answer will be treated as 'relevant'",
rate_limit="LLM Rate Limit Error. The sub-answer will be treated as 'relevant'",
general_error="General LLM Error. The sub-answer will be treated as 'relevant'",
)
@log_function_time(print_only=True)
def check_sub_answer(
state: AnswerQuestionState, config: RunnableConfig
) -> SubQuestionAnswerCheckUpdate:
"""
LangGraph node to check the quality of the sub-answer. The answer
is represented as a boolean value.
"""
node_start_time = datetime.now()
level, question_num = parse_question_id(state.question_id)
if state.answer == UNKNOWN_ANSWER:
return SubQuestionAnswerCheckUpdate(
answer_quality=False,
log_messages=[
get_langgraph_node_log_string(
graph_component="initial - generate individual sub answer",
node_name="check sub answer",
node_start_time=node_start_time,
result="unknown answer",
)
],
)
msg = [
HumanMessage(
content=SUB_ANSWER_CHECK_PROMPT.format(
question=state.question,
base_answer=state.answer,
)
)
]
graph_config = cast(GraphConfig, config["metadata"]["config"])
fast_llm = graph_config.tooling.fast_llm
agent_error: AgentErrorLog | None = None
response: BaseMessage | None = None
try:
response = fast_llm.invoke(
prompt=msg,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK,
)
quality_str: str = cast(str, response.content)
answer_quality = binary_string_test(
text=quality_str, positive_value=AGENT_POSITIVE_VALUE_STR
)
log_result = f"Answer quality: {quality_str}"
except LLMTimeoutError:
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,
error_result=_llm_node_error_strings.timeout,
)
answer_quality = True
log_result = agent_error.error_result
logger.error("LLM Timeout Error - check sub answer")
except LLMRateLimitError:
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.RATE_LIMIT,
error_message=AGENT_LLM_RATELIMIT_MESSAGE,
error_result=_llm_node_error_strings.rate_limit,
)
answer_quality = True
log_result = agent_error.error_result
logger.error("LLM Rate Limit Error - check sub answer")
return SubQuestionAnswerCheckUpdate(
answer_quality=answer_quality,
log_messages=[
get_langgraph_node_log_string(
graph_component="initial - generate individual sub answer",
node_name="check sub answer",
node_start_time=node_start_time,
result=log_result,
)
],
)
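
Note that the error handling above fails open: on a timeout or rate limit, answer_quality defaults to True, so the sub-answer is kept rather than discarded. Condensed into a standalone sketch (helper names are illustrative; the exception types are the ones imported above):

def fail_open_quality_check(invoke_llm, parse_verdict) -> bool:
    # A negative verdict requires a successful LLM call; infra errors
    # default to "relevant" so a flaky LLM does not drop sub-answers.
    try:
        return parse_verdict(invoke_llm())
    except (LLMTimeoutError, LLMRateLimitError):
        return True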

View File

@@ -0,0 +1,30 @@
from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer.states import (
AnswerQuestionOutput,
)
from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer.states import (
AnswerQuestionState,
)
from onyx.agents.agent_search.shared_graph_utils.models import (
SubQuestionAnswerResults,
)
def format_sub_answer(state: AnswerQuestionState) -> AnswerQuestionOutput:
"""
LangGraph node to format the sub-answer for downstream use.
"""
return AnswerQuestionOutput(
answer_results=[
SubQuestionAnswerResults(
question=state.question,
question_id=state.question_id,
verified_high_quality=state.answer_quality,
answer=state.answer,
sub_query_retrieval_results=state.expanded_retrieval_results,
verified_reranked_documents=state.verified_reranked_documents,
context_documents=state.context_documents,
cited_documents=state.cited_documents,
sub_question_retrieval_stats=state.sub_question_retrieval_stats,
)
],
)

View File

@@ -0,0 +1,196 @@
from datetime import datetime
from typing import Any
from typing import cast
from langchain_core.messages import merge_message_runs
from langchain_core.runnables.config import RunnableConfig
from langgraph.types import StreamWriter
from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer.states import (
AnswerQuestionState,
)
from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer.states import (
SubQuestionAnswerGenerationUpdate,
)
from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
build_sub_question_answer_prompt,
)
from onyx.agents.agent_search.shared_graph_utils.calculations import (
dedup_sort_inference_section_list,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_RATELIMIT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_TIMEOUT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AgentLLMErrorType,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
LLM_ANSWER_ERROR_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.utils import get_answer_citation_ids
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_persona_agent_prompt_expressions,
)
from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import AgentAnswerPiece
from onyx.chat.models import StreamStopInfo
from onyx.chat.models import StreamStopReason
from onyx.chat.models import StreamType
from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
from onyx.configs.agent_configs import AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import NO_RECOVERED_DOCS
from onyx.utils.logger import setup_logger
from onyx.utils.timing import log_function_time
logger = setup_logger()
_llm_node_error_strings = LLMNodeErrorStrings(
timeout="LLM Timeout Error. A sub-answer could not be constructed and the sub-question will be ignored.",
rate_limit="LLM Rate Limit Error. A sub-answer could not be constructed and the sub-question will be ignored.",
general_error="General LLM Error. A sub-answer could not be constructed and the sub-question will be ignored.",
)
@log_function_time(print_only=True)
def generate_sub_answer(
state: AnswerQuestionState,
config: RunnableConfig,
writer: StreamWriter = lambda _: None,
) -> SubQuestionAnswerGenerationUpdate:
"""
LangGraph node to generate a sub-answer.
"""
node_start_time = datetime.now()
graph_config = cast(GraphConfig, config["metadata"]["config"])
question = state.question
level, question_num = parse_question_id(state.question_id)
context_docs = state.context_documents[:AGENT_MAX_ANSWER_CONTEXT_DOCS]
context_docs = dedup_sort_inference_section_list(context_docs)
persona_contextualized_prompt = get_persona_agent_prompt_expressions(
graph_config.inputs.search_request.persona
).contextualized_prompt
if len(context_docs) == 0:
answer_str = NO_RECOVERED_DOCS
cited_documents: list = []
log_results = "No documents retrieved"
write_custom_event(
"sub_answers",
AgentAnswerPiece(
answer_piece=answer_str,
level=level,
level_question_num=question_num,
answer_type="agent_sub_answer",
),
writer,
)
else:
fast_llm = graph_config.tooling.fast_llm
msg = build_sub_question_answer_prompt(
question=question,
original_question=graph_config.inputs.search_request.query,
docs=context_docs,
persona_specification=persona_contextualized_prompt,
config=fast_llm.config,
)
response: list[str | list[str | dict[str, Any]]] = []
dispatch_timings: list[float] = []
agent_error: AgentErrorLog | None = None
try:
for message in fast_llm.stream(
prompt=msg,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION,
):
# TODO: in principle, the answer here COULD contain images, but we don't support that yet
content = message.content
if not isinstance(content, str):
raise ValueError(
f"Expected content to be a string, but got {type(content)}"
)
start_stream_token = datetime.now()
write_custom_event(
"sub_answers",
AgentAnswerPiece(
answer_piece=content,
level=level,
level_question_num=question_num,
answer_type="agent_sub_answer",
),
writer,
)
end_stream_token = datetime.now()
dispatch_timings.append(
(end_stream_token - start_stream_token).microseconds
)
response.append(content)
except LLMTimeoutError:
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,
error_result=_llm_node_error_strings.timeout,
)
logger.error("LLM Timeout Error - generate sub answer")
except LLMRateLimitError:
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.RATE_LIMIT,
error_message=AGENT_LLM_RATELIMIT_MESSAGE,
error_result=_llm_node_error_strings.rate_limit,
)
logger.error("LLM Rate Limit Error - generate sub answer")
if agent_error:
answer_str = LLM_ANSWER_ERROR_MESSAGE
cited_documents = []
log_results = (
agent_error.error_result
or "Sub-answer generation failed due to LLM error"
)
else:
answer_str = merge_message_runs(response, chunk_separator="")[0].content
answer_citation_ids = get_answer_citation_ids(answer_str)
cited_documents = [
context_docs[id] for id in answer_citation_ids if id < len(context_docs)
]
log_results = None
stop_event = StreamStopInfo(
stop_reason=StreamStopReason.FINISHED,
stream_type=StreamType.SUB_ANSWER,
level=level,
level_question_num=question_num,
)
write_custom_event("stream_finished", stop_event, writer)
return SubQuestionAnswerGenerationUpdate(
answer=answer_str,
cited_documents=cited_documents,
log_messages=[
get_langgraph_node_log_string(
graph_component="initial - generate individual sub answer",
node_name="generate sub answer",
node_start_time=node_start_time,
result=log_results or "",
)
],
)
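
The citation handling near the end guards against out-of-range ids coming back from the model; the same bounds check as a standalone sketch (names are illustrative):

def safe_cited_docs(citation_ids: list[int], docs: list) -> list:
    # Keep a cited id only if it actually indexes into the context list.
    return [docs[i] for i in citation_ids if 0 <= i < len(docs)]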

View File

@@ -0,0 +1,25 @@
from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer.states import (
SubQuestionRetrievalIngestionUpdate,
)
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states import (
ExpandedRetrievalOutput,
)
from onyx.agents.agent_search.shared_graph_utils.models import AgentChunkRetrievalStats
def ingest_retrieved_documents(
state: ExpandedRetrievalOutput,
) -> SubQuestionRetrievalIngestionUpdate:
"""
LangGraph node to ingest the retrieved documents and format them for sub-answer generation.
"""
sub_question_retrieval_stats = state.expanded_retrieval_result.retrieval_stats
if sub_question_retrieval_stats is None:
sub_question_retrieval_stats = [AgentChunkRetrievalStats()]
return SubQuestionRetrievalIngestionUpdate(
expanded_retrieval_results=state.expanded_retrieval_result.expanded_query_results,
verified_reranked_documents=state.expanded_retrieval_result.verified_reranked_documents,
context_documents=state.expanded_retrieval_result.context_documents,
sub_question_retrieval_stats=sub_question_retrieval_stats,
)

View File

@@ -0,0 +1,73 @@
from operator import add
from typing import Annotated
from pydantic import BaseModel
from onyx.agents.agent_search.core_state import SubgraphCoreState
from onyx.agents.agent_search.deep_search.main.states import LoggerUpdate
from onyx.agents.agent_search.shared_graph_utils.models import AgentChunkRetrievalStats
from onyx.agents.agent_search.shared_graph_utils.models import QueryRetrievalResult
from onyx.agents.agent_search.shared_graph_utils.models import (
SubQuestionAnswerResults,
)
from onyx.agents.agent_search.shared_graph_utils.operators import (
dedup_inference_sections,
)
from onyx.context.search.models import InferenceSection
## Update States
class SubQuestionAnswerCheckUpdate(LoggerUpdate, BaseModel):
answer_quality: bool = False
log_messages: list[str] = []
class SubQuestionAnswerGenerationUpdate(LoggerUpdate, BaseModel):
answer: str = ""
log_messages: list[str] = []
cited_documents: Annotated[list[InferenceSection], dedup_inference_sections] = []
# answer_stat: AnswerStats
class SubQuestionRetrievalIngestionUpdate(LoggerUpdate, BaseModel):
expanded_retrieval_results: list[QueryRetrievalResult] = []
verified_reranked_documents: Annotated[
list[InferenceSection], dedup_inference_sections
] = []
context_documents: Annotated[list[InferenceSection], dedup_inference_sections] = []
sub_question_retrieval_stats: AgentChunkRetrievalStats = AgentChunkRetrievalStats()
## Graph Input State
class SubQuestionAnsweringInput(SubgraphCoreState):
question: str
question_id: str
# level 0 is original question and first decomposition, level 1 is follow up, etc
# question_num is a unique number per original question per level.
## Graph State
class AnswerQuestionState(
SubQuestionAnsweringInput,
SubQuestionAnswerGenerationUpdate,
SubQuestionAnswerCheckUpdate,
SubQuestionRetrievalIngestionUpdate,
):
pass
## Graph Output State
class AnswerQuestionOutput(LoggerUpdate, BaseModel):
"""
This is a list even though each call of this subgraph returns only one result:
when the answer-query subgraph is parallelized, multiple results arrive at the
fan-in, and the add operator merges them into a single list.
"""
answer_results: Annotated[list[SubQuestionAnswerResults], add] = []
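
The Annotated reducers above are what make parallel fan-out safe: LangGraph merges concurrent updates to a key with the annotated operator instead of overwriting. The add-reducer semantics in isolation (plain Python, no LangGraph required):

from operator import add

branch_a = ["result for sub-question 1"]
branch_b = ["result for sub-question 2"]
merged = add(branch_a, branch_b)  # what the reducer does on fan-in
assert merged == ["result for sub-question 1", "result for sub-question 2"]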

View File

@@ -0,0 +1,50 @@
from collections.abc import Hashable
from datetime import datetime
from langgraph.types import Send
from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer.states import (
AnswerQuestionOutput,
)
from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer.states import (
SubQuestionAnsweringInput,
)
from onyx.agents.agent_search.deep_search.initial.generate_initial_answer.states import (
SubQuestionRetrievalState,
)
from onyx.agents.agent_search.shared_graph_utils.utils import make_question_id
def parallelize_initial_sub_question_answering(
state: SubQuestionRetrievalState,
) -> list[Send | Hashable]:
"""
LangGraph edge to parallelize the initial sub-question answering. If there are
no sub-questions, we send an empty answer list to the initial answer generation,
which then relies solely on the documents retrieved for the original question.
"""
edge_start_time = datetime.now()
if len(state.initial_sub_questions) > 0:
return [
Send(
"answer_query_subgraph",
SubQuestionAnsweringInput(
question=question,
question_id=make_question_id(0, question_num + 1),
log_messages=[
f"{edge_start_time} -- Main Edge - Parallelize Initial Sub-question Answering"
],
),
)
for question_num, question in enumerate(state.initial_sub_questions)
]
else:
return [
Send(
"ingest_answers",
AnswerQuestionOutput(
answer_results=[],
),
)
]
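
Returning a list of Send objects from a conditional edge is LangGraph's map step: the target node runs once per payload, in parallel. The bare pattern, with a hypothetical node name and payload:

from langgraph.types import Send

def fan_out(questions: list[str]) -> list[Send]:
    return [
        Send("answer_one_question", {"question": q, "index": i})
        for i, q in enumerate(questions)
    ]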

View File

@@ -0,0 +1,96 @@
from langgraph.graph import END
from langgraph.graph import START
from langgraph.graph import StateGraph
from onyx.agents.agent_search.deep_search.initial.generate_initial_answer.nodes.generate_initial_answer import (
generate_initial_answer,
)
from onyx.agents.agent_search.deep_search.initial.generate_initial_answer.nodes.validate_initial_answer import (
validate_initial_answer,
)
from onyx.agents.agent_search.deep_search.initial.generate_initial_answer.states import (
SubQuestionRetrievalInput,
)
from onyx.agents.agent_search.deep_search.initial.generate_initial_answer.states import (
SubQuestionRetrievalState,
)
from onyx.agents.agent_search.deep_search.initial.generate_sub_answers.graph_builder import (
generate_sub_answers_graph_builder,
)
from onyx.agents.agent_search.deep_search.initial.retrieve_orig_question_docs.graph_builder import (
retrieve_orig_question_docs_graph_builder,
)
from onyx.utils.logger import setup_logger
logger = setup_logger()
def generate_initial_answer_graph_builder(test_mode: bool = False) -> StateGraph:
"""
LangGraph graph builder for the initial answer generation.
"""
graph = StateGraph(
state_schema=SubQuestionRetrievalState,
input=SubQuestionRetrievalInput,
)
# The sub-graph that generates the initial sub-answers
generate_sub_answers = generate_sub_answers_graph_builder().compile()
graph.add_node(
node="generate_sub_answers_subgraph",
action=generate_sub_answers,
)
# The sub-graph that retrieves the original question documents. This is run
# in parallel with the sub-answer generation process
retrieve_orig_question_docs = retrieve_orig_question_docs_graph_builder().compile()
graph.add_node(
node="retrieve_orig_question_docs_subgraph_wrapper",
action=retrieve_orig_question_docs,
)
# Node that generates the initial answer using the results of the previous
# two sub-graphs
graph.add_node(
node="generate_initial_answer",
action=generate_initial_answer,
)
# Node that validates the initial answer
graph.add_node(
node="validate_initial_answer",
action=validate_initial_answer,
)
### Add edges ###
graph.add_edge(
start_key=START,
end_key="retrieve_orig_question_docs_subgraph_wrapper",
)
graph.add_edge(
start_key=START,
end_key="generate_sub_answers_subgraph",
)
# Wait for both the original-question docs and the sub-answers to be generated before proceeding
graph.add_edge(
start_key=[
"retrieve_orig_question_docs_subgraph_wrapper",
"generate_sub_answers_subgraph",
],
end_key="generate_initial_answer",
)
graph.add_edge(
start_key="generate_initial_answer",
end_key="validate_initial_answer",
)
graph.add_edge(
start_key="validate_initial_answer",
end_key=END,
)
return graph
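
The list-valued start_key above is the join that complements the fan-out: generate_initial_answer is only scheduled once both parallel branches have completed. One way to eyeball the resulting topology (draw_ascii needs the optional grandalf package):

compiled = generate_initial_answer_graph_builder().compile()
print(compiled.get_graph().draw_ascii())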

View File

@@ -0,0 +1,403 @@
from datetime import datetime
from typing import Any
from typing import cast
from langchain_core.messages import HumanMessage
from langchain_core.messages import merge_content
from langchain_core.runnables import RunnableConfig
from langgraph.types import StreamWriter
from onyx.agents.agent_search.deep_search.initial.generate_initial_answer.states import (
SubQuestionRetrievalState,
)
from onyx.agents.agent_search.deep_search.main.models import AgentBaseMetrics
from onyx.agents.agent_search.deep_search.main.operations import (
calculate_initial_agent_stats,
)
from onyx.agents.agent_search.deep_search.main.operations import get_query_info
from onyx.agents.agent_search.deep_search.main.operations import logger
from onyx.agents.agent_search.deep_search.main.states import (
InitialAnswerUpdate,
)
from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
get_prompt_enrichment_components,
)
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
trim_prompt_piece,
)
from onyx.agents.agent_search.shared_graph_utils.calculations import (
get_answer_generation_documents,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_RATELIMIT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_TIMEOUT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AgentLLMErrorType,
)
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
from onyx.agents.agent_search.shared_graph_utils.models import InitialAgentResultStats
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.operators import (
dedup_inference_section_list,
)
from onyx.agents.agent_search.shared_graph_utils.utils import (
dispatch_main_answer_stop_info,
)
from onyx.agents.agent_search.shared_graph_utils.utils import format_docs
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_deduplicated_structured_subquestion_documents,
)
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.agents.agent_search.shared_graph_utils.utils import relevance_from_docs
from onyx.agents.agent_search.shared_graph_utils.utils import remove_document_citations
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import AgentAnswerPiece
from onyx.chat.models import ExtendedToolResponse
from onyx.chat.models import StreamingError
from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
from onyx.configs.agent_configs import AGENT_MAX_STREAMED_DOCS_FOR_INITIAL_ANSWER
from onyx.configs.agent_configs import AGENT_MIN_ORIG_QUESTION_DOCS
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION,
)
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import INITIAL_ANSWER_PROMPT_W_SUB_QUESTIONS
from onyx.prompts.agent_search import (
INITIAL_ANSWER_PROMPT_WO_SUB_QUESTIONS,
)
from onyx.prompts.agent_search import (
SUB_QUESTION_ANSWER_TEMPLATE,
)
from onyx.prompts.agent_search import UNKNOWN_ANSWER
from onyx.tools.tool_implementations.search.search_tool import yield_search_responses
from onyx.utils.timing import log_function_time
_llm_node_error_strings = LLMNodeErrorStrings(
timeout="LLM Timeout Error. The initial answer could not be generated.",
rate_limit="LLM Rate Limit Error. The initial answer could not be generated.",
general_error="General LLM Error. The initial answer could not be generated.",
)
@log_function_time(print_only=True)
def generate_initial_answer(
state: SubQuestionRetrievalState,
config: RunnableConfig,
writer: StreamWriter = lambda _: None,
) -> InitialAnswerUpdate:
"""
LangGraph node to generate the initial answer, using the initial sub-questions/sub-answers and the
documents retrieved for the original question.
"""
node_start_time = datetime.now()
graph_config = cast(GraphConfig, config["metadata"]["config"])
question = graph_config.inputs.search_request.query
prompt_enrichment_components = get_prompt_enrichment_components(graph_config)
# get all documents cited in sub-questions
structured_subquestion_docs = get_deduplicated_structured_subquestion_documents(
state.sub_question_results
)
orig_question_retrieval_documents = state.orig_question_retrieved_documents
consolidated_context_docs = structured_subquestion_docs.cited_documents
counter = 0
for original_doc_number, original_doc in enumerate(
orig_question_retrieval_documents
):
if original_doc_number not in structured_subquestion_docs.cited_documents:
if (
counter <= AGENT_MIN_ORIG_QUESTION_DOCS
or len(consolidated_context_docs) < AGENT_MAX_ANSWER_CONTEXT_DOCS
):
consolidated_context_docs.append(original_doc)
counter += 1
# sort docs by their scores - though the scores refer to different questions
relevant_docs = dedup_inference_section_list(consolidated_context_docs)
sub_questions: list[str] = []
# Create the list of documents to stream out. Start with the
# ones that will be in the context (or, if there are none, use the docs
# that were retrieved for the original question)
answer_generation_documents = get_answer_generation_documents(
relevant_docs=relevant_docs,
context_documents=structured_subquestion_docs.context_documents,
original_question_docs=orig_question_retrieval_documents,
max_docs=AGENT_MAX_STREAMED_DOCS_FOR_INITIAL_ANSWER,
)
# Use the query info from the base document retrieval
query_info = get_query_info(state.orig_question_sub_query_retrieval_results)
assert (
graph_config.tooling.search_tool
), "search_tool must be provided for agentic search"
relevance_list = relevance_from_docs(
answer_generation_documents.streaming_documents
)
for tool_response in yield_search_responses(
query=question,
reranked_sections=answer_generation_documents.streaming_documents,
final_context_sections=answer_generation_documents.context_documents,
search_query_info=query_info,
get_section_relevance=lambda: relevance_list,
search_tool=graph_config.tooling.search_tool,
):
write_custom_event(
"tool_response",
ExtendedToolResponse(
id=tool_response.id,
response=tool_response.response,
level=0,
level_question_num=0, # 0, 0 is the base question
),
writer,
)
if len(answer_generation_documents.context_documents) == 0:
write_custom_event(
"initial_agent_answer",
AgentAnswerPiece(
answer_piece=UNKNOWN_ANSWER,
level=0,
level_question_num=0,
answer_type="agent_level_answer",
),
writer,
)
dispatch_main_answer_stop_info(0, writer)
answer = UNKNOWN_ANSWER
initial_agent_stats = InitialAgentResultStats(
sub_questions={},
original_question={},
agent_effectiveness={},
)
else:
sub_question_answer_results = state.sub_question_results
# Collect the sub-questions and sub-answers and construct an appropriate
# prompt string.
# Consider replacing this with a helper function.
answered_sub_questions: list[str] = []
all_sub_questions: list[str] = [] # Separate list for tracking all questions
for idx, sub_question_answer_result in enumerate(
sub_question_answer_results, start=1
):
all_sub_questions.append(sub_question_answer_result.question)
is_valid_answer = (
sub_question_answer_result.verified_high_quality
and sub_question_answer_result.answer
and sub_question_answer_result.answer != UNKNOWN_ANSWER
)
if is_valid_answer:
answered_sub_questions.append(
SUB_QUESTION_ANSWER_TEMPLATE.format(
sub_question=sub_question_answer_result.question,
sub_answer=sub_question_answer_result.answer,
sub_question_num=idx,
)
)
sub_question_answer_str = (
"\n\n------\n\n".join(answered_sub_questions)
if answered_sub_questions
else ""
)
# Use the appropriate prompt based on whether there are sub-questions.
base_prompt = (
INITIAL_ANSWER_PROMPT_W_SUB_QUESTIONS
if answered_sub_questions
else INITIAL_ANSWER_PROMPT_WO_SUB_QUESTIONS
)
sub_questions = all_sub_questions # Replace the original assignment
model = graph_config.tooling.fast_llm
doc_context = format_docs(answer_generation_documents.context_documents)
doc_context = trim_prompt_piece(
config=model.config,
prompt_piece=doc_context,
reserved_str=(
base_prompt
+ sub_question_answer_str
+ prompt_enrichment_components.persona_prompts.contextualized_prompt
+ prompt_enrichment_components.history
+ prompt_enrichment_components.date_str
),
)
msg = [
HumanMessage(
content=base_prompt.format(
question=question,
answered_sub_questions=remove_document_citations(
sub_question_answer_str
),
relevant_docs=doc_context,
persona_specification=prompt_enrichment_components.persona_prompts.contextualized_prompt,
history=prompt_enrichment_components.history,
date_prompt=prompt_enrichment_components.date_str,
)
)
]
streamed_tokens: list[str | list[str | dict[str, Any]]] = [""]
dispatch_timings: list[float] = []
agent_error: AgentErrorLog | None = None
try:
for message in model.stream(
msg,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION,
):
# TODO: in principle, the answer here COULD contain images, but we don't support that yet
content = message.content
if not isinstance(content, str):
raise ValueError(
f"Expected content to be a string, but got {type(content)}"
)
start_stream_token = datetime.now()
write_custom_event(
"initial_agent_answer",
AgentAnswerPiece(
answer_piece=content,
level=0,
level_question_num=0,
answer_type="agent_level_answer",
),
writer,
)
end_stream_token = datetime.now()
dispatch_timings.append(
(end_stream_token - start_stream_token).microseconds
)
streamed_tokens.append(content)
except LLMTimeoutError:
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,
error_result=_llm_node_error_strings.timeout,
)
logger.error("LLM Timeout Error - generate initial answer")
except LLMRateLimitError:
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.RATE_LIMIT,
error_message=AGENT_LLM_RATELIMIT_MESSAGE,
error_result=_llm_node_error_strings.rate_limit,
)
logger.error("LLM Rate Limit Error - generate initial answer")
if agent_error:
write_custom_event(
"initial_agent_answer",
StreamingError(
error=AGENT_LLM_TIMEOUT_MESSAGE,
),
writer,
)
return InitialAnswerUpdate(
initial_answer=None,
answer_error=AgentErrorLog(
error_message=agent_error.error_message or "An LLM error occurred",
error_type=agent_error.error_type,
error_result=agent_error.error_result,
),
initial_agent_stats=None,
generated_sub_questions=sub_questions,
agent_base_end_time=None,
agent_base_metrics=None,
log_messages=[
get_langgraph_node_log_string(
graph_component="initial - generate initial answer",
node_name="generate initial answer",
node_start_time=node_start_time,
result=agent_error.error_result or "An LLM error occurred",
)
],
)
logger.debug(
f"Average dispatch time for initial answer: {sum(dispatch_timings) / len(dispatch_timings)}"
)
dispatch_main_answer_stop_info(0, writer)
response = merge_content(*streamed_tokens)
answer = cast(str, response)
initial_agent_stats = calculate_initial_agent_stats(
state.sub_question_results, state.orig_question_retrieval_stats
)
logger.debug(
f"\n\nYYYYY--Sub-Questions:\n\n{sub_question_answer_str}\n\nStats:\n\n"
)
if initial_agent_stats:
logger.debug(initial_agent_stats.original_question)
logger.debug(initial_agent_stats.sub_questions)
logger.debug(initial_agent_stats.agent_effectiveness)
agent_base_end_time = datetime.now()
if agent_base_end_time and state.agent_start_time:
duration_s = (agent_base_end_time - state.agent_start_time).total_seconds()
else:
duration_s = None
agent_base_metrics = AgentBaseMetrics(
num_verified_documents_total=len(relevant_docs),
num_verified_documents_core=state.orig_question_retrieval_stats.verified_count,
verified_avg_score_core=state.orig_question_retrieval_stats.verified_avg_scores,
num_verified_documents_base=initial_agent_stats.sub_questions.get(
"num_verified_documents"
),
verified_avg_score_base=initial_agent_stats.sub_questions.get(
"verified_avg_score"
),
base_doc_boost_factor=initial_agent_stats.agent_effectiveness.get(
"utilized_chunk_ratio"
),
support_boost_factor=initial_agent_stats.agent_effectiveness.get(
"support_ratio"
),
duration_s=duration_s,
)
return InitialAnswerUpdate(
initial_answer=answer,
initial_agent_stats=initial_agent_stats,
generated_sub_questions=sub_questions,
agent_base_end_time=agent_base_end_time,
agent_base_metrics=agent_base_metrics,
log_messages=[
get_langgraph_node_log_string(
graph_component="initial - generate initial answer",
node_name="generate initial answer",
node_start_time=node_start_time,
result="",
)
],
)
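
The consolidation loop near the top of this node implements a budgeted top-up: admit the first few original-question docs regardless, then keep admitting while the context stays under AGENT_MAX_ANSWER_CONTEXT_DOCS. A condensed sketch of the intended policy (generic names; the real code works with InferenceSections):

def consolidate(cited: list, original: list, min_orig: int, max_total: int) -> list:
    merged = list(cited)
    added = 0
    for doc in original:
        if doc in cited:
            continue
        # Mirrors the condition above: top up to the minimum, then to the budget.
        if added <= min_orig or len(merged) < max_total:
            merged.append(doc)
            added += 1
    return merged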

View File

@@ -0,0 +1,42 @@
from datetime import datetime
from onyx.agents.agent_search.deep_search.initial.generate_initial_answer.states import (
SubQuestionRetrievalState,
)
from onyx.agents.agent_search.deep_search.main.operations import logger
from onyx.agents.agent_search.deep_search.main.states import (
InitialAnswerQualityUpdate,
)
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.utils.timing import log_function_time
@log_function_time(print_only=True)
def validate_initial_answer(
state: SubQuestionRetrievalState,
) -> InitialAnswerQualityUpdate:
"""
Check whether the initial answer sufficiently addresses the original user question.
"""
node_start_time = datetime.now()
logger.debug(
f"--------{node_start_time}--------Checking for base answer validity - for now set True/False manually"
)
verdict = True  # not actually required, as the answer was already streamed out; refinement performs a similar check
return InitialAnswerQualityUpdate(
initial_answer_quality_eval=verdict,
log_messages=[
get_langgraph_node_log_string(
graph_component="initial - generate initial answer",
node_name="validate initial answer",
node_start_time=node_start_time,
result="",
)
],
)

View File

@@ -0,0 +1,51 @@
from operator import add
from typing import Annotated
from typing import TypedDict
from onyx.agents.agent_search.core_state import CoreState
from onyx.agents.agent_search.deep_search.main.states import (
ExploratorySearchUpdate,
)
from onyx.agents.agent_search.deep_search.main.states import (
InitialAnswerQualityUpdate,
)
from onyx.agents.agent_search.deep_search.main.states import (
InitialAnswerUpdate,
)
from onyx.agents.agent_search.deep_search.main.states import (
InitialQuestionDecompositionUpdate,
)
from onyx.agents.agent_search.deep_search.main.states import (
OrigQuestionRetrievalUpdate,
)
from onyx.agents.agent_search.deep_search.main.states import (
SubQuestionResultsUpdate,
)
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.models import (
QuestionRetrievalResult,
)
from onyx.context.search.models import InferenceSection
### States ###
class SubQuestionRetrievalInput(CoreState):
exploratory_search_results: list[InferenceSection]
## Graph State
class SubQuestionRetrievalState(
# This includes the core state
SubQuestionRetrievalInput,
InitialQuestionDecompositionUpdate,
InitialAnswerUpdate,
SubQuestionResultsUpdate,
OrigQuestionRetrievalUpdate,
InitialAnswerQualityUpdate,
ExploratorySearchUpdate,
):
base_raw_search_result: Annotated[list[QuestionRetrievalResult], add]
## Graph Output State
class SubQuestionRetrievalOutput(TypedDict):
log_messages: list[str]

View File

@@ -0,0 +1,48 @@
from collections.abc import Hashable
from datetime import datetime
from langgraph.types import Send
from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer.states import (
AnswerQuestionOutput,
)
from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer.states import (
SubQuestionAnsweringInput,
)
from onyx.agents.agent_search.deep_search.initial.generate_initial_answer.states import (
SubQuestionRetrievalState,
)
from onyx.agents.agent_search.shared_graph_utils.utils import make_question_id
def parallelize_initial_sub_question_answering(
state: SubQuestionRetrievalState,
) -> list[Send | Hashable]:
"""
LangGraph edge to parallelize the initial sub-question answering.
"""
edge_start_time = datetime.now()
if len(state.initial_sub_questions) > 0:
return [
Send(
"answer_sub_question_subgraphs",
SubQuestionAnsweringInput(
question=question,
question_id=make_question_id(0, question_num + 1),
log_messages=[
f"{edge_start_time} -- Main Edge - Parallelize Initial Sub-question Answering"
],
),
)
for question_num, question in enumerate(state.initial_sub_questions)
]
else:
return [
Send(
"ingest_answers",
AnswerQuestionOutput(
answer_results=[],
),
)
]

View File

@@ -0,0 +1,81 @@
from langgraph.graph import END
from langgraph.graph import START
from langgraph.graph import StateGraph
from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer.graph_builder import (
answer_query_graph_builder,
)
from onyx.agents.agent_search.deep_search.initial.generate_sub_answers.edges import (
parallelize_initial_sub_question_answering,
)
from onyx.agents.agent_search.deep_search.initial.generate_sub_answers.nodes.decompose_orig_question import (
decompose_orig_question,
)
from onyx.agents.agent_search.deep_search.initial.generate_sub_answers.nodes.format_initial_sub_answers import (
format_initial_sub_answers,
)
from onyx.agents.agent_search.deep_search.initial.generate_sub_answers.states import (
SubQuestionAnsweringInput,
)
from onyx.agents.agent_search.deep_search.initial.generate_sub_answers.states import (
SubQuestionAnsweringState,
)
from onyx.utils.logger import setup_logger
logger = setup_logger()
test_mode = False
def generate_sub_answers_graph_builder() -> StateGraph:
"""
LangGraph graph builder for the initial sub-answer generation process.
It generates the initial sub-questions and produces the answers.
"""
graph = StateGraph(
state_schema=SubQuestionAnsweringState,
input=SubQuestionAnsweringInput,
)
# Decompose the original question into sub-questions
graph.add_node(
node="decompose_orig_question",
action=decompose_orig_question,
)
# The sub-graph that executes the initial sub-question answering for
# each of the sub-questions.
answer_sub_question_subgraphs = answer_query_graph_builder().compile()
graph.add_node(
node="answer_sub_question_subgraphs",
action=answer_sub_question_subgraphs,
)
# Node that collects and formats the initial sub-question answers
graph.add_node(
node="format_initial_sub_question_answers",
action=format_initial_sub_answers,
)
graph.add_edge(
start_key=START,
end_key="decompose_orig_question",
)
graph.add_conditional_edges(
source="decompose_orig_question",
path=parallelize_initial_sub_question_answering,
path_map=["answer_sub_question_subgraphs"],
)
graph.add_edge(
start_key=["answer_sub_question_subgraphs"],
end_key="format_initial_sub_question_answers",
)
graph.add_edge(
start_key="format_initial_sub_question_answers",
end_key=END,
)
return graph

View File

@@ -0,0 +1,182 @@
from datetime import datetime
from typing import cast
from langchain_core.messages import HumanMessage
from langchain_core.messages import merge_content
from langchain_core.runnables import RunnableConfig
from langgraph.types import StreamWriter
from onyx.agents.agent_search.deep_search.initial.generate_initial_answer.states import (
SubQuestionRetrievalState,
)
from onyx.agents.agent_search.deep_search.main.models import (
AgentRefinedMetrics,
)
from onyx.agents.agent_search.deep_search.main.operations import dispatch_subquestion
from onyx.agents.agent_search.deep_search.main.operations import (
dispatch_subquestion_sep,
)
from onyx.agents.agent_search.deep_search.main.states import (
InitialQuestionDecompositionUpdate,
)
from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
build_history_prompt,
)
from onyx.agents.agent_search.shared_graph_utils.models import BaseMessage_Content
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.utils import dispatch_separated
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import StreamStopInfo
from onyx.chat.models import StreamStopReason
from onyx.chat.models import StreamType
from onyx.chat.models import SubQuestionPiece
from onyx.configs.agent_configs import AGENT_NUM_DOCS_FOR_DECOMPOSITION
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION,
)
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import (
INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH_ASSUMING_REFINEMENT,
)
from onyx.prompts.agent_search import (
INITIAL_QUESTION_DECOMPOSITION_PROMPT_ASSUMING_REFINEMENT,
)
from onyx.utils.logger import setup_logger
from onyx.utils.timing import log_function_time
logger = setup_logger()
_llm_node_error_strings = LLMNodeErrorStrings(
timeout="LLM Timeout Error. Sub-questions could not be generated.",
rate_limit="LLM Rate Limit Error. Sub-questions could not be generated.",
general_error="General LLM Error. Sub-questions could not be generated.",
)
@log_function_time(print_only=True)
def decompose_orig_question(
state: SubQuestionRetrievalState,
config: RunnableConfig,
writer: StreamWriter = lambda _: None,
) -> InitialQuestionDecompositionUpdate:
"""
LangGraph node to decompose the original question into sub-questions.
"""
node_start_time = datetime.now()
graph_config = cast(GraphConfig, config["metadata"]["config"])
question = graph_config.inputs.search_request.query
perform_initial_search_decomposition = (
graph_config.behavior.perform_initial_search_decomposition
)
# Get the rewritten queries in a defined format
model = graph_config.tooling.fast_llm
history = build_history_prompt(graph_config, question)
# Use the initial search results to inform the decomposition
agent_start_time = datetime.now()
# Initial search to inform decomposition. Just get top 3 fits
if perform_initial_search_decomposition:
# Due to the way state is represented in LangGraph, we need to double-check here
# that the retrieval has already happened. We allow a silent failure, since the
# initial search is not critical for decomposition in all queries.
if not state.exploratory_search_results:
logger.error("Initial search for decomposition failed")
sample_doc_str = "\n\n".join(
[
doc.combined_content
for doc in state.exploratory_search_results[
:AGENT_NUM_DOCS_FOR_DECOMPOSITION
]
]
)
decomposition_prompt = INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH_ASSUMING_REFINEMENT.format(
question=question, sample_doc_str=sample_doc_str, history=history
)
else:
decomposition_prompt = (
INITIAL_QUESTION_DECOMPOSITION_PROMPT_ASSUMING_REFINEMENT.format(
question=question, history=history
)
)
# Start decomposition
msg = [HumanMessage(content=decomposition_prompt)]
# Send the initial question as a subquestion with number 0
write_custom_event(
"decomp_qs",
SubQuestionPiece(
sub_question=question,
level=0,
level_question_num=0,
),
writer,
)
# dispatches custom events for subquestion tokens, adding in subquestion ids.
streamed_tokens: list[BaseMessage_Content] = []
try:
streamed_tokens = dispatch_separated(
model.stream(
msg,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION,
),
dispatch_subquestion(0, writer),
sep_callback=dispatch_subquestion_sep(0, writer),
)
decomposition_response = merge_content(*streamed_tokens)
list_of_subqs = cast(str, decomposition_response).split("\n")
initial_sub_questions = [sq.strip() for sq in list_of_subqs if sq.strip() != ""]
log_result = f"decomposed original question into {len(initial_sub_questions)} subquestions"
stop_event = StreamStopInfo(
stop_reason=StreamStopReason.FINISHED,
stream_type=StreamType.SUB_QUESTIONS,
level=0,
)
write_custom_event("stream_finished", stop_event, writer)
except LLMTimeoutError as e:
logger.error("LLM Timeout Error - decompose orig question")
raise e # fail loudly on this critical step
except LLMRateLimitError as e:
logger.error("LLM Rate Limit Error - decompose orig question")
raise e
return InitialQuestionDecompositionUpdate(
initial_sub_questions=initial_sub_questions,
agent_start_time=agent_start_time,
agent_refined_start_time=None,
agent_refined_end_time=None,
agent_refined_metrics=AgentRefinedMetrics(
refined_doc_boost_factor=None,
refined_question_boost_factor=None,
duration_s=None,
),
log_messages=[
get_langgraph_node_log_string(
graph_component="initial - generate sub answers",
node_name="decompose original question",
node_start_time=node_start_time,
result=log_result,
)
],
)
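
A minimal sketch (editor's illustration, not repo code) of how the newline-split parsing above turns a decomposition response into numbered level-0 sub-questions; the response text is made up.

response = "What is Excel?\n\nWho created it?\nWhen was it first released?"
sub_questions = [sq.strip() for sq in response.split("\n") if sq.strip() != ""]
for question_num, sq in enumerate(sub_questions, start=1):
    # Mirrors the ids used by the node: level 0, question numbers starting at 1
    print(f"level=0 question_num={question_num}: {sq}")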


@@ -0,0 +1,50 @@
from datetime import datetime
from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer.states import (
AnswerQuestionOutput,
)
from onyx.agents.agent_search.deep_search.main.states import (
SubQuestionResultsUpdate,
)
from onyx.agents.agent_search.shared_graph_utils.operators import (
dedup_inference_sections,
)
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
def format_initial_sub_answers(
state: AnswerQuestionOutput,
) -> SubQuestionResultsUpdate:
"""
LangGraph node to format the answers to the initial sub-questions, including
deduping verified documents and context documents.
"""
node_start_time = datetime.now()
documents = []
context_documents = []
cited_documents = []
answer_results = state.answer_results
for answer_result in answer_results:
documents.extend(answer_result.verified_reranked_documents)
context_documents.extend(answer_result.context_documents)
cited_documents.extend(answer_result.cited_documents)
return SubQuestionResultsUpdate(
# Deduping is done by the documents operator for the main graph
# so we might not need to dedup here
verified_reranked_documents=dedup_inference_sections(documents, []),
context_documents=dedup_inference_sections(context_documents, []),
cited_documents=dedup_inference_sections(cited_documents, []),
sub_question_results=answer_results,
log_messages=[
get_langgraph_node_log_string(
graph_component="initial - generate sub answers",
node_name="format initial sub answers",
node_start_time=node_start_time,
result="",
)
],
)
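
A hedged sketch of the dedup idea used above, with a stand-in key function; the real dedup_inference_sections operator is defined elsewhere in shared_graph_utils.operators and may differ.

from typing import Any, Callable

def dedup_by_key_sketch(items: list[Any], key: Callable[[Any], Any] = lambda x: x) -> list[Any]:
    # Keep the first occurrence of each key, preserving input order
    seen: set[Any] = set()
    out: list[Any] = []
    for item in items:
        k = key(item)
        if k not in seen:
            seen.add(k)
            out.append(item)
    return out

print(dedup_by_key_sketch(["a", "b", "a", "c"]))  # ['a', 'b', 'c']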


@@ -0,0 +1,34 @@
from typing import TypedDict
from onyx.agents.agent_search.core_state import CoreState
from onyx.agents.agent_search.deep_search.main.states import (
InitialAnswerUpdate,
)
from onyx.agents.agent_search.deep_search.main.states import (
InitialQuestionDecompositionUpdate,
)
from onyx.agents.agent_search.deep_search.main.states import (
SubQuestionResultsUpdate,
)
from onyx.context.search.models import InferenceSection
### States ###
class SubQuestionAnsweringInput(CoreState):
exploratory_search_results: list[InferenceSection]
## Graph State
class SubQuestionAnsweringState(
# This includes the core state
SubQuestionAnsweringInput,
InitialQuestionDecompositionUpdate,
InitialAnswerUpdate,
SubQuestionResultsUpdate,
):
pass
## Graph Output State
class SubQuestionAnsweringOutput(TypedDict):
log_messages: list[str]
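
The state classes above follow LangGraph's pattern of composing input, update, and output schemas. A self-contained toy example of that pattern (not the repo's graph; names are illustrative):

from typing import TypedDict
from langgraph.graph import END, START, StateGraph

class DemoState(TypedDict):
    log_messages: list[str]

def demo_node(state: DemoState) -> DemoState:
    # Nodes return updates that LangGraph merges into the graph state
    return {"log_messages": state["log_messages"] + ["visited demo_node"]}

demo_graph = StateGraph(DemoState)
demo_graph.add_node("demo_node", demo_node)
demo_graph.add_edge(START, "demo_node")
demo_graph.add_edge("demo_node", END)
print(demo_graph.compile().invoke({"log_messages": []}))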


@@ -0,0 +1,81 @@
from langgraph.graph import END
from langgraph.graph import START
from langgraph.graph import StateGraph
from onyx.agents.agent_search.deep_search.initial.retrieve_orig_question_docs.nodes.format_orig_question_search_input import (
format_orig_question_search_input,
)
from onyx.agents.agent_search.deep_search.initial.retrieve_orig_question_docs.nodes.format_orig_question_search_output import (
format_orig_question_search_output,
)
from onyx.agents.agent_search.deep_search.initial.retrieve_orig_question_docs.states import (
BaseRawSearchInput,
)
from onyx.agents.agent_search.deep_search.initial.retrieve_orig_question_docs.states import (
BaseRawSearchOutput,
)
from onyx.agents.agent_search.deep_search.initial.retrieve_orig_question_docs.states import (
BaseRawSearchState,
)
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.graph_builder import (
expanded_retrieval_graph_builder,
)
def retrieve_orig_question_docs_graph_builder() -> StateGraph:
"""
LangGraph graph builder for the retrieval of documents
that are relevant to the original question. This is
largely a wrapper around the expanded retrieval process to
ensure parallelism with the sub-question answer process.
"""
graph = StateGraph(
state_schema=BaseRawSearchState,
input=BaseRawSearchInput,
output=BaseRawSearchOutput,
)
### Add nodes ###
# Format the original question search output
graph.add_node(
node="format_orig_question_search_output",
action=format_orig_question_search_output,
)
# The sub-graph that executes the expanded retrieval process
expanded_retrieval = expanded_retrieval_graph_builder().compile()
graph.add_node(
node="retrieve_orig_question_docs_subgraph",
action=expanded_retrieval,
)
# Format the original question search input
graph.add_node(
node="format_orig_question_search_input",
action=format_orig_question_search_input,
)
### Add edges ###
graph.add_edge(start_key=START, end_key="format_orig_question_search_input")
graph.add_edge(
start_key="format_orig_question_search_input",
end_key="retrieve_orig_question_docs_subgraph",
)
graph.add_edge(
start_key="retrieve_orig_question_docs_subgraph",
end_key="format_orig_question_search_output",
)
graph.add_edge(
start_key="format_orig_question_search_output",
end_key=END,
)
return graph
if __name__ == "__main__":
pass
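
A hedged usage sketch for the builder above: compile the graph and inspect its topology. get_graph() is a LangGraph utility on compiled graphs; draw_ascii() additionally requires the grandalf package.

graph = retrieve_orig_question_docs_graph_builder()
compiled_graph = graph.compile()
# Print the node/edge structure without running any retrieval
print(compiled_graph.get_graph().draw_ascii())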


@@ -0,0 +1,28 @@
from typing import cast
from langchain_core.runnables.config import RunnableConfig
from onyx.agents.agent_search.core_state import CoreState
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states import (
ExpandedRetrievalInput,
)
from onyx.agents.agent_search.models import GraphConfig
from onyx.utils.logger import setup_logger
logger = setup_logger()
def format_orig_question_search_input(
state: CoreState, config: RunnableConfig
) -> ExpandedRetrievalInput:
"""
LangGraph node to format the search input for the original question.
"""
logger.debug("generate_raw_search_data")
graph_config = cast(GraphConfig, config["metadata"]["config"])
return ExpandedRetrievalInput(
question=graph_config.inputs.search_request.query,
base_search=True,
sub_question_id=None, # This graph is always and only used for the original question
log_messages=[],
)


@@ -0,0 +1,30 @@
from onyx.agents.agent_search.deep_search.main.states import OrigQuestionRetrievalUpdate
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states import (
ExpandedRetrievalOutput,
)
from onyx.agents.agent_search.shared_graph_utils.models import AgentChunkRetrievalStats
from onyx.utils.logger import setup_logger
logger = setup_logger()
def format_orig_question_search_output(
state: ExpandedRetrievalOutput,
) -> OrigQuestionRetrievalUpdate:
"""
LangGraph node to format the search result for the original question into the
proper format.
"""
sub_question_retrieval_stats = state.expanded_retrieval_result.retrieval_stats
    if sub_question_retrieval_stats is None:
        sub_question_retrieval_stats = AgentChunkRetrievalStats()
return OrigQuestionRetrievalUpdate(
orig_question_verified_reranked_documents=state.expanded_retrieval_result.verified_reranked_documents,
orig_question_sub_query_retrieval_results=state.expanded_retrieval_result.expanded_query_results,
orig_question_retrieved_documents=state.retrieved_documents,
orig_question_retrieval_stats=sub_question_retrieval_stats,
log_messages=[],
)


@@ -0,0 +1,29 @@
from onyx.agents.agent_search.deep_search.main.states import (
OrigQuestionRetrievalUpdate,
)
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states import (
ExpandedRetrievalInput,
)
## Graph Input State
class BaseRawSearchInput(ExpandedRetrievalInput):
pass
## Graph Output State
class BaseRawSearchOutput(OrigQuestionRetrievalUpdate):
"""
This is a list of results even though each call of this subgraph only returns one result.
This is because if we parallelize the answer query subgraph, there will be multiple
results in a list so the add operator is used to add them together.
"""
# base_expanded_retrieval_result: QuestionRetrievalResult = QuestionRetrievalResult()
## Graph State
class BaseRawSearchState(
BaseRawSearchInput, BaseRawSearchOutput, OrigQuestionRetrievalUpdate
):
pass


@@ -0,0 +1,113 @@
from collections.abc import Hashable
from datetime import datetime
from typing import cast
from typing import Literal
from langchain_core.runnables import RunnableConfig
from langgraph.types import Send
from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer.states import (
AnswerQuestionOutput,
)
from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer.states import (
SubQuestionAnsweringInput,
)
from onyx.agents.agent_search.deep_search.main.states import MainState
from onyx.agents.agent_search.deep_search.main.states import (
RequireRefinemenEvalUpdate,
)
from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.utils import make_question_id
from onyx.utils.logger import setup_logger
logger = setup_logger()
def route_initial_tool_choice(
state: MainState, config: RunnableConfig
) -> Literal["tool_call", "start_agent_search", "logging_node"]:
"""
    LangGraph conditional edge that routes to agentic search, a direct tool call, or logging.
"""
agent_config = cast(GraphConfig, config["metadata"]["config"])
if state.tool_choice is not None:
if (
agent_config.behavior.use_agentic_search
and agent_config.tooling.search_tool is not None
and state.tool_choice.tool.name == agent_config.tooling.search_tool.name
):
return "start_agent_search"
else:
return "tool_call"
else:
return "logging_node"
def parallelize_initial_sub_question_answering(
state: MainState,
) -> list[Send | Hashable]:
edge_start_time = datetime.now()
if len(state.initial_sub_questions) > 0:
return [
Send(
"answer_query_subgraph",
SubQuestionAnsweringInput(
question=question,
question_id=make_question_id(0, question_num + 1),
log_messages=[
f"{edge_start_time} -- Main Edge - Parallelize Initial Sub-question Answering"
],
),
)
for question_num, question in enumerate(state.initial_sub_questions)
]
else:
return [
Send(
"ingest_answers",
AnswerQuestionOutput(
answer_results=[],
),
)
]
# Decide whether to continue to refined-answer creation or end with logging
def continue_to_refined_answer_or_end(
state: RequireRefinemenEvalUpdate,
) -> Literal["create_refined_sub_questions", "logging_node"]:
if state.require_refined_answer_eval:
return "create_refined_sub_questions"
else:
return "logging_node"
def parallelize_refined_sub_question_answering(
state: MainState,
) -> list[Send | Hashable]:
edge_start_time = datetime.now()
if len(state.refined_sub_questions) > 0:
return [
Send(
"answer_refined_question_subgraphs",
SubQuestionAnsweringInput(
question=question_data.sub_question,
question_id=make_question_id(1, question_num),
log_messages=[
f"{edge_start_time} -- Main Edge - Parallelize Refined Sub-question Answering"
],
),
)
for question_num, question_data in state.refined_sub_questions.items()
]
else:
return [
Send(
"ingest_refined_sub_answers",
AnswerQuestionOutput(
answer_results=[],
),
)
]
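
A hedged sketch of the Send fan-out used by the edges above: each Send routes one payload to the named node, so N sub-questions become N parallel subgraph invocations. The payload shape here is illustrative, not the repo's actual input model.

from langgraph.types import Send

questions = ["Who created Excel?", "When was Excel first released?"]
sends = [
    Send("answer_query_subgraph", {"question": q, "question_id": f"0_{num + 1}"})
    for num, q in enumerate(questions)
]
print([s.node for s in sends])  # ['answer_query_subgraph', 'answer_query_subgraph']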


@@ -0,0 +1,263 @@
from langgraph.graph import END
from langgraph.graph import START
from langgraph.graph import StateGraph
from onyx.agents.agent_search.deep_search.initial.generate_initial_answer.graph_builder import (
generate_initial_answer_graph_builder,
)
from onyx.agents.agent_search.deep_search.main.edges import (
continue_to_refined_answer_or_end,
)
from onyx.agents.agent_search.deep_search.main.edges import (
parallelize_refined_sub_question_answering,
)
from onyx.agents.agent_search.deep_search.main.edges import (
route_initial_tool_choice,
)
from onyx.agents.agent_search.deep_search.main.nodes.compare_answers import (
compare_answers,
)
from onyx.agents.agent_search.deep_search.main.nodes.create_refined_sub_questions import (
create_refined_sub_questions,
)
from onyx.agents.agent_search.deep_search.main.nodes.decide_refinement_need import (
decide_refinement_need,
)
from onyx.agents.agent_search.deep_search.main.nodes.extract_entities_terms import (
extract_entities_terms,
)
from onyx.agents.agent_search.deep_search.main.nodes.generate_validate_refined_answer import (
generate_validate_refined_answer,
)
from onyx.agents.agent_search.deep_search.main.nodes.ingest_refined_sub_answers import (
ingest_refined_sub_answers,
)
from onyx.agents.agent_search.deep_search.main.nodes.persist_agent_results import (
persist_agent_results,
)
from onyx.agents.agent_search.deep_search.main.nodes.start_agent_search import (
start_agent_search,
)
from onyx.agents.agent_search.deep_search.main.states import MainInput
from onyx.agents.agent_search.deep_search.main.states import MainState
from onyx.agents.agent_search.deep_search.refinement.consolidate_sub_answers.graph_builder import (
answer_refined_query_graph_builder,
)
from onyx.agents.agent_search.orchestration.nodes.basic_use_tool_response import (
basic_use_tool_response,
)
from onyx.agents.agent_search.orchestration.nodes.llm_tool_choice import llm_tool_choice
from onyx.agents.agent_search.orchestration.nodes.prepare_tool_input import (
prepare_tool_input,
)
from onyx.agents.agent_search.orchestration.nodes.tool_call import tool_call
from onyx.agents.agent_search.shared_graph_utils.utils import get_test_config
from onyx.utils.logger import setup_logger
logger = setup_logger()
test_mode = False
def main_graph_builder(test_mode: bool = False) -> StateGraph:
"""
LangGraph graph builder for the main agent search process.
"""
graph = StateGraph(
state_schema=MainState,
input=MainInput,
)
# Prepare the tool input
graph.add_node(
node="prepare_tool_input",
action=prepare_tool_input,
)
# Choose the initial tool
graph.add_node(
node="initial_tool_choice",
action=llm_tool_choice,
)
# Call the tool, if required
graph.add_node(
node="tool_call",
action=tool_call,
)
# Use the tool response
graph.add_node(
node="basic_use_tool_response",
action=basic_use_tool_response,
)
# Start the agent search process
graph.add_node(
node="start_agent_search",
action=start_agent_search,
)
# The sub-graph for the initial answer generation
generate_initial_answer_subgraph = generate_initial_answer_graph_builder().compile()
graph.add_node(
node="generate_initial_answer_subgraph",
action=generate_initial_answer_subgraph,
)
# Create the refined sub-questions
graph.add_node(
node="create_refined_sub_questions",
action=create_refined_sub_questions,
)
# Subgraph for the refined sub-answer generation
answer_refined_question = answer_refined_query_graph_builder().compile()
graph.add_node(
node="answer_refined_question_subgraphs",
action=answer_refined_question,
)
# Ingest the refined sub-answers
graph.add_node(
node="ingest_refined_sub_answers",
action=ingest_refined_sub_answers,
)
# Node to generate the refined answer
graph.add_node(
node="generate_validate_refined_answer",
action=generate_validate_refined_answer,
)
    # Early node to extract entities and terms from the initial search results.
    # This information is used to inform the creation of the refined sub-questions.
graph.add_node(
node="extract_entity_term",
action=extract_entities_terms,
)
# Decide if the answer needs to be refined (currently always true)
graph.add_node(
node="decide_refinement_need",
action=decide_refinement_need,
)
# Compare the initial and refined answers, and determine whether
# the refined answer is sufficiently better
graph.add_node(
node="compare_answers",
action=compare_answers,
)
# Log the results. This will log the stats as well as the answers, sub-questions, and sub-answers
graph.add_node(
node="logging_node",
action=persist_agent_results,
)
### Add edges ###
graph.add_edge(start_key=START, end_key="prepare_tool_input")
graph.add_edge(
start_key="prepare_tool_input",
end_key="initial_tool_choice",
)
graph.add_conditional_edges(
"initial_tool_choice",
route_initial_tool_choice,
["tool_call", "start_agent_search", "logging_node"],
)
graph.add_edge(
start_key="tool_call",
end_key="basic_use_tool_response",
)
graph.add_edge(
start_key="basic_use_tool_response",
end_key="logging_node",
)
graph.add_edge(
start_key="start_agent_search",
end_key="generate_initial_answer_subgraph",
)
graph.add_edge(
start_key="start_agent_search",
end_key="extract_entity_term",
)
# Wait for the initial answer generation and the entity/term extraction to be complete
# before deciding if a refinement is needed.
graph.add_edge(
start_key=["generate_initial_answer_subgraph", "extract_entity_term"],
end_key="decide_refinement_need",
)
graph.add_conditional_edges(
source="decide_refinement_need",
path=continue_to_refined_answer_or_end,
path_map=["create_refined_sub_questions", "logging_node"],
)
graph.add_conditional_edges(
source="create_refined_sub_questions",
path=parallelize_refined_sub_question_answering,
path_map=["answer_refined_question_subgraphs"],
)
graph.add_edge(
start_key="answer_refined_question_subgraphs",
end_key="ingest_refined_sub_answers",
)
graph.add_edge(
start_key="ingest_refined_sub_answers",
end_key="generate_validate_refined_answer",
)
graph.add_edge(
start_key="generate_validate_refined_answer",
end_key="compare_answers",
)
graph.add_edge(
start_key="compare_answers",
end_key="logging_node",
)
graph.add_edge(
start_key="logging_node",
end_key=END,
)
return graph
if __name__ == "__main__":
from onyx.db.engine import get_session_context_manager
from onyx.llm.factory import get_default_llms
from onyx.context.search.models import SearchRequest
graph = main_graph_builder()
compiled_graph = graph.compile()
primary_llm, fast_llm = get_default_llms()
with get_session_context_manager() as db_session:
search_request = SearchRequest(query="Who created Excel?")
graph_config = get_test_config(
db_session, primary_llm, fast_llm, search_request
)
inputs = MainInput(log_messages=[])
for thing in compiled_graph.stream(
input=inputs,
config={"configurable": {"config": graph_config}},
stream_mode="custom",
subgraphs=True,
):
logger.debug(thing)
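
A self-contained toy example of the conditional-edge pattern the builder above relies on: the path function returns a node name, and path_map enumerates the legal targets. Names here are illustrative, not the repo's.

from typing import TypedDict
from langgraph.graph import END, START, StateGraph

class RouteState(TypedDict):
    use_agent: bool
    route: str

route_graph = StateGraph(RouteState)
route_graph.add_node("chooser", lambda s: s)
route_graph.add_node("agent", lambda s: {"route": "agent"})
route_graph.add_node("basic", lambda s: {"route": "basic"})
route_graph.add_edge(START, "chooser")
route_graph.add_conditional_edges(
    "chooser",
    lambda s: "agent" if s["use_agent"] else "basic",
    ["agent", "basic"],
)
route_graph.add_edge("agent", END)
route_graph.add_edge("basic", END)
print(route_graph.compile().invoke({"use_agent": True, "route": ""}))  # route == "agent"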


@@ -0,0 +1,36 @@
from pydantic import BaseModel
class RefinementSubQuestion(BaseModel):
sub_question: str
sub_question_id: str
verified: bool
answered: bool
answer: str
class AgentTimings(BaseModel):
base_duration_s: float | None
refined_duration_s: float | None
full_duration_s: float | None
class AgentBaseMetrics(BaseModel):
num_verified_documents_total: int | None
num_verified_documents_core: int | None
verified_avg_score_core: float | None
num_verified_documents_base: int | float | None
verified_avg_score_base: float | None = None
base_doc_boost_factor: float | None = None
support_boost_factor: float | None = None
duration_s: float | None = None
class AgentRefinedMetrics(BaseModel):
refined_doc_boost_factor: float | None = None
refined_question_boost_factor: float | None = None
duration_s: float | None = None
class AgentAdditionalMetrics(BaseModel):
pass
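
Illustrative construction of the metric models above; the numeric values are made up.

timings = AgentTimings(
    base_duration_s=3.2,
    refined_duration_s=5.1,
    full_duration_s=8.3,
)
refined_metrics = AgentRefinedMetrics(
    refined_doc_boost_factor=1.4,
    refined_question_boost_factor=1.1,
    duration_s=5.1,
)
print(refined_metrics.model_dump())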


@@ -0,0 +1,161 @@
from datetime import datetime
from typing import cast
from langchain_core.messages import BaseMessage
from langchain_core.messages import HumanMessage
from langchain_core.runnables import RunnableConfig
from langgraph.types import StreamWriter
from onyx.agents.agent_search.deep_search.main.states import (
InitialRefinedAnswerComparisonUpdate,
)
from onyx.agents.agent_search.deep_search.main.states import MainState
from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
binary_string_test,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_RATELIMIT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_TIMEOUT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_POSITIVE_VALUE_STR,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AgentLLMErrorType,
)
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import RefinedAnswerImprovement
from onyx.configs.agent_configs import AGENT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import (
INITIAL_REFINED_ANSWER_COMPARISON_PROMPT,
)
from onyx.utils.logger import setup_logger
from onyx.utils.timing import log_function_time
logger = setup_logger()
_llm_node_error_strings = LLMNodeErrorStrings(
timeout="The LLM timed out, and the answers could not be compared.",
rate_limit="The LLM encountered a rate limit, and the answers could not be compared.",
general_error="The LLM encountered an error, and the answers could not be compared.",
)
_ANSWER_QUALITY_NOT_SUFFICIENT_MESSAGE = (
"Answer quality is not sufficient, so stay with the initial answer."
)
@log_function_time(print_only=True)
def compare_answers(
state: MainState, config: RunnableConfig, writer: StreamWriter = lambda _: None
) -> InitialRefinedAnswerComparisonUpdate:
"""
LangGraph node to compare the initial answer and the refined answer and determine if the
refined answer is sufficiently better than the initial answer.
"""
node_start_time = datetime.now()
graph_config = cast(GraphConfig, config["metadata"]["config"])
question = graph_config.inputs.search_request.query
initial_answer = state.initial_answer
refined_answer = state.refined_answer
# if answer quality is not sufficient, then stay with the initial answer
if not state.refined_answer_quality:
write_custom_event(
"refined_answer_improvement",
RefinedAnswerImprovement(
refined_answer_improvement=False,
),
writer,
)
return InitialRefinedAnswerComparisonUpdate(
refined_answer_improvement_eval=False,
log_messages=[
get_langgraph_node_log_string(
graph_component="main",
node_name="compare answers",
node_start_time=node_start_time,
result=_ANSWER_QUALITY_NOT_SUFFICIENT_MESSAGE,
)
],
)
compare_answers_prompt = INITIAL_REFINED_ANSWER_COMPARISON_PROMPT.format(
question=question, initial_answer=initial_answer, refined_answer=refined_answer
)
msg = [HumanMessage(content=compare_answers_prompt)]
agent_error: AgentErrorLog | None = None
    # Use the fast LLM for the comparison
model = graph_config.tooling.fast_llm
resp: BaseMessage | None = None
refined_answer_improvement: bool | None = None
# no need to stream this
try:
resp = model.invoke(
msg, timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS
)
except LLMTimeoutError:
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,
error_result=_llm_node_error_strings.timeout,
)
logger.error("LLM Timeout Error - compare answers")
# continue as True in this support step
except LLMRateLimitError:
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.RATE_LIMIT,
error_message=AGENT_LLM_RATELIMIT_MESSAGE,
error_result=_llm_node_error_strings.rate_limit,
)
logger.error("LLM Rate Limit Error - compare answers")
# continue as True in this support step
if agent_error or resp is None:
refined_answer_improvement = True
if agent_error:
log_result = agent_error.error_result
else:
log_result = "An answer could not be generated."
else:
refined_answer_improvement = binary_string_test(
text=cast(str, resp.content),
positive_value=AGENT_POSITIVE_VALUE_STR,
)
log_result = f"Answer comparison: {refined_answer_improvement}"
write_custom_event(
"refined_answer_improvement",
RefinedAnswerImprovement(
refined_answer_improvement=refined_answer_improvement,
),
writer,
)
return InitialRefinedAnswerComparisonUpdate(
refined_answer_improvement_eval=refined_answer_improvement,
log_messages=[
get_langgraph_node_log_string(
graph_component="main",
node_name="compare answers",
node_start_time=node_start_time,
result=log_result,
)
],
)
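
A hedged sketch of the binary_string_test idea used above: the comparison prompt asks the LLM for a positive/negative token, and the node checks the response for the positive value. The real helper lives in shared_graph_utils.agent_prompt_ops and may differ in detail.

def binary_string_test_sketch(text: str, positive_value: str) -> bool:
    # True if the (case-insensitive) positive token appears in the response
    return positive_value.lower() in text.strip().lower()

print(binary_string_test_sketch("Yes, the refined answer is better.", "yes"))  # True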


@@ -0,0 +1,205 @@
from datetime import datetime
from typing import cast
from langchain_core.messages import HumanMessage
from langchain_core.messages import merge_content
from langchain_core.runnables import RunnableConfig
from langgraph.types import StreamWriter
from onyx.agents.agent_search.deep_search.main.models import (
RefinementSubQuestion,
)
from onyx.agents.agent_search.deep_search.main.operations import dispatch_subquestion
from onyx.agents.agent_search.deep_search.main.operations import (
dispatch_subquestion_sep,
)
from onyx.agents.agent_search.deep_search.main.states import MainState
from onyx.agents.agent_search.deep_search.main.states import (
RefinedQuestionDecompositionUpdate,
)
from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
build_history_prompt,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_RATELIMIT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_TIMEOUT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AgentLLMErrorType,
)
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
from onyx.agents.agent_search.shared_graph_utils.models import BaseMessage_Content
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.utils import dispatch_separated
from onyx.agents.agent_search.shared_graph_utils.utils import (
format_entity_term_extraction,
)
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.agents.agent_search.shared_graph_utils.utils import make_question_id
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import StreamingError
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION,
)
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import (
REFINEMENT_QUESTION_DECOMPOSITION_PROMPT_W_INITIAL_SUBQUESTION_ANSWERS,
)
from onyx.tools.models import ToolCallKickoff
from onyx.utils.logger import setup_logger
from onyx.utils.timing import log_function_time
logger = setup_logger()
_ANSWERED_SUBQUESTIONS_DIVIDER = "\n\n---\n\n"
_llm_node_error_strings = LLMNodeErrorStrings(
timeout="The LLM timed out. The sub-questions could not be generated.",
rate_limit="The LLM encountered a rate limit. The sub-questions could not be generated.",
general_error="The LLM encountered an error. The sub-questions could not be generated.",
)
@log_function_time(print_only=True)
def create_refined_sub_questions(
state: MainState, config: RunnableConfig, writer: StreamWriter = lambda _: None
) -> RefinedQuestionDecompositionUpdate:
"""
LangGraph node to create refined sub-questions based on the initial answer, the history,
the entity term extraction results found earlier, and the sub-questions that were answered and failed.
"""
graph_config = cast(GraphConfig, config["metadata"]["config"])
write_custom_event(
"start_refined_answer_creation",
ToolCallKickoff(
tool_name="agent_search_1",
tool_args={
"query": graph_config.inputs.search_request.query,
"answer": state.initial_answer,
},
),
writer,
)
node_start_time = datetime.now()
agent_refined_start_time = datetime.now()
question = graph_config.inputs.search_request.query
base_answer = state.initial_answer
history = build_history_prompt(graph_config, question)
    # Get the entity/term extraction result and format it for the prompt
    entity_relation_term_extractions = state.entity_relation_term_extractions
    entity_term_extraction_str = format_entity_term_extraction(
        entity_relation_term_extractions
    )
initial_question_answers = state.sub_question_results
addressed_subquestions_with_answers = [
f"Subquestion: {x.question}\nSubanswer:\n{x.answer}"
for x in initial_question_answers
if x.verified_high_quality and x.answer
]
failed_question_list = [
x.question for x in initial_question_answers if not x.verified_high_quality
]
msg = [
HumanMessage(
content=REFINEMENT_QUESTION_DECOMPOSITION_PROMPT_W_INITIAL_SUBQUESTION_ANSWERS.format(
question=question,
history=history,
entity_term_extraction_str=entity_term_extraction_str,
base_answer=base_answer,
answered_subquestions_with_answers=_ANSWERED_SUBQUESTIONS_DIVIDER.join(
addressed_subquestions_with_answers
),
failed_sub_questions="\n - ".join(failed_question_list),
),
)
]
    # Use the fast LLM to generate the refined sub-questions
model = graph_config.tooling.fast_llm
agent_error: AgentErrorLog | None = None
streamed_tokens: list[BaseMessage_Content] = []
try:
streamed_tokens = dispatch_separated(
model.stream(
msg,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION,
),
dispatch_subquestion(1, writer),
sep_callback=dispatch_subquestion_sep(1, writer),
)
except LLMTimeoutError:
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,
error_result=_llm_node_error_strings.timeout,
)
logger.error("LLM Timeout Error - create refined sub questions")
except LLMRateLimitError:
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.RATE_LIMIT,
error_message=AGENT_LLM_RATELIMIT_MESSAGE,
error_result=_llm_node_error_strings.rate_limit,
)
logger.error("LLM Rate Limit Error - create refined sub questions")
if agent_error:
refined_sub_question_dict: dict[int, RefinementSubQuestion] = {}
log_result = agent_error.error_result
write_custom_event(
"refined_sub_question_creation_error",
StreamingError(
error="Your LLM was not able to create refined sub questions in time and timed out. Please try again.",
),
writer,
)
else:
response = merge_content(*streamed_tokens)
if isinstance(response, str):
parsed_response = [q for q in response.split("\n") if q.strip() != ""]
else:
raise ValueError("LLM response is not a string")
refined_sub_question_dict = {}
for sub_question_num, sub_question in enumerate(parsed_response):
refined_sub_question = RefinementSubQuestion(
sub_question=sub_question,
sub_question_id=make_question_id(1, sub_question_num + 1),
verified=False,
answered=False,
answer="",
)
refined_sub_question_dict[sub_question_num + 1] = refined_sub_question
log_result = f"Created {len(refined_sub_question_dict)} refined sub questions"
return RefinedQuestionDecompositionUpdate(
refined_sub_questions=refined_sub_question_dict,
agent_refined_start_time=agent_refined_start_time,
log_messages=[
get_langgraph_node_log_string(
graph_component="main",
node_name="create refined sub questions",
node_start_time=node_start_time,
result=log_result,
)
],
)
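
A hedged sketch of the question-id convention implied by make_question_id and parse_question_id (level 0 for initial, level 1 for refined sub-questions). The "<level>_<num>" encoding is an assumption; the real helpers live in shared_graph_utils.utils.

def make_question_id_sketch(level: int, question_num: int) -> str:
    # Assumed encoding: "<level>_<question_num>"
    return f"{level}_{question_num}"

def parse_question_id_sketch(question_id: str) -> tuple[int, int]:
    level, question_num = question_id.split("_")
    return int(level), int(question_num)

print(parse_question_id_sketch(make_question_id_sketch(1, 3)))  # (1, 3)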


@@ -0,0 +1,62 @@
from datetime import datetime
from typing import cast
from langchain_core.runnables import RunnableConfig
from onyx.agents.agent_search.deep_search.main.states import MainState
from onyx.agents.agent_search.deep_search.main.states import (
RequireRefinemenEvalUpdate,
)
from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.utils.timing import log_function_time
@log_function_time(print_only=True)
def decide_refinement_need(
state: MainState, config: RunnableConfig
) -> RequireRefinemenEvalUpdate:
"""
LangGraph node to decide if refinement is needed based on the initial answer and the question.
At present, we always refine.
"""
node_start_time = datetime.now()
graph_config = cast(GraphConfig, config["metadata"]["config"])
decision = True # TODO: just for current testing purposes
if state.answer_error:
return RequireRefinemenEvalUpdate(
require_refined_answer_eval=False,
log_messages=[
get_langgraph_node_log_string(
graph_component="main",
node_name="decide refinement need",
node_start_time=node_start_time,
result="Timeout Error",
)
],
)
log_messages = [
get_langgraph_node_log_string(
graph_component="main",
node_name="decide refinement need",
node_start_time=node_start_time,
result=f"Refinement decision: {decision}",
)
]
if graph_config.behavior.allow_refinement:
return RequireRefinemenEvalUpdate(
require_refined_answer_eval=decision,
log_messages=log_messages,
)
else:
return RequireRefinemenEvalUpdate(
require_refined_answer_eval=False,
log_messages=log_messages,
)


@@ -0,0 +1,122 @@
from datetime import datetime
from typing import cast
from langchain_core.messages import HumanMessage
from langchain_core.runnables import RunnableConfig
from onyx.agents.agent_search.deep_search.main.operations import logger
from onyx.agents.agent_search.deep_search.main.states import (
EntityTermExtractionUpdate,
)
from onyx.agents.agent_search.deep_search.main.states import MainState
from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
trim_prompt_piece,
)
from onyx.agents.agent_search.shared_graph_utils.models import EntityExtractionResult
from onyx.agents.agent_search.shared_graph_utils.models import (
EntityRelationshipTermExtraction,
)
from onyx.agents.agent_search.shared_graph_utils.utils import format_docs
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION,
)
from onyx.configs.constants import NUM_EXPLORATORY_DOCS
from onyx.prompts.agent_search import ENTITY_TERM_EXTRACTION_PROMPT
from onyx.prompts.agent_search import ENTITY_TERM_EXTRACTION_PROMPT_JSON_EXAMPLE
from onyx.utils.timing import log_function_time
@log_function_time(print_only=True)
def extract_entities_terms(
state: MainState, config: RunnableConfig
) -> EntityTermExtractionUpdate:
"""
LangGraph node to extract entities, relationships, and terms from the initial search results.
This data is used to inform particularly the sub-questions that are created for the refined answer.
"""
node_start_time = datetime.now()
graph_config = cast(GraphConfig, config["metadata"]["config"])
if not graph_config.behavior.allow_refinement:
return EntityTermExtractionUpdate(
entity_relation_term_extractions=EntityRelationshipTermExtraction(
entities=[],
relationships=[],
terms=[],
),
log_messages=[
get_langgraph_node_log_string(
graph_component="main",
node_name="extract entities terms",
node_start_time=node_start_time,
result="Refinement is not allowed",
)
],
)
    # The first few lines duplicate logic from generate_initial_answer
question = graph_config.inputs.search_request.query
initial_search_docs = state.exploratory_search_results[:NUM_EXPLORATORY_DOCS]
    # Start with the entity/term extraction
doc_context = format_docs(initial_search_docs)
# Calculation here is only approximate
doc_context = trim_prompt_piece(
graph_config.tooling.fast_llm.config,
doc_context,
ENTITY_TERM_EXTRACTION_PROMPT
+ question
+ ENTITY_TERM_EXTRACTION_PROMPT_JSON_EXAMPLE,
)
msg = [
HumanMessage(
content=ENTITY_TERM_EXTRACTION_PROMPT.format(
question=question, context=doc_context
)
+ ENTITY_TERM_EXTRACTION_PROMPT_JSON_EXAMPLE,
)
]
fast_llm = graph_config.tooling.fast_llm
    # Extract entities, relationships, and terms with the fast LLM
llm_response = fast_llm.invoke(
prompt=msg,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION,
)
cleaned_response = (
str(llm_response.content).replace("```json\n", "").replace("\n```", "")
)
first_bracket = cleaned_response.find("{")
last_bracket = cleaned_response.rfind("}")
cleaned_response = cleaned_response[first_bracket : last_bracket + 1]
try:
entity_extraction_result = EntityExtractionResult.model_validate_json(
cleaned_response
)
except ValueError:
logger.error("Failed to parse LLM response as JSON in Entity-Term Extraction")
entity_extraction_result = EntityExtractionResult(
retrieved_entities_relationships=EntityRelationshipTermExtraction(
entities=[],
relationships=[],
terms=[],
),
)
return EntityTermExtractionUpdate(
entity_relation_term_extractions=entity_extraction_result.retrieved_entities_relationships,
log_messages=[
get_langgraph_node_log_string(
graph_component="main",
node_name="extract entities terms",
node_start_time=node_start_time,
)
],
)
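
For illustration, the fence-stripping and brace-slicing cleanup above applied to a typical fenced LLM reply (the reply text is made up):

raw = '```json\n{"retrieved_entities_relationships": {"entities": [], "relationships": [], "terms": []}}\n```'
cleaned = raw.replace("```json\n", "").replace("\n```", "")
cleaned = cleaned[cleaned.find("{") : cleaned.rfind("}") + 1]
print(cleaned)  # bare JSON object, ready for model_validate_json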


@@ -0,0 +1,435 @@
from datetime import datetime
from typing import Any
from typing import cast
from langchain_core.messages import HumanMessage
from langchain_core.messages import merge_content
from langchain_core.runnables import RunnableConfig
from langgraph.types import StreamWriter
from onyx.agents.agent_search.deep_search.main.models import (
AgentRefinedMetrics,
)
from onyx.agents.agent_search.deep_search.main.operations import get_query_info
from onyx.agents.agent_search.deep_search.main.states import MainState
from onyx.agents.agent_search.deep_search.main.states import (
RefinedAnswerUpdate,
)
from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
binary_string_test_after_answer_separator,
)
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
get_prompt_enrichment_components,
)
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
trim_prompt_piece,
)
from onyx.agents.agent_search.shared_graph_utils.calculations import (
get_answer_generation_documents,
)
from onyx.agents.agent_search.shared_graph_utils.constants import AGENT_ANSWER_SEPARATOR
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_RATELIMIT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_TIMEOUT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_POSITIVE_VALUE_STR,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AgentLLMErrorType,
)
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.models import RefinedAgentStats
from onyx.agents.agent_search.shared_graph_utils.operators import (
dedup_inference_section_list,
)
from onyx.agents.agent_search.shared_graph_utils.utils import (
dispatch_main_answer_stop_info,
)
from onyx.agents.agent_search.shared_graph_utils.utils import format_docs
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_deduplicated_structured_subquestion_documents,
)
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id
from onyx.agents.agent_search.shared_graph_utils.utils import relevance_from_docs
from onyx.agents.agent_search.shared_graph_utils.utils import (
remove_document_citations,
)
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import AgentAnswerPiece
from onyx.chat.models import ExtendedToolResponse
from onyx.chat.models import StreamingError
from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
from onyx.configs.agent_configs import AGENT_MAX_STREAMED_DOCS_FOR_REFINED_ANSWER
from onyx.configs.agent_configs import AGENT_MIN_ORIG_QUESTION_DOCS
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION,
)
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_VALIDATION,
)
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import (
REFINED_ANSWER_PROMPT_W_SUB_QUESTIONS,
)
from onyx.prompts.agent_search import (
REFINED_ANSWER_PROMPT_WO_SUB_QUESTIONS,
)
from onyx.prompts.agent_search import (
REFINED_ANSWER_VALIDATION_PROMPT,
)
from onyx.prompts.agent_search import (
SUB_QUESTION_ANSWER_TEMPLATE_REFINED,
)
from onyx.prompts.agent_search import UNKNOWN_ANSWER
from onyx.tools.tool_implementations.search.search_tool import yield_search_responses
from onyx.utils.logger import setup_logger
from onyx.utils.timing import log_function_time
logger = setup_logger()
_llm_node_error_strings = LLMNodeErrorStrings(
timeout="The LLM timed out. The refined answer could not be generated.",
rate_limit="The LLM encountered a rate limit. The refined answer could not be generated.",
general_error="The LLM encountered an error. The refined answer could not be generated.",
)
@log_function_time(print_only=True)
def generate_validate_refined_answer(
state: MainState, config: RunnableConfig, writer: StreamWriter = lambda _: None
) -> RefinedAnswerUpdate:
"""
LangGraph node to generate the refined answer and validate it.
"""
node_start_time = datetime.now()
graph_config = cast(GraphConfig, config["metadata"]["config"])
question = graph_config.inputs.search_request.query
prompt_enrichment_components = get_prompt_enrichment_components(graph_config)
persona_contextualized_prompt = (
prompt_enrichment_components.persona_prompts.contextualized_prompt
)
verified_reranked_documents = state.verified_reranked_documents
# get all documents cited in sub-questions
structured_subquestion_docs = get_deduplicated_structured_subquestion_documents(
state.sub_question_results
)
original_question_verified_documents = (
state.orig_question_verified_reranked_documents
)
original_question_retrieved_documents = state.orig_question_retrieved_documents
consolidated_context_docs = structured_subquestion_docs.cited_documents
counter = 0
    for original_doc in original_question_verified_documents:
        # Only add documents that were not already cited by a sub-question
        if original_doc not in structured_subquestion_docs.cited_documents:
if (
counter <= AGENT_MIN_ORIG_QUESTION_DOCS
or len(consolidated_context_docs)
< 1.5
* AGENT_MAX_ANSWER_CONTEXT_DOCS # allow for larger context in refinement
):
consolidated_context_docs.append(original_doc)
counter += 1
    # Dedup the consolidated docs; their scores refer to different questions, so ordering is only approximate
relevant_docs = dedup_inference_section_list(consolidated_context_docs)
    # Create the list of documents to stream out. Start with the ones that will be
    # in the context (or, if there are none, fall back to the docs that were
    # retrieved for the original question)
answer_generation_documents = get_answer_generation_documents(
relevant_docs=relevant_docs,
context_documents=structured_subquestion_docs.context_documents,
original_question_docs=original_question_retrieved_documents,
max_docs=AGENT_MAX_STREAMED_DOCS_FOR_REFINED_ANSWER,
)
query_info = get_query_info(state.orig_question_sub_query_retrieval_results)
assert (
graph_config.tooling.search_tool
), "search_tool must be provided for agentic search"
# stream refined answer docs, or original question docs if no relevant docs are found
relevance_list = relevance_from_docs(
answer_generation_documents.streaming_documents
)
for tool_response in yield_search_responses(
query=question,
reranked_sections=answer_generation_documents.streaming_documents,
final_context_sections=answer_generation_documents.context_documents,
search_query_info=query_info,
get_section_relevance=lambda: relevance_list,
search_tool=graph_config.tooling.search_tool,
):
write_custom_event(
"tool_response",
ExtendedToolResponse(
id=tool_response.id,
response=tool_response.response,
level=1,
level_question_num=0, # 0, 0 is the base question
),
writer,
)
if len(verified_reranked_documents) > 0:
refined_doc_effectiveness = len(relevant_docs) / len(
verified_reranked_documents
)
else:
refined_doc_effectiveness = 10.0
sub_question_answer_results = state.sub_question_results
answered_sub_question_answer_list: list[str] = []
sub_questions: list[str] = []
initial_answered_sub_questions: set[str] = set()
refined_answered_sub_questions: set[str] = set()
for i, result in enumerate(sub_question_answer_results, 1):
question_level, _ = parse_question_id(result.question_id)
sub_questions.append(result.question)
if (
result.verified_high_quality
and result.answer
and result.answer != UNKNOWN_ANSWER
):
sub_question_type = "initial" if question_level == 0 else "refined"
question_set = (
initial_answered_sub_questions
if question_level == 0
else refined_answered_sub_questions
)
question_set.add(result.question)
answered_sub_question_answer_list.append(
SUB_QUESTION_ANSWER_TEMPLATE_REFINED.format(
sub_question=result.question,
sub_answer=result.answer,
sub_question_num=i,
sub_question_type=sub_question_type,
)
)
# Calculate efficiency
total_answered_questions = (
initial_answered_sub_questions | refined_answered_sub_questions
)
    if initial_answered_sub_questions:
        revision_question_efficiency = len(total_answered_questions) / len(
            initial_answered_sub_questions
        )
    elif refined_answered_sub_questions:
        revision_question_efficiency = 10.0
    else:
        revision_question_efficiency = 1.0
sub_question_answer_str = "\n\n------\n\n".join(
set(answered_sub_question_answer_list)
)
initial_answer = state.initial_answer or ""
# Choose appropriate prompt template
base_prompt = (
REFINED_ANSWER_PROMPT_W_SUB_QUESTIONS
if answered_sub_question_answer_list
else REFINED_ANSWER_PROMPT_WO_SUB_QUESTIONS
)
model = graph_config.tooling.fast_llm
relevant_docs_str = format_docs(answer_generation_documents.context_documents)
relevant_docs_str = trim_prompt_piece(
model.config,
relevant_docs_str,
base_prompt
+ question
+ sub_question_answer_str
+ initial_answer
+ persona_contextualized_prompt
+ prompt_enrichment_components.history,
)
msg = [
HumanMessage(
content=base_prompt.format(
question=question,
history=prompt_enrichment_components.history,
answered_sub_questions=remove_document_citations(
sub_question_answer_str
),
relevant_docs=relevant_docs_str,
initial_answer=remove_document_citations(initial_answer)
if initial_answer
else None,
persona_specification=persona_contextualized_prompt,
date_prompt=prompt_enrichment_components.date_str,
)
)
]
streamed_tokens: list[str | list[str | dict[str, Any]]] = [""]
dispatch_timings: list[float] = []
agent_error: AgentErrorLog | None = None
try:
for message in model.stream(
msg, timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION
):
# TODO: in principle, the answer here COULD contain images, but we don't support that yet
content = message.content
if not isinstance(content, str):
raise ValueError(
f"Expected content to be a string, but got {type(content)}"
)
start_stream_token = datetime.now()
write_custom_event(
"refined_agent_answer",
AgentAnswerPiece(
answer_piece=content,
level=1,
level_question_num=0,
answer_type="agent_level_answer",
),
writer,
)
end_stream_token = datetime.now()
dispatch_timings.append(
(end_stream_token - start_stream_token).microseconds
)
streamed_tokens.append(content)
except LLMTimeoutError:
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,
error_result=_llm_node_error_strings.timeout,
)
logger.error("LLM Timeout Error - generate refined answer")
except LLMRateLimitError:
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.RATE_LIMIT,
error_message=AGENT_LLM_RATELIMIT_MESSAGE,
error_result=_llm_node_error_strings.rate_limit,
)
logger.error("LLM Rate Limit Error - generate refined answer")
if agent_error:
write_custom_event(
"initial_agent_answer",
StreamingError(
                error=agent_error.error_message,
),
writer,
)
return RefinedAnswerUpdate(
refined_answer=None,
refined_answer_quality=False, # TODO: replace this with the actual check value
refined_agent_stats=None,
agent_refined_end_time=None,
agent_refined_metrics=AgentRefinedMetrics(
refined_doc_boost_factor=0.0,
refined_question_boost_factor=0.0,
duration_s=None,
),
log_messages=[
get_langgraph_node_log_string(
graph_component="main",
node_name="generate refined answer",
node_start_time=node_start_time,
result=agent_error.error_result or "An LLM error occurred",
)
],
)
    if dispatch_timings:
        logger.debug(
            f"Average dispatch time for refined answer: {sum(dispatch_timings) / len(dispatch_timings)}"
        )
dispatch_main_answer_stop_info(1, writer)
response = merge_content(*streamed_tokens)
answer = cast(str, response)
# run a validation step for the refined answer only
msg = [
HumanMessage(
content=REFINED_ANSWER_VALIDATION_PROMPT.format(
question=question,
history=prompt_enrichment_components.history,
answered_sub_questions=sub_question_answer_str,
relevant_docs=relevant_docs_str,
proposed_answer=answer,
persona_specification=persona_contextualized_prompt,
)
)
]
try:
validation_response = model.invoke(
msg, timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_VALIDATION
)
refined_answer_quality = binary_string_test_after_answer_separator(
text=cast(str, validation_response.content),
positive_value=AGENT_POSITIVE_VALUE_STR,
separator=AGENT_ANSWER_SEPARATOR,
)
except LLMTimeoutError:
refined_answer_quality = True
logger.error("LLM Timeout Error - validate refined answer")
except LLMRateLimitError:
refined_answer_quality = True
logger.error("LLM Rate Limit Error - validate refined answer")
refined_agent_stats = RefinedAgentStats(
revision_doc_efficiency=refined_doc_effectiveness,
revision_question_efficiency=revision_question_efficiency,
)
agent_refined_end_time = datetime.now()
if state.agent_refined_start_time:
agent_refined_duration = (
agent_refined_end_time - state.agent_refined_start_time
).total_seconds()
else:
agent_refined_duration = None
agent_refined_metrics = AgentRefinedMetrics(
refined_doc_boost_factor=refined_agent_stats.revision_doc_efficiency,
refined_question_boost_factor=refined_agent_stats.revision_question_efficiency,
duration_s=agent_refined_duration,
)
return RefinedAnswerUpdate(
refined_answer=answer,
refined_answer_quality=refined_answer_quality,
refined_agent_stats=refined_agent_stats,
agent_refined_end_time=agent_refined_end_time,
agent_refined_metrics=agent_refined_metrics,
log_messages=[
get_langgraph_node_log_string(
graph_component="main",
node_name="generate refined answer",
node_start_time=node_start_time,
)
],
)
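
A hedged sketch of the separator-based validation check used above: the validation response is expected to end with AGENT_ANSWER_SEPARATOR followed by a yes/no verdict, and only the text after the separator is tested. The real helper is binary_string_test_after_answer_separator; details may differ.

def binary_test_after_separator_sketch(
    text: str, positive_value: str, separator: str
) -> bool:
    # Only the text after the last separator counts as the verdict
    verdict = text.split(separator)[-1]
    return positive_value.lower() in verdict.strip().lower()

print(binary_test_after_separator_sketch("...reasoning... === yes", "yes", "==="))  # True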


@@ -0,0 +1,42 @@
from datetime import datetime
from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer.states import (
AnswerQuestionOutput,
)
from onyx.agents.agent_search.deep_search.main.states import (
SubQuestionResultsUpdate,
)
from onyx.agents.agent_search.shared_graph_utils.operators import (
dedup_inference_sections,
)
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
def ingest_refined_sub_answers(
state: AnswerQuestionOutput,
) -> SubQuestionResultsUpdate:
"""
LangGraph node to ingest and format the refined sub-answers and retrieved documents.
"""
node_start_time = datetime.now()
documents = []
answer_results = state.answer_results
for answer_result in answer_results:
documents.extend(answer_result.verified_reranked_documents)
return SubQuestionResultsUpdate(
# Deduping is done by the documents operator for the main graph
# so we might not need to dedup here
verified_reranked_documents=dedup_inference_sections(documents, []),
sub_question_results=answer_results,
log_messages=[
get_langgraph_node_log_string(
graph_component="main",
node_name="ingest refined answers",
node_start_time=node_start_time,
)
],
)


@@ -0,0 +1,129 @@
from datetime import datetime
from typing import cast
from langchain_core.runnables import RunnableConfig
from onyx.agents.agent_search.deep_search.main.models import (
AgentAdditionalMetrics,
)
from onyx.agents.agent_search.deep_search.main.models import AgentTimings
from onyx.agents.agent_search.deep_search.main.operations import logger
from onyx.agents.agent_search.deep_search.main.states import MainOutput
from onyx.agents.agent_search.deep_search.main.states import MainState
from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.models import CombinedAgentMetrics
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.db.chat import log_agent_metrics
from onyx.db.chat import log_agent_sub_question_results
def persist_agent_results(state: MainState, config: RunnableConfig) -> MainOutput:
"""
LangGraph node to persist the agent results, including agent logging data.
"""
node_start_time = datetime.now()
agent_start_time = state.agent_start_time
agent_base_end_time = state.agent_base_end_time
agent_refined_start_time = state.agent_refined_start_time
agent_refined_end_time = state.agent_refined_end_time
agent_end_time = agent_refined_end_time or agent_base_end_time
agent_base_duration = None
if agent_base_end_time and agent_start_time:
agent_base_duration = (agent_base_end_time - agent_start_time).total_seconds()
agent_refined_duration = None
if agent_refined_start_time and agent_refined_end_time:
agent_refined_duration = (
agent_refined_end_time - agent_refined_start_time
).total_seconds()
agent_full_duration = None
if agent_end_time and agent_start_time:
agent_full_duration = (agent_end_time - agent_start_time).total_seconds()
agent_type = "refined" if agent_refined_duration else "base"
agent_base_metrics = state.agent_base_metrics
agent_refined_metrics = state.agent_refined_metrics
combined_agent_metrics = CombinedAgentMetrics(
timings=AgentTimings(
base_duration_s=agent_base_duration,
refined_duration_s=agent_refined_duration,
full_duration_s=agent_full_duration,
),
base_metrics=agent_base_metrics,
refined_metrics=agent_refined_metrics,
additional_metrics=AgentAdditionalMetrics(),
)
persona_id = None
graph_config = cast(GraphConfig, config["metadata"]["config"])
if graph_config.inputs.search_request.persona:
persona_id = graph_config.inputs.search_request.persona.id
user_id = None
assert (
graph_config.tooling.search_tool
), "search_tool must be provided for agentic search"
user = graph_config.tooling.search_tool.user
if user:
user_id = user.id
# log the agent metrics
if graph_config.persistence:
if agent_base_duration is not None:
log_agent_metrics(
db_session=graph_config.persistence.db_session,
user_id=user_id,
persona_id=persona_id,
agent_type=agent_type,
start_time=agent_start_time,
agent_metrics=combined_agent_metrics,
)
        # Persist the sub-answers in the database
db_session = graph_config.persistence.db_session
chat_session_id = graph_config.persistence.chat_session_id
primary_message_id = graph_config.persistence.message_id
sub_question_answer_results = state.sub_question_results
log_agent_sub_question_results(
db_session=db_session,
chat_session_id=chat_session_id,
primary_message_id=primary_message_id,
sub_question_answer_results=sub_question_answer_results,
)
main_output = MainOutput(
log_messages=[
get_langgraph_node_log_string(
graph_component="main",
node_name="persist agent results",
node_start_time=node_start_time,
)
],
)
for log_message in state.log_messages:
logger.debug(log_message)
if state.agent_base_metrics:
logger.debug(f"Initial loop: {state.agent_base_metrics.duration_s}")
if state.agent_refined_metrics:
logger.debug(f"Refined loop: {state.agent_refined_metrics.duration_s}")
if (
state.agent_base_metrics
and state.agent_refined_metrics
and state.agent_base_metrics.duration_s
and state.agent_refined_metrics.duration_s
):
logger.debug(
f"Total time: {float(state.agent_base_metrics.duration_s) + float(state.agent_refined_metrics.duration_s)}"
)
return main_output
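
Illustrative duration arithmetic matching the node above; the timestamps are made up.

from datetime import datetime

agent_start = datetime(2025, 2, 16, 12, 0, 0)
agent_base_end = datetime(2025, 2, 16, 12, 0, 42)
agent_refined_end = datetime(2025, 2, 16, 12, 1, 5)
print((agent_base_end - agent_start).total_seconds())     # 42.0 -> base_duration_s
print((agent_refined_end - agent_start).total_seconds())  # 65.0 -> full_duration_s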


@@ -0,0 +1,52 @@
from datetime import datetime
from typing import cast
from langchain_core.runnables import RunnableConfig
from onyx.agents.agent_search.deep_search.main.states import (
ExploratorySearchUpdate,
)
from onyx.agents.agent_search.deep_search.main.states import MainState
from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
build_history_prompt,
)
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.agents.agent_search.shared_graph_utils.utils import retrieve_search_docs
from onyx.configs.agent_configs import AGENT_EXPLORATORY_SEARCH_RESULTS
from onyx.context.search.models import InferenceSection
def start_agent_search(
state: MainState, config: RunnableConfig
) -> ExploratorySearchUpdate:
"""
LangGraph node to start the agentic search process.
"""
node_start_time = datetime.now()
graph_config = cast(GraphConfig, config["metadata"]["config"])
question = graph_config.inputs.search_request.query
history = build_history_prompt(graph_config, question)
    # Run an initial exploratory search to inform the decomposition; keep only the top hits
search_tool = graph_config.tooling.search_tool
assert search_tool, "search_tool must be provided for agentic search"
retrieved_docs: list[InferenceSection] = retrieve_search_docs(search_tool, question)
exploratory_search_results = retrieved_docs[:AGENT_EXPLORATORY_SEARCH_RESULTS]
return ExploratorySearchUpdate(
exploratory_search_results=exploratory_search_results,
previous_history_summary=history,
log_messages=[
get_langgraph_node_log_string(
graph_component="main",
node_name="start agent search",
node_start_time=node_start_time,
)
],
)


@@ -0,0 +1,151 @@
from collections.abc import Callable
from langgraph.types import StreamWriter
from onyx.agents.agent_search.shared_graph_utils.models import AgentChunkRetrievalStats
from onyx.agents.agent_search.shared_graph_utils.models import InitialAgentResultStats
from onyx.agents.agent_search.shared_graph_utils.models import QueryRetrievalResult
from onyx.agents.agent_search.shared_graph_utils.models import (
SubQuestionAnswerResults,
)
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import StreamStopInfo
from onyx.chat.models import StreamStopReason
from onyx.chat.models import StreamType
from onyx.chat.models import SubQuestionPiece
from onyx.context.search.models import IndexFilters
from onyx.tools.models import SearchQueryInfo
from onyx.utils.logger import setup_logger
logger = setup_logger()
def dispatch_subquestion(
level: int, writer: StreamWriter
) -> Callable[[str, int], None]:
def _helper(sub_question_part: str, sep_num: int) -> None:
write_custom_event(
"decomp_qs",
SubQuestionPiece(
sub_question=sub_question_part,
level=level,
level_question_num=sep_num,
),
writer,
)
return _helper
def dispatch_subquestion_sep(level: int, writer: StreamWriter) -> Callable[[int], None]:
def _helper(sep_num: int) -> None:
write_custom_event(
"stream_finished",
StreamStopInfo(
stop_reason=StreamStopReason.FINISHED,
stream_type=StreamType.SUB_QUESTIONS,
level=level,
level_question_num=sep_num,
),
writer,
)
return _helper
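# Illustration (a sketch, not part of the module): both dispatchers are
# closures capturing the sub-question level, so the streaming layer can be
# handed a bare callable. With a plain list standing in for the writer:
#   events: list = []
#   dispatch = dispatch_subquestion(level=1, writer=events.append)
#   dispatch("What is Onyx?", 2)  # emits a SubQuestionPiece(level=1, level_question_num=2)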
def calculate_initial_agent_stats(
decomp_answer_results: list[SubQuestionAnswerResults],
original_question_stats: AgentChunkRetrievalStats,
) -> InitialAgentResultStats:
initial_agent_result_stats: InitialAgentResultStats = InitialAgentResultStats(
sub_questions={},
original_question={},
agent_effectiveness={},
)
orig_verified = original_question_stats.verified_count
orig_support_score = original_question_stats.verified_avg_scores
verified_document_chunk_ids = []
support_scores = 0.0
for decomp_answer_result in decomp_answer_results:
verified_document_chunk_ids += (
decomp_answer_result.sub_question_retrieval_stats.verified_doc_chunk_ids
)
if (
decomp_answer_result.sub_question_retrieval_stats.verified_avg_scores
is not None
):
support_scores += (
decomp_answer_result.sub_question_retrieval_stats.verified_avg_scores
)
verified_document_chunk_ids = list(set(verified_document_chunk_ids))
# Calculate sub-question stats
if verified_document_chunk_ids:
sub_question_stats: dict[str, float | int | None] = {
"num_verified_documents": len(verified_document_chunk_ids),
"verified_avg_score": float(support_scores / len(decomp_answer_results)),
}
else:
sub_question_stats = {"num_verified_documents": 0, "verified_avg_score": None}
initial_agent_result_stats.sub_questions.update(sub_question_stats)
# Get original question stats
initial_agent_result_stats.original_question.update(
{
"num_verified_documents": original_question_stats.verified_count,
"verified_avg_score": original_question_stats.verified_avg_scores,
}
)
# Calculate chunk utilization ratio
sub_verified = initial_agent_result_stats.sub_questions["num_verified_documents"]
chunk_ratio: float | None = None
if sub_verified is not None and orig_verified is not None and orig_verified > 0:
chunk_ratio = (float(sub_verified) / orig_verified) if sub_verified > 0 else 0.0
elif sub_verified is not None and sub_verified > 0:
chunk_ratio = 10.0
initial_agent_result_stats.agent_effectiveness["utilized_chunk_ratio"] = chunk_ratio
# neither the original question nor the sub-questions have support scores
if (
orig_support_score is None or orig_support_score == 0.0
) and initial_agent_result_stats.sub_questions["verified_avg_score"] is None:
initial_agent_result_stats.agent_effectiveness["support_ratio"] = None
elif orig_support_score is None or orig_support_score == 0.0:
initial_agent_result_stats.agent_effectiveness["support_ratio"] = 10
elif initial_agent_result_stats.sub_questions["verified_avg_score"] is None:
initial_agent_result_stats.agent_effectiveness["support_ratio"] = 0
else:
initial_agent_result_stats.agent_effectiveness["support_ratio"] = (
initial_agent_result_stats.sub_questions["verified_avg_score"]
/ orig_support_score
)
return initial_agent_result_stats
def get_query_info(results: list[QueryRetrievalResult]) -> SearchQueryInfo:
# Use the query info from the base document retrieval;
# it supplies fields that are identical across all of the searches performed
query_info = None
for result in results:
if result.query_info is not None:
query_info = result.query_info
break
return query_info or SearchQueryInfo(
predicted_search=None,
final_filters=IndexFilters(access_control_list=None),
recency_bias_multiplier=1.0,
)
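
For orientation, a quick numeric walk-through of calculate_initial_agent_stats with made-up values:

# Hypothetical inputs: two sub-questions with verified_avg_scores of 0.8 and 0.6,
# verifying chunk ids {c1, c2, c3} and {c3, c4, c5, c6}; the original question
# verified 4 chunks.
sub_scores = [0.8, 0.6]
unique_chunk_ids = {"c1", "c2", "c3"} | {"c3", "c4", "c5", "c6"}  # 6 unique ids

verified_avg_score = sum(sub_scores) / len(sub_scores)  # 0.7
utilized_chunk_ratio = len(unique_chunk_ids) / 4  # 6 / 4 = 1.5
# support_ratio is then verified_avg_score divided by the original question's
# verified_avg_scores, with the None/0.0 special cases handled as above.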


@@ -0,0 +1,175 @@
from datetime import datetime
from operator import add
from typing import Annotated
from typing import TypedDict
from pydantic import BaseModel
from onyx.agents.agent_search.core_state import CoreState
from onyx.agents.agent_search.deep_search.main.models import AgentBaseMetrics
from onyx.agents.agent_search.deep_search.main.models import (
AgentRefinedMetrics,
)
from onyx.agents.agent_search.deep_search.main.models import (
RefinementSubQuestion,
)
from onyx.agents.agent_search.orchestration.states import ToolCallUpdate
from onyx.agents.agent_search.orchestration.states import ToolChoiceInput
from onyx.agents.agent_search.orchestration.states import ToolChoiceUpdate
from onyx.agents.agent_search.shared_graph_utils.models import AgentChunkRetrievalStats
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
from onyx.agents.agent_search.shared_graph_utils.models import (
EntityRelationshipTermExtraction,
)
from onyx.agents.agent_search.shared_graph_utils.models import InitialAgentResultStats
from onyx.agents.agent_search.shared_graph_utils.models import QueryRetrievalResult
from onyx.agents.agent_search.shared_graph_utils.models import RefinedAgentStats
from onyx.agents.agent_search.shared_graph_utils.models import (
SubQuestionAnswerResults,
)
from onyx.agents.agent_search.shared_graph_utils.operators import (
dedup_inference_sections,
)
from onyx.agents.agent_search.shared_graph_utils.operators import (
dedup_question_answer_results,
)
from onyx.context.search.models import InferenceSection
### States ###
class LoggerUpdate(BaseModel):
log_messages: Annotated[list[str], add] = []
class RefinedAgentStartStats(BaseModel):
agent_refined_start_time: datetime | None = None
class RefinedAgentEndStats(BaseModel):
agent_refined_end_time: datetime | None = None
agent_refined_metrics: AgentRefinedMetrics = AgentRefinedMetrics()
class InitialQuestionDecompositionUpdate(
RefinedAgentStartStats, RefinedAgentEndStats, LoggerUpdate
):
agent_start_time: datetime | None = None
previous_history: str | None = None
initial_sub_questions: list[str] = []
class ExploratorySearchUpdate(LoggerUpdate):
exploratory_search_results: list[InferenceSection] = []
previous_history_summary: str | None = None
class InitialRefinedAnswerComparisonUpdate(LoggerUpdate):
"""
Evaluation of whether the refined answer is better than the initial answer
"""
refined_answer_improvement_eval: bool = False
class InitialAnswerUpdate(LoggerUpdate):
"""
Initial answer information
"""
initial_answer: str | None = None
answer_error: AgentErrorLog | None = None
initial_agent_stats: InitialAgentResultStats | None = None
generated_sub_questions: list[str] = []
agent_base_end_time: datetime | None = None
agent_base_metrics: AgentBaseMetrics | None = None
class RefinedAnswerUpdate(RefinedAgentEndStats, LoggerUpdate):
"""
Refined answer information
"""
refined_answer: str | None = None
answer_error: AgentErrorLog | None = None
refined_agent_stats: RefinedAgentStats | None = None
refined_answer_quality: bool = False
class InitialAnswerQualityUpdate(LoggerUpdate):
"""
Initial answer quality evaluation
"""
initial_answer_quality_eval: bool = False
class RequireRefinemenEvalUpdate(LoggerUpdate):
require_refined_answer_eval: bool = True
class SubQuestionResultsUpdate(LoggerUpdate):
verified_reranked_documents: Annotated[
list[InferenceSection], dedup_inference_sections
] = []
context_documents: Annotated[list[InferenceSection], dedup_inference_sections] = []
cited_documents: Annotated[
list[InferenceSection], dedup_inference_sections
] = [] # cited docs from sub-answers are used for answer context
sub_question_results: Annotated[
list[SubQuestionAnswerResults], dedup_question_answer_results
] = []
class OrigQuestionRetrievalUpdate(LoggerUpdate):
orig_question_retrieved_documents: Annotated[
list[InferenceSection], dedup_inference_sections
]
orig_question_verified_reranked_documents: Annotated[
list[InferenceSection], dedup_inference_sections
]
orig_question_sub_query_retrieval_results: list[QueryRetrievalResult] = []
orig_question_retrieval_stats: AgentChunkRetrievalStats = AgentChunkRetrievalStats()
class EntityTermExtractionUpdate(LoggerUpdate):
entity_relation_term_extractions: EntityRelationshipTermExtraction = (
EntityRelationshipTermExtraction()
)
class RefinedQuestionDecompositionUpdate(RefinedAgentStartStats, LoggerUpdate):
refined_sub_questions: dict[int, RefinementSubQuestion] = {}
## Graph Input State
class MainInput(CoreState):
pass
## Graph State
class MainState(
# This includes the core state
MainInput,
ToolChoiceInput,
ToolCallUpdate,
ToolChoiceUpdate,
InitialQuestionDecompositionUpdate,
InitialAnswerUpdate,
SubQuestionResultsUpdate,
OrigQuestionRetrievalUpdate,
EntityTermExtractionUpdate,
InitialAnswerQualityUpdate,
RequireRefinemenEvalUpdate,
RefinedQuestionDecompositionUpdate,
RefinedAnswerUpdate,
RefinedAgentStartStats,
RefinedAgentEndStats,
InitialRefinedAnswerComparisonUpdate,
ExploratorySearchUpdate,
):
pass
## Graph Output State - presently not used
class MainOutput(TypedDict):
log_messages: list[str]
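
The Annotated reducers in these states tell LangGraph how to merge updates arriving from parallel branches: add concatenates log messages, while the dedup operators merge documents by identity. A minimal sketch of the pattern, with dedup_by_id as a hypothetical stand-in for dedup_inference_sections:

from operator import add
from typing import Annotated
from typing import TypedDict


def dedup_by_id(existing: list[dict], new: list[dict]) -> list[dict]:
    # keep the first occurrence per document id (stand-in for dedup_inference_sections)
    seen: set[str] = set()
    merged: list[dict] = []
    for doc in existing + new:
        if doc["id"] not in seen:
            seen.add(doc["id"])
            merged.append(doc)
    return merged


class DemoState(TypedDict):
    log_messages: Annotated[list[str], add]  # concatenated across branches
    documents: Annotated[list[dict], dedup_by_id]  # merged with dedup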


@@ -0,0 +1,33 @@
from collections.abc import Hashable
from datetime import datetime
from langgraph.types import Send
from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer.states import (
SubQuestionAnsweringInput,
)
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states import (
ExpandedRetrievalInput,
)
from onyx.utils.logger import setup_logger
logger = setup_logger()
def send_to_expanded_refined_retrieval(
state: SubQuestionAnsweringInput,
) -> Send | Hashable:
"""
LangGraph edge to send a refined sub-question to expanded retrieval.
"""
logger.debug("sending to expanded retrieval for follow up question via edge")
return Send(
"refined_sub_question_expanded_retrieval",
ExpandedRetrievalInput(
question=state.question,
sub_question_id=state.question_id,
base_search=False,
log_messages=[f"{datetime.now()} -- Sending to expanded retrieval"],
),
)


@@ -0,0 +1,132 @@
from langgraph.graph import END
from langgraph.graph import START
from langgraph.graph import StateGraph
from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer.nodes.check_sub_answer import (
check_sub_answer,
)
from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer.nodes.format_sub_answer import (
format_sub_answer,
)
from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer.nodes.generate_sub_answer import (
generate_sub_answer,
)
from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer.nodes.ingest_retrieved_documents import (
ingest_retrieved_documents,
)
from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer.states import (
AnswerQuestionOutput,
)
from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer.states import (
AnswerQuestionState,
)
from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer.states import (
SubQuestionAnsweringInput,
)
from onyx.agents.agent_search.deep_search.refinement.consolidate_sub_answers.edges import (
send_to_expanded_refined_retrieval,
)
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.graph_builder import (
expanded_retrieval_graph_builder,
)
from onyx.utils.logger import setup_logger
logger = setup_logger()
def answer_refined_query_graph_builder() -> StateGraph:
"""
LangGraph graph builder for the refined sub-answer generation process.
"""
graph = StateGraph(
state_schema=AnswerQuestionState,
input=SubQuestionAnsweringInput,
output=AnswerQuestionOutput,
)
### Add nodes ###
# Subgraph for the expanded retrieval process
expanded_retrieval = expanded_retrieval_graph_builder().compile()
graph.add_node(
node="refined_sub_question_expanded_retrieval",
action=expanded_retrieval,
)
# Ingest the retrieved documents
graph.add_node(
node="ingest_refined_retrieval",
action=ingest_retrieved_documents,
)
# Generate the refined sub-answer
graph.add_node(
node="generate_refined_sub_answer",
action=generate_sub_answer,
)
# Check if the refined sub-answer is correct
graph.add_node(
node="refined_sub_answer_check",
action=check_sub_answer,
)
# Format the refined sub-answer
graph.add_node(
node="format_refined_sub_answer",
action=format_sub_answer,
)
### Add edges ###
graph.add_conditional_edges(
source=START,
path=send_to_expanded_refined_retrieval,
path_map=["refined_sub_question_expanded_retrieval"],
)
graph.add_edge(
start_key="refined_sub_question_expanded_retrieval",
end_key="ingest_refined_retrieval",
)
graph.add_edge(
start_key="ingest_refined_retrieval",
end_key="generate_refined_sub_answer",
)
graph.add_edge(
start_key="generate_refined_sub_answer",
end_key="refined_sub_answer_check",
)
graph.add_edge(
start_key="refined_sub_answer_check",
end_key="format_refined_sub_answer",
)
graph.add_edge(
start_key="format_refined_sub_answer",
end_key=END,
)
return graph
if __name__ == "__main__":
from onyx.db.engine import get_session_context_manager
from onyx.llm.factory import get_default_llms
from onyx.context.search.models import SearchRequest
from onyx.agents.agent_search.shared_graph_utils.utils import get_test_config
graph = answer_refined_query_graph_builder()
compiled_graph = graph.compile()
primary_llm, fast_llm = get_default_llms()
search_request = SearchRequest(
query="what can you do with onyx or danswer?",
)
with get_session_context_manager() as db_session:
graph_config, search_tool = get_test_config(
db_session, primary_llm, fast_llm, search_request
)
inputs = SubQuestionAnsweringInput(
question="what can you do with onyx?",
question_id="0_0",
log_messages=[],
)
for thing in compiled_graph.stream(
input=inputs,
config={"configurable": {"config": graph_config}},
stream_mode="custom",
):
logger.debug(thing)


@@ -0,0 +1,42 @@
from collections.abc import Hashable
from typing import cast
from langchain_core.runnables.config import RunnableConfig
from langgraph.types import Send
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states import (
ExpandedRetrievalState,
)
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states import (
RetrievalInput,
)
from onyx.agents.agent_search.models import GraphConfig
def parallel_retrieval_edge(
state: ExpandedRetrievalState, config: RunnableConfig
) -> list[Send | Hashable]:
"""
LangGraph edge to parallelize the retrieval process for each of the
generated sub-queries and the original question.
"""
graph_config = cast(GraphConfig, config["metadata"]["config"])
question = (
state.question if state.question else graph_config.inputs.search_request.query
)
query_expansions = state.expanded_queries + [question]
return [
Send(
"retrieve_documents",
RetrievalInput(
query_to_retrieve=query,
question=question,
base_search=False,
sub_question_id=state.sub_question_id,
log_messages=[],
),
)
for query in query_expansions
]
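
Each Send returned above spawns an independent run of the retrieve_documents node with its own RetrievalInput. A minimal, self-contained version of the same fan-out pattern, with toy state and node names (assuming a recent langgraph):

from operator import add
from typing import Annotated
from typing import TypedDict

from langgraph.graph import END, START, StateGraph
from langgraph.types import Send


class ToyState(TypedDict):
    queries: list[str]
    results: Annotated[list[str], add]  # concatenated across parallel branches


def fan_out(state: ToyState) -> list[Send]:
    # one branch per query, mirroring parallel_retrieval_edge
    return [Send("retrieve", {"queries": [q], "results": []}) for q in state["queries"]]


def retrieve(state: ToyState) -> dict:
    return {"results": [f"docs for {state['queries'][0]}"]}


graph = StateGraph(ToyState)
graph.add_node("retrieve", retrieve)
graph.add_conditional_edges(START, fan_out, ["retrieve"])
graph.add_edge("retrieve", END)
app = graph.compile()
# app.invoke({"queries": ["q1", "q2"], "results": []}) -> merged results from both branches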


@@ -0,0 +1,161 @@
from langgraph.graph import END
from langgraph.graph import START
from langgraph.graph import StateGraph
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.edges import (
parallel_retrieval_edge,
)
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.nodes.expand_queries import (
expand_queries,
)
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.nodes.format_queries import (
format_queries,
)
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.nodes.format_results import (
format_results,
)
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.nodes.kickoff_verification import (
kickoff_verification,
)
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.nodes.rerank_documents import (
rerank_documents,
)
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.nodes.retrieve_documents import (
retrieve_documents,
)
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.nodes.verify_documents import (
verify_documents,
)
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states import (
ExpandedRetrievalInput,
)
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states import (
ExpandedRetrievalOutput,
)
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states import (
ExpandedRetrievalState,
)
from onyx.agents.agent_search.shared_graph_utils.utils import get_test_config
from onyx.utils.logger import setup_logger
logger = setup_logger()
def expanded_retrieval_graph_builder() -> StateGraph:
"""
LangGraph graph builder for the expanded retrieval process.
"""
graph = StateGraph(
state_schema=ExpandedRetrievalState,
input=ExpandedRetrievalInput,
output=ExpandedRetrievalOutput,
)
### Add nodes ###
# Convert the question into multiple sub-queries
graph.add_node(
node="expand_queries",
action=expand_queries,
)
# Format the sub-queries into a list of strings
graph.add_node(
node="format_queries",
action=format_queries,
)
# Retrieve the documents for each sub-query
graph.add_node(
node="retrieve_documents",
action=retrieve_documents,
)
# Kick off verification that the documents are relevant to the question (not the query)
graph.add_node(
node="kickoff_verification",
action=kickoff_verification,
)
# Verify that a given document is relevant to the question (not the query)
graph.add_node(
node="verify_documents",
action=verify_documents,
)
# Rerank the documents that have been verified
graph.add_node(
node="rerank_documents",
action=rerank_documents,
)
# Format the results into a list of strings
graph.add_node(
node="format_results",
action=format_results,
)
### Add edges ###
graph.add_edge(
start_key=START,
end_key="expand_queries",
)
graph.add_edge(
start_key="expand_queries",
end_key="format_queries",
)
graph.add_conditional_edges(
source="format_queries",
path=parallel_retrieval_edge,
path_map=["retrieve_documents"],
)
graph.add_edge(
start_key="retrieve_documents",
end_key="kickoff_verification",
)
graph.add_edge(
start_key="verify_documents",
end_key="rerank_documents",
)
graph.add_edge(
start_key="rerank_documents",
end_key="format_results",
)
graph.add_edge(
start_key="format_results",
end_key=END,
)
return graph
if __name__ == "__main__":
from onyx.db.engine import get_session_context_manager
from onyx.llm.factory import get_default_llms
from onyx.context.search.models import SearchRequest
graph = expanded_retrieval_graph_builder()
compiled_graph = graph.compile()
primary_llm, fast_llm = get_default_llms()
search_request = SearchRequest(
query="what can you do with onyx or danswer?",
)
with get_session_context_manager() as db_session:
graph_config, search_tool = get_test_config(
db_session, primary_llm, fast_llm, search_request
)
inputs = ExpandedRetrievalInput(
question="what can you do with onyx?",
base_search=False,
sub_question_id=None,
log_messages=[],
)
for thing in compiled_graph.stream(
input=inputs,
config={"configurable": {"config": graph_config}},
stream_mode="custom",
subgraphs=True,
):
logger.debug(thing)


@@ -0,0 +1,13 @@
from pydantic import BaseModel
from onyx.agents.agent_search.shared_graph_utils.models import AgentChunkRetrievalStats
from onyx.agents.agent_search.shared_graph_utils.models import QueryRetrievalResult
from onyx.context.search.models import InferenceSection
class QuestionRetrievalResult(BaseModel):
expanded_query_results: list[QueryRetrievalResult] = []
retrieved_documents: list[InferenceSection] = []
verified_reranked_documents: list[InferenceSection] = []
context_documents: list[InferenceSection] = []
retrieval_stats: AgentChunkRetrievalStats = AgentChunkRetrievalStats()
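
One note on the mutable defaults above: unlike plain Python default arguments, Pydantic copies field defaults per instance, so the empty lists and model defaults are not shared. A quick check:

from pydantic import BaseModel


class Result(BaseModel):
    items: list[str] = []


a, b = Result(), Result()
a.items.append("x")
assert b.items == []  # each instance gets its own copy of the default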


@@ -0,0 +1,133 @@
from datetime import datetime
from typing import cast
from langchain_core.messages import HumanMessage
from langchain_core.messages import merge_message_runs
from langchain_core.runnables.config import RunnableConfig
from langgraph.types import StreamWriter
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.operations import (
dispatch_subquery,
)
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states import (
ExpandedRetrievalInput,
)
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states import (
QueryExpansionUpdate,
)
from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_RATELIMIT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_TIMEOUT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AgentLLMErrorType,
)
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
from onyx.agents.agent_search.shared_graph_utils.models import BaseMessage_Content
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.utils import dispatch_separated
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION,
)
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import (
QUERY_REWRITING_PROMPT,
)
from onyx.utils.logger import setup_logger
from onyx.utils.timing import log_function_time
logger = setup_logger()
_llm_node_error_strings = LLMNodeErrorStrings(
timeout="Query rewriting failed due to LLM timeout - the original question will be used.",
rate_limit="Query rewriting failed due to LLM rate limit - the original question will be used.",
general_error="Query rewriting failed due to LLM error - the original question will be used.",
)
@log_function_time(print_only=True)
def expand_queries(
state: ExpandedRetrievalInput,
config: RunnableConfig,
writer: StreamWriter = lambda _: None,
) -> QueryExpansionUpdate:
"""
LangGraph node to expand a question into multiple search queries.
"""
# Sometimes we want to expand the original question, sometimes we want to expand a sub-question.
# When we are running this node on the original question, no question is explicitly passed in.
# Instead, we use the original question from the search request.
graph_config = cast(GraphConfig, config["metadata"]["config"])
node_start_time = datetime.now()
question = state.question
llm = graph_config.tooling.fast_llm
sub_question_id = state.sub_question_id
if sub_question_id is None:
level, question_num = 0, 0
else:
level, question_num = parse_question_id(sub_question_id)
msg = [
HumanMessage(
content=QUERY_REWRITING_PROMPT.format(question=question),
)
]
agent_error: AgentErrorLog | None = None
llm_response_list: list[BaseMessage_Content] = []
llm_response = ""
rewritten_queries = []
try:
llm_response_list = dispatch_separated(
llm.stream(
prompt=msg,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION,
),
dispatch_subquery(level, question_num, writer),
)
llm_response = merge_message_runs(llm_response_list, chunk_separator="")[
0
].content
rewritten_queries = llm_response.split("\n")
log_result = f"Number of expanded queries: {len(rewritten_queries)}"
except LLMTimeoutError:
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,
error_result=_llm_node_error_strings.timeout,
)
logger.error("LLM Timeout Error - expand queries")
log_result = agent_error.error_result
except LLMRateLimitError:
agent_error = AgentErrorLog(
error_type=AgentLLMErrorType.RATE_LIMIT,
error_message=AGENT_LLM_RATELIMIT_MESSAGE,
error_result=_llm_node_error_strings.rate_limit,
)
logger.error("LLM Rate Limit Error - expand queries")
log_result = agent_error.error_result
# if query generation failed, rewritten_queries stays empty and the sub-question itself is used as the query downstream
return QueryExpansionUpdate(
expanded_queries=rewritten_queries,
log_messages=[
get_langgraph_node_log_string(
graph_component="shared - expanded retrieval",
node_name="expand queries",
node_start_time=node_start_time,
result=log_result,
)
],
)
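
One caveat on the raw split above: a trailing newline in the LLM response leaves an empty string in rewritten_queries. That is harmless here because retrieve_documents skips empty queries, but a stricter variant would filter up front:

llm_response = "onyx features\nonyx use cases\n"
rewritten_queries = [q.strip() for q in llm_response.split("\n") if q.strip()]
# -> ["onyx features", "onyx use cases"]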


@@ -0,0 +1,19 @@
from langchain_core.runnables.config import RunnableConfig
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states import (
ExpandedRetrievalState,
)
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states import (
QueryExpansionUpdate,
)
def format_queries(
state: ExpandedRetrievalState, config: RunnableConfig
) -> QueryExpansionUpdate:
"""
LangGraph node to format the expanded queries into a list of strings.
"""
return QueryExpansionUpdate(
expanded_queries=state.expanded_queries,
)


@@ -0,0 +1,91 @@
from typing import cast
from langchain_core.runnables.config import RunnableConfig
from langgraph.types import StreamWriter
from onyx.agents.agent_search.deep_search.main.operations import get_query_info
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.models import (
QuestionRetrievalResult,
)
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.operations import (
calculate_sub_question_retrieval_stats,
)
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states import (
ExpandedRetrievalState,
)
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states import (
ExpandedRetrievalUpdate,
)
from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.models import AgentChunkRetrievalStats
from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id
from onyx.agents.agent_search.shared_graph_utils.utils import relevance_from_docs
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import ExtendedToolResponse
from onyx.tools.tool_implementations.search.search_tool import yield_search_responses
def format_results(
state: ExpandedRetrievalState,
config: RunnableConfig,
writer: StreamWriter = lambda _: None,
) -> ExpandedRetrievalUpdate:
"""
LangGraph node that constructs the proper expanded retrieval format.
"""
level, question_num = parse_question_id(state.sub_question_id or "0_0")
query_info = get_query_info(state.query_retrieval_results)
graph_config = cast(GraphConfig, config["metadata"]["config"])
# Main question docs will be sent later after aggregation and deduping with sub-question docs
reranked_documents = state.reranked_documents
if not (level == 0 and question_num == 0):
if len(reranked_documents) == 0:
# The sub-question is used as the last query. If no verified documents are found, stream
# the top 3 for that one. We may want to revisit this.
reranked_documents = state.query_retrieval_results[-1].retrieved_documents[
:3
]
assert (
graph_config.tooling.search_tool
), "search_tool must be provided for agentic search"
relevance_list = relevance_from_docs(reranked_documents)
for tool_response in yield_search_responses(
query=state.question,
reranked_sections=state.retrieved_documents,
final_context_sections=reranked_documents,
search_query_info=query_info,
get_section_relevance=lambda: relevance_list,
search_tool=graph_config.tooling.search_tool,
):
write_custom_event(
"tool_response",
ExtendedToolResponse(
id=tool_response.id,
response=tool_response.response,
level=level,
level_question_num=question_num,
),
writer,
)
sub_question_retrieval_stats = calculate_sub_question_retrieval_stats(
verified_documents=state.verified_documents,
expanded_retrieval_results=state.query_retrieval_results,
)
if sub_question_retrieval_stats is None:
sub_question_retrieval_stats = AgentChunkRetrievalStats()
return ExpandedRetrievalUpdate(
expanded_retrieval_result=QuestionRetrievalResult(
expanded_query_results=state.query_retrieval_results,
retrieved_documents=state.retrieved_documents,
verified_reranked_documents=reranked_documents,
context_documents=state.reranked_documents,
retrieval_stats=sub_question_retrieval_stats,
),
)
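
Sub-question ids throughout these nodes follow a "level_questionnum" string convention ("0_0" being the original question). A hedged stand-in for the parse_question_id helper assumed here:

def parse_question_id(question_id: str) -> tuple[int, int]:
    # "1_2" -> (level=1, question_num=2); assumes the real helper behaves like this
    level, question_num = question_id.split("_")
    return int(level), int(question_num)


parse_question_id("0_0")  # (0, 0) -- the original question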


@@ -0,0 +1,44 @@
from typing import Literal
from langchain_core.runnables.config import RunnableConfig
from langgraph.types import Command
from langgraph.types import Send
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states import (
DocVerificationInput,
)
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states import (
ExpandedRetrievalState,
)
def kickoff_verification(
state: ExpandedRetrievalState,
config: RunnableConfig,
) -> Command[Literal["verify_documents"]]:
"""
LangGraph node (Command node!) that kicks off the verification process for the retrieved documents.
Note that this is a Command node and does the routing as well. (At present, no state updates
are done here, so this could be replaced with an edge. But we may choose to make state
updates later.)
"""
retrieved_documents = state.retrieved_documents
verification_question = state.question
sub_question_id = state.sub_question_id
return Command(
update={},
goto=[
Send(
node="verify_documents",
arg=DocVerificationInput(
retrieved_document_to_verify=document,
question=verification_question,
base_search=False,
sub_question_id=sub_question_id,
log_messages=[],
),
)
for document in retrieved_documents
],
)
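
As the docstring notes, with an empty update this node could equally be a conditional edge; the Command form simply keeps the option of mutating state open. The edge-based equivalent would look roughly like this (toy payloads):

from langgraph.types import Send


def kickoff_verification_edge(state) -> list[Send]:
    # pure routing: same fan-out as the Command node, but no state update possible
    return [
        Send("verify_documents", {"doc": doc, "question": state.question})
        for doc in state.retrieved_documents
    ]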


@@ -0,0 +1,99 @@
from datetime import datetime
from typing import cast
from langchain_core.runnables.config import RunnableConfig
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.operations import (
logger,
)
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states import (
DocRerankingUpdate,
)
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states import (
ExpandedRetrievalState,
)
from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.calculations import get_fit_scores
from onyx.agents.agent_search.shared_graph_utils.models import RetrievalFitStats
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.configs.agent_configs import AGENT_RERANKING_MAX_QUERY_RETRIEVAL_RESULTS
from onyx.configs.agent_configs import AGENT_RERANKING_STATS
from onyx.context.search.models import InferenceSection
from onyx.context.search.models import RerankingDetails
from onyx.context.search.postprocessing.postprocessing import rerank_sections
from onyx.context.search.postprocessing.postprocessing import should_rerank
from onyx.db.engine import get_session_context_manager
from onyx.db.search_settings import get_current_search_settings
from onyx.utils.timing import log_function_time
@log_function_time(print_only=True)
def rerank_documents(
state: ExpandedRetrievalState, config: RunnableConfig
) -> DocRerankingUpdate:
"""
LangGraph node to rerank the retrieved and verified documents. A part of the
pre-existing pipeline is used here.
"""
node_start_time = datetime.now()
verified_documents = state.verified_documents
# Rerank after retrieval and verification: first resolve the search query,
# then build the list of reranked sections.
# If no question is defined in the state, fall back to the original
# question from the search request.
graph_config = cast(GraphConfig, config["metadata"]["config"])
question = (
state.question if state.question else graph_config.inputs.search_request.query
)
assert (
graph_config.tooling.search_tool
), "search_tool must be provided for agentic search"
# Note: these are values passed in from the API; they are overrides and are typically None
rerank_settings = graph_config.inputs.search_request.rerank_settings
if rerank_settings is None:
with get_session_context_manager() as db_session:
search_settings = get_current_search_settings(db_session)
if not search_settings.disable_rerank_for_streaming:
rerank_settings = RerankingDetails.from_db_model(search_settings)
if should_rerank(rerank_settings) and len(verified_documents) > 0:
if len(verified_documents) > 1:
reranked_documents = rerank_sections(
query_str=question,
# should_rerank returned True, so rerank_settings is not None here
rerank_settings=cast(RerankingDetails, rerank_settings),
sections_to_rerank=verified_documents,
)
else:
logger.warning(
f"{len(verified_documents)} verified document(s) found, skipping reranking"
)
reranked_documents = verified_documents
else:
logger.warning("No reranking settings found, using unranked documents")
reranked_documents = verified_documents
if AGENT_RERANKING_STATS:
fit_scores = get_fit_scores(verified_documents, reranked_documents)
else:
fit_scores = RetrievalFitStats(fit_score_lift=0, rerank_effect=0, fit_scores={})
return DocRerankingUpdate(
reranked_documents=[
doc for doc in reranked_documents if isinstance(doc, InferenceSection)
][:AGENT_RERANKING_MAX_QUERY_RETRIEVAL_RESULTS],
sub_question_retrieval_stats=fit_scores,
log_messages=[
get_langgraph_node_log_string(
graph_component="shared - expanded retrieval",
node_name="rerank documents",
node_start_time=node_start_time,
)
],
)
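
The reranking settings above resolve in a fixed order: the per-request override wins, otherwise the current DB search settings are used unless reranking is disabled for streaming. Distilled into a hypothetical helper (names are illustrative, RerankingDetails as imported in the node above):

def resolve_rerank_settings(request_override, search_settings):
    # hypothetical helper mirroring the precedence in rerank_documents
    if request_override is not None:
        return request_override
    if not search_settings.disable_rerank_for_streaming:
        return RerankingDetails.from_db_model(search_settings)
    return None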


@@ -0,0 +1,119 @@
from datetime import datetime
from typing import cast
from langchain_core.runnables.config import RunnableConfig
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.operations import (
logger,
)
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states import (
DocRetrievalUpdate,
)
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states import (
RetrievalInput,
)
from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.calculations import get_fit_scores
from onyx.agents.agent_search.shared_graph_utils.models import QueryRetrievalResult
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.configs.agent_configs import AGENT_MAX_QUERY_RETRIEVAL_RESULTS
from onyx.configs.agent_configs import AGENT_RETRIEVAL_STATS
from onyx.context.search.models import InferenceSection
from onyx.db.engine import get_session_context_manager
from onyx.tools.models import SearchQueryInfo
from onyx.tools.models import SearchToolOverrideKwargs
from onyx.tools.tool_implementations.search.search_tool import (
SEARCH_RESPONSE_SUMMARY_ID,
)
from onyx.tools.tool_implementations.search.search_tool import SearchResponseSummary
from onyx.utils.timing import log_function_time
@log_function_time(print_only=True)
def retrieve_documents(
state: RetrievalInput, config: RunnableConfig
) -> DocRetrievalUpdate:
"""
LangGraph node to retrieve documents from the search tool.
"""
node_start_time = datetime.now()
query_to_retrieve = state.query_to_retrieve
graph_config = cast(GraphConfig, config["metadata"]["config"])
search_tool = graph_config.tooling.search_tool
retrieved_docs: list[InferenceSection] = []
if not query_to_retrieve.strip():
logger.warning("Empty query, skipping retrieval")
return DocRetrievalUpdate(
query_retrieval_results=[],
retrieved_documents=[],
log_messages=[
get_langgraph_node_log_string(
graph_component="shared - expanded retrieval",
node_name="retrieve documents",
node_start_time=node_start_time,
result="Empty query, skipping retrieval",
)
],
)
query_info = None
if search_tool is None:
raise ValueError("search_tool must be provided for agentic search")
callback_container: list[list[InferenceSection]] = []
# new db session to avoid concurrency issues
with get_session_context_manager() as db_session:
for tool_response in search_tool.run(
query=query_to_retrieve,
override_kwargs=SearchToolOverrideKwargs(
force_no_rerank=True,
alternate_db_session=db_session,
retrieved_sections_callback=callback_container.append,
skip_query_analysis=not state.base_search,
),
):
# get retrieved docs to send to the rest of the graph
if tool_response.id == SEARCH_RESPONSE_SUMMARY_ID:
response = cast(SearchResponseSummary, tool_response.response)
retrieved_docs = response.top_sections
query_info = SearchQueryInfo(
predicted_search=response.predicted_search,
final_filters=response.final_filters,
recency_bias_multiplier=response.recency_bias_multiplier,
)
break
retrieved_docs = retrieved_docs[:AGENT_MAX_QUERY_RETRIEVAL_RESULTS]
if AGENT_RETRIEVAL_STATS:
pre_rerank_docs = callback_container[0] if callback_container else []
fit_scores = get_fit_scores(
pre_rerank_docs,
retrieved_docs,
)
else:
fit_scores = None
expanded_retrieval_result = QueryRetrievalResult(
query=query_to_retrieve,
retrieved_documents=retrieved_docs,
stats=fit_scores,
query_info=query_info,
)
return DocRetrievalUpdate(
query_retrieval_results=[expanded_retrieval_result],
retrieved_documents=retrieved_docs,
log_messages=[
get_langgraph_node_log_string(
graph_component="shared - expanded retrieval",
node_name="retrieve documents",
node_start_time=node_start_time,
)
],
)
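
The callback_container pattern above smuggles the pre-rerank sections out of the tool run through a plain list.append callback; in miniature:

captured: list[list[str]] = []


def run_search(retrieved_sections_callback) -> None:
    # stand-in for search_tool.run(...), which invokes the callback with raw sections
    retrieved_sections_callback(["section-a", "section-b"])


run_search(captured.append)
pre_rerank = captured[0] if captured else []  # same guard as the stats branch above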


@@ -0,0 +1,118 @@
from datetime import datetime
from typing import cast
from langchain_core.messages import BaseMessage
from langchain_core.messages import HumanMessage
from langchain_core.runnables.config import RunnableConfig
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states import (
DocVerificationInput,
)
from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states import (
DocVerificationUpdate,
)
from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
binary_string_test,
)
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
trim_prompt_piece,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_POSITIVE_VALUE_STR,
)
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.configs.agent_configs import AGENT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import (
DOCUMENT_VERIFICATION_PROMPT,
)
from onyx.utils.logger import setup_logger
from onyx.utils.timing import log_function_time
logger = setup_logger()
_llm_node_error_strings = LLMNodeErrorStrings(
timeout="The LLM timed out. The document could not be verified. The document will be treated as 'relevant'",
rate_limit="The LLM encountered a rate limit. The document could not be verified. The document will be treated as 'relevant'",
general_error="The LLM encountered an error. The document could not be verified. The document will be treated as 'relevant'",
)
@log_function_time(print_only=True)
def verify_documents(
state: DocVerificationInput, config: RunnableConfig
) -> DocVerificationUpdate:
"""
LangGraph node to check whether the document is relevant for the original user question
Args:
state (DocVerificationInput): The current state
config (RunnableConfig): Configuration containing AgentSearchConfig
Updates:
verified_documents: list[InferenceSection]
"""
node_start_time = datetime.now()
question = state.question
retrieved_document_to_verify = state.retrieved_document_to_verify
document_content = retrieved_document_to_verify.combined_content
graph_config = cast(GraphConfig, config["metadata"]["config"])
fast_llm = graph_config.tooling.fast_llm
document_content = trim_prompt_piece(
fast_llm.config, document_content, DOCUMENT_VERIFICATION_PROMPT + question
)
msg = [
HumanMessage(
content=DOCUMENT_VERIFICATION_PROMPT.format(
question=question, document_content=document_content
)
)
]
response: BaseMessage | None = None
verified_documents = [
retrieved_document_to_verify
] # default is to treat document as relevant
try:
response = fast_llm.invoke(
msg, timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION
)
assert isinstance(response.content, str)
if not binary_string_test(
text=response.content, positive_value=AGENT_POSITIVE_VALUE_STR
):
verified_documents = []
except LLMTimeoutError:
# In this case, we continue rather than raise an error, as there is
# little harm in letting through some docs that are less relevant.
logger.error("LLM Timeout Error - verify documents")
except LLMRateLimitError:
# In this case, we continue rather than raise an error, as there is
# little harm in letting through some docs that are less relevant.
logger.error("LLM Rate Limit Error - verify documents")
return DocVerificationUpdate(
verified_documents=verified_documents,
log_messages=[
get_langgraph_node_log_string(
graph_component="shared - expanded retrieval",
node_name="verify documents",
node_start_time=node_start_time,
)
],
)
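
binary_string_test is assumed to be a lenient yes/no check on the LLM reply, with AGENT_POSITIVE_VALUE_STR as the positive token; a hedged stand-in:

def binary_string_test(text: str, positive_value: str = "yes") -> bool:
    # stand-in: positive iff the reply leads with the positive token (assumption)
    return text.strip().lower().startswith(positive_value.lower())


binary_string_test("Yes - the document addresses the question")  # True
binary_string_test("No, unrelated")  # False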

Some files were not shown because too many files have changed in this diff.