k

Reduce background errors (#4004)
bump version and fix related issues (#3996)
2026-02-18 16:25:45 +00:00 · 2025-02-15 11:22:37 -08:00 · 2025-02-14 17:35:26 -08:00 · 2025-02-14 19:57:12 +00:00 · 2025-02-14 19:40:21 +00:00 · 2025-02-14 02:33:42 +00:00
140 changed files with 4670 additions and 1504 deletions
--- a/.github/workflows/docker-build-push-cloud-web-container-on-tag.yml
+++ b/.github/workflows/docker-build-push-cloud-web-container-on-tag.yml
@@ -65,6 +65,7 @@ jobs:
            NEXT_PUBLIC_POSTHOG_KEY=${{ secrets.POSTHOG_KEY }}
            NEXT_PUBLIC_POSTHOG_HOST=${{ secrets.POSTHOG_HOST }}
            NEXT_PUBLIC_SENTRY_DSN=${{ secrets.SENTRY_DSN }}
+            NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY=${{ secrets.STRIPE_PUBLISHABLE_KEY }}
            NEXT_PUBLIC_GTM_ENABLED=true
            NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=true
            NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=true
--- a/.github/workflows/docker-build-push-model-server-container-on-tag.yml
+++ b/.github/workflows/docker-build-push-model-server-container-on-tag.yml
@@ -4,9 +4,6 @@ on:
  push:
    tags:
      - "*"
-    paths:
-      - 'backend/model_server/**'
-      - 'backend/Dockerfile.model_server'

 env:
  REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-model-server-cloud' || 'onyxdotapp/onyx-model-server' }}
@@ -15,7 +12,32 @@ env:
  BUILDKIT_PROGRESS: plain

 jobs:
+  # 1) Preliminary job to check if the changed files are relevant
+  check_model_server_changes:
+    runs-on: ubuntu-latest
+    outputs:
+      changed: ${{ steps.check.outputs.changed }}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          # The default shallow clone (depth 1) does not contain the "before"
+          # commit, so the `git diff` below would fail without full history.
+          fetch-depth: 0
+
+      - name: Check if relevant files changed
+        id: check
+        env:
+          BEFORE_SHA: ${{ github.event.before }}
+          CURRENT_SHA: ${{ github.sha }}
+        run: |
+          # If the previous commit is unknown (e.g. a newly created tag reports an
+          # all-zero "before" SHA) we cannot diff; build rather than silently skip.
+          if ! git cat-file -e "${BEFORE_SHA}^{commit}" 2>/dev/null; then
+            echo "changed=true" >> "$GITHUB_OUTPUT"
+            exit 0
+          fi
+
+          # Compare the previous commit (github.event.before) to the current one (github.sha)
+          # If any file in backend/model_server/** or backend/Dockerfile.model_server is changed,
+          # set changed=true, otherwise changed=false
+          if git diff --name-only "$BEFORE_SHA" "$CURRENT_SHA" \
+             | grep -qE '^backend/model_server/|^backend/Dockerfile\.model_server'; then
+            echo "changed=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "changed=false" >> "$GITHUB_OUTPUT"
+          fi
+
  build-amd64:
+    needs: [check_model_server_changes]
+    if: needs.check_model_server_changes.outputs.changed == 'true'
    runs-on:
      [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}-amd64"]
    steps:
@@ -55,6 +77,8 @@ jobs:
          provenance: false

  build-arm64:
+    needs: [check_model_server_changes]
+    if: needs.check_model_server_changes.outputs.changed == 'true'
    runs-on:
      [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}-arm64"]
    steps:
@@ -94,7 +118,8 @@ jobs:
          provenance: false

  merge-and-scan:
-    needs: [build-amd64, build-arm64]
+    needs: [build-amd64, build-arm64, check_model_server_changes]
+    if: needs.check_model_server_changes.outputs.changed == 'true'
    runs-on: ubuntu-latest
    steps:
      - name: Login to Docker Hub
--- a/.github/workflows/pr-python-connector-tests.yml
+++ b/.github/workflows/pr-python-connector-tests.yml
@@ -44,6 +44,9 @@ env:
  SHAREPOINT_CLIENT_SECRET: ${{ secrets.SHAREPOINT_CLIENT_SECRET }}
  SHAREPOINT_CLIENT_DIRECTORY_ID: ${{ secrets.SHAREPOINT_CLIENT_DIRECTORY_ID }}
  SHAREPOINT_SITE: ${{ secrets.SHAREPOINT_SITE }}
+  # Gitbook
+  GITBOOK_SPACE_ID: ${{ secrets.GITBOOK_SPACE_ID }}
+  GITBOOK_API_KEY: ${{ secrets.GITBOOK_API_KEY }}

 jobs:
  connectors-check:
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -35,7 +35,9 @@ RUN apt-get update && \
        libuuid1=2.38.1-5+deb12u1 \
        libxmlsec1-dev \
        pkg-config \
-        gcc && \
+        gcc \
+        nano \
+        vim && \
    rm -rf /var/lib/apt/lists/* && \
    apt-get clean

--- a/backend/alembic/versions/f39c5794c10a_add_background_errors_table.py
+++ b/backend/alembic/versions/f39c5794c10a_add_background_errors_table.py
@@ -0,0 +1,40 @@
+"""Add background errors table
+
+Revision ID: f39c5794c10a
+Revises: 2cdeff6d8c93
+Create Date: 2025-02-12 17:11:14.527876
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+# revision identifiers, used by Alembic.
+revision = "f39c5794c10a"
+down_revision = "2cdeff6d8c93"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Create the `background_error` table.
+
+    Columns:
+      - `id`: integer primary key.
+      - `message`: required error text.
+      - `time_created`: timezone-aware timestamp, defaulted by the database
+        to `now()`.
+      - `cc_pair_id`: optional reference to `connector_credential_pair.id`;
+        rows are removed with their connector/credential pair
+        (ON DELETE CASCADE).
+    """
+    op.create_table(
+        "background_error",
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column("message", sa.String(), nullable=False),
+        sa.Column(
+            "time_created",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.Column("cc_pair_id", sa.Integer(), nullable=True),
+        sa.PrimaryKeyConstraint("id"),
+        sa.ForeignKeyConstraint(
+            ["cc_pair_id"],
+            ["connector_credential_pair.id"],
+            ondelete="CASCADE",
+        ),
+    )
+
+
+def downgrade() -> None:
+    """Drop the `background_error` table, discarding all recorded errors."""
+    op.drop_table("background_error")
--- a/backend/ee/onyx/background/celery/tasks/beat_schedule.py
+++ b/backend/ee/onyx/background/celery/tasks/beat_schedule.py
@@ -1,10 +1,10 @@
 from datetime import timedelta
 from typing import Any

-from onyx.background.celery.tasks.beat_schedule import BEAT_EXPIRES_DEFAULT
 from onyx.background.celery.tasks.beat_schedule import (
-    beat_system_tasks as base_beat_system_tasks,
+    beat_cloud_tasks as base_beat_system_tasks,
 )
+from onyx.background.celery.tasks.beat_schedule import BEAT_EXPIRES_DEFAULT
 from onyx.background.celery.tasks.beat_schedule import (
    beat_task_templates as base_beat_task_templates,
 )
--- a/backend/ee/onyx/configs/app_configs.py
+++ b/backend/ee/onyx/configs/app_configs.py
@@ -77,3 +77,5 @@ POSTHOG_HOST = os.environ.get("POSTHOG_HOST") or "https://us.i.posthog.com"
 HUBSPOT_TRACKING_URL = os.environ.get("HUBSPOT_TRACKING_URL")

 ANONYMOUS_USER_COOKIE_NAME = "onyx_anonymous_user"
+
+GATED_TENANTS_KEY = "gated_tenants"
--- a/backend/ee/onyx/external_permissions/confluence/group_sync.py
+++ b/backend/ee/onyx/external_permissions/confluence/group_sync.py
@@ -1,5 +1,6 @@
 from ee.onyx.db.external_perm import ExternalUserGroup
 from ee.onyx.external_permissions.confluence.constants import ALL_CONF_EMAILS_GROUP_NAME
+from onyx.background.error_logging import emit_background_error
 from onyx.connectors.confluence.onyx_confluence import build_confluence_client
 from onyx.connectors.confluence.onyx_confluence import OnyxConfluence
 from onyx.connectors.confluence.utils import get_user_email_from_username__server
@@ -10,7 +11,7 @@ logger = setup_logger()


 def _build_group_member_email_map(
-    confluence_client: OnyxConfluence,
+    confluence_client: OnyxConfluence, cc_pair_id: int
 ) -> dict[str, set[str]]:
    group_member_emails: dict[str, set[str]] = {}
    for user_result in confluence_client.paginated_cql_user_retrieval():
@@ -18,8 +19,11 @@ def _build_group_member_email_map(

        user = user_result.get("user", {})
        if not user:
-            logger.warning(f"user result missing user field: {user_result}")
+            msg = f"user result missing user field: {user_result}"
+            emit_background_error(msg, cc_pair_id=cc_pair_id)
+            logger.error(msg)
            continue
+
        email = user.get("email")
        if not email:
            # This field is only present in Confluence Server
@@ -32,7 +36,12 @@ def _build_group_member_email_map(
                )
        if not email:
            # If we still don't have an email, skip this user
-            logger.warning(f"user result missing email field: {user_result}")
+            msg = f"user result missing email field: {user_result}"
+            if user.get("type") == "app":
+                logger.warning(msg)
+            else:
+                emit_background_error(msg, cc_pair_id=cc_pair_id)
+                logger.error(msg)
            continue

        all_users_groups: set[str] = set()
@@ -42,11 +51,18 @@ def _build_group_member_email_map(
            group_member_emails.setdefault(group_id, set()).add(email)
            all_users_groups.add(group_id)

-        if not group_member_emails:
-            logger.warning(f"No groups found for user with email: {email}")
+        if not all_users_groups:
+            msg = f"No groups found for user with email: {email}"
+            emit_background_error(msg, cc_pair_id=cc_pair_id)
+            logger.error(msg)
        else:
            logger.debug(f"Found groups {all_users_groups} for user with email {email}")

+    if not group_member_emails:
+        msg = "No groups found for any users."
+        emit_background_error(msg, cc_pair_id=cc_pair_id)
+        logger.error(msg)
+
    return group_member_emails


@@ -61,6 +77,7 @@ def confluence_group_sync(

    group_member_email_map = _build_group_member_email_map(
        confluence_client=confluence_client,
+        cc_pair_id=cc_pair.id,
    )
    onyx_groups: list[ExternalUserGroup] = []
    all_found_emails = set()
--- a/backend/ee/onyx/server/query_and_chat/query_backend.py
+++ b/backend/ee/onyx/server/query_and_chat/query_backend.py
@@ -83,6 +83,7 @@ def handle_search_request(
        user=user,
        llm=llm,
        fast_llm=fast_llm,
+        skip_query_analysis=False,
        db_session=db_session,
        bypass_acl=False,
    )
--- a/backend/ee/onyx/server/tenants/api.py
+++ b/backend/ee/onyx/server/tenants/api.py
@@ -18,11 +18,16 @@ from ee.onyx.server.tenants.anonymous_user_path import (
 from ee.onyx.server.tenants.anonymous_user_path import modify_anonymous_user_path
 from ee.onyx.server.tenants.anonymous_user_path import validate_anonymous_user_path
 from ee.onyx.server.tenants.billing import fetch_billing_information
+from ee.onyx.server.tenants.billing import fetch_stripe_checkout_session
 from ee.onyx.server.tenants.billing import fetch_tenant_stripe_information
 from ee.onyx.server.tenants.models import AnonymousUserPath
 from ee.onyx.server.tenants.models import BillingInformation
 from ee.onyx.server.tenants.models import ImpersonateRequest
 from ee.onyx.server.tenants.models import ProductGatingRequest
+from ee.onyx.server.tenants.models import ProductGatingResponse
+from ee.onyx.server.tenants.models import SubscriptionSessionResponse
+from ee.onyx.server.tenants.models import SubscriptionStatusResponse
+from ee.onyx.server.tenants.product_gating import store_product_gating
 from ee.onyx.server.tenants.provisioning import delete_user_from_control_plane
 from ee.onyx.server.tenants.user_mapping import get_tenant_id_for_email
 from ee.onyx.server.tenants.user_mapping import remove_all_users_from_tenant
@@ -39,12 +44,9 @@ from onyx.db.auth import get_user_count
 from onyx.db.engine import get_current_tenant_id
 from onyx.db.engine import get_session
 from onyx.db.engine import get_session_with_tenant
-from onyx.db.notification import create_notification
 from onyx.db.users import delete_user_from_db
 from onyx.db.users import get_user_by_email
 from onyx.server.manage.models import UserByEmail
-from onyx.server.settings.store import load_settings
-from onyx.server.settings.store import store_settings
 from onyx.utils.logger import setup_logger
 from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR

@@ -126,37 +128,29 @@ async def login_as_anonymous_user(
@router.post("/product-gating")
 def gate_product(
    product_gating_request: ProductGatingRequest, _: None = Depends(control_plane_dep)
-) -> None:
+) -> ProductGatingResponse:
    """
    Gating the product means that the product is not available to the tenant.
    They will be directed to the billing page.
-    We gate the product when
-    1) User has ended free trial without adding payment method
-    2) User's card has declined
+    We gate the product when their subscription has ended.
    """
-    tenant_id = product_gating_request.tenant_id
-    token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
+    try:
+        store_product_gating(
+            product_gating_request.tenant_id, product_gating_request.application_status
+        )
+        return ProductGatingResponse(updated=True, error=None)

-    settings = load_settings()
-    settings.product_gating = product_gating_request.product_gating
-    store_settings(settings)
-
-    if product_gating_request.notification:
-        with get_session_with_tenant(tenant_id) as db_session:
-            create_notification(None, product_gating_request.notification, db_session)
-
-    if token is not None:
-        CURRENT_TENANT_ID_CONTEXTVAR.reset(token)
+    except Exception as e:
+        logger.exception("Failed to gate product")
+        return ProductGatingResponse(updated=False, error=str(e))


-@router.get("/billing-information", response_model=BillingInformation)
+@router.get("/billing-information")
 async def billing_information(
    _: User = Depends(current_admin_user),
-) -> BillingInformation:
+) -> BillingInformation | SubscriptionStatusResponse:
    logger.info("Fetching billing information")
-    return BillingInformation(
-        **fetch_billing_information(CURRENT_TENANT_ID_CONTEXTVAR.get())
-    )
+    return fetch_billing_information(CURRENT_TENANT_ID_CONTEXTVAR.get())


@router.post("/create-customer-portal-session")
@@ -169,9 +163,10 @@ async def create_customer_portal_session(_: User = Depends(current_admin_user))
        if not stripe_customer_id:
            raise HTTPException(status_code=400, detail="Stripe customer ID not found")
        logger.info(stripe_customer_id)
+
        portal_session = stripe.billing_portal.Session.create(
            customer=stripe_customer_id,
-            return_url=f"{WEB_DOMAIN}/admin/cloud-settings",
+            return_url=f"{WEB_DOMAIN}/admin/billing",
        )
        logger.info(portal_session)
        return {"url": portal_session.url}
@@ -180,6 +175,20 @@ async def create_customer_portal_session(_: User = Depends(current_admin_user))
        raise HTTPException(status_code=500, detail=str(e))


+@router.post("/create-subscription-session")
+async def create_subscription_session(
+    _: User = Depends(current_admin_user),
+) -> SubscriptionSessionResponse:
+    """Create a checkout session for the current tenant.
+
+    Reads the tenant id from `CURRENT_TENANT_ID_CONTEXTVAR`, asks
+    `fetch_stripe_checkout_session` for a session id and returns it wrapped
+    in a `SubscriptionSessionResponse`.
+
+    Raises:
+        HTTPException: status 500, with the original exception text as
+            `detail`, for any failure while creating the session.
+    """
+    try:
+        tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
+        session_id = fetch_stripe_checkout_session(tenant_id)
+        return SubscriptionSessionResponse(sessionId=session_id)
+
+    except Exception as e:
+        # Log the full traceback server-side; the client only gets str(e).
+        logger.exception("Failed to create resubscription session")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
@router.post("/impersonate")
 async def impersonate_user(
    impersonate_request: ImpersonateRequest,
--- a/backend/ee/onyx/server/tenants/billing.py
+++ b/backend/ee/onyx/server/tenants/billing.py
@@ -6,6 +6,7 @@ import stripe
 from ee.onyx.configs.app_configs import STRIPE_PRICE_ID
 from ee.onyx.configs.app_configs import STRIPE_SECRET_KEY
 from ee.onyx.server.tenants.access import generate_data_plane_token
+from ee.onyx.server.tenants.models import BillingInformation
 from onyx.configs.app_configs import CONTROL_PLANE_API_BASE_URL
 from onyx.utils.logger import setup_logger

@@ -14,6 +15,19 @@ stripe.api_key = STRIPE_SECRET_KEY
 logger = setup_logger()


+def fetch_stripe_checkout_session(tenant_id: str) -> str:
+    """Request a checkout session for `tenant_id` and return its id.
+
+    Sends an authenticated POST to
+    `{CONTROL_PLANE_API_BASE_URL}/create-checkout-session` with the tenant id
+    as a query parameter and returns the `sessionId` field of the JSON reply.
+
+    Raises:
+        requests.HTTPError: if the response has an error status
+            (via `raise_for_status`).
+        KeyError: if the JSON reply has no `sessionId` field.
+    """
+    token = generate_data_plane_token()
+    headers = {
+        "Authorization": f"Bearer {token}",
+        "Content-Type": "application/json",
+    }
+    url = f"{CONTROL_PLANE_API_BASE_URL}/create-checkout-session"
+    # tenant_id travels in the query string; the POST has no body.
+    params = {"tenant_id": tenant_id}
+    # NOTE(review): no `timeout` is passed, so this call can block
+    # indefinitely if the remote end stops responding - consider adding one.
+    response = requests.post(url, headers=headers, params=params)
+    response.raise_for_status()
+    return response.json()["sessionId"]
+
+
 def fetch_tenant_stripe_information(tenant_id: str) -> dict:
    token = generate_data_plane_token()
    headers = {
@@ -27,7 +41,7 @@ def fetch_tenant_stripe_information(tenant_id: str) -> dict:
    return response.json()


-def fetch_billing_information(tenant_id: str) -> dict:
+def fetch_billing_information(tenant_id: str) -> BillingInformation:
    logger.info("Fetching billing information")
    token = generate_data_plane_token()
    headers = {
@@ -38,7 +52,7 @@ def fetch_billing_information(tenant_id: str) -> dict:
    params = {"tenant_id": tenant_id}
    response = requests.get(url, headers=headers, params=params)
    response.raise_for_status()
-    billing_info = response.json()
+    billing_info = BillingInformation(**response.json())
    return billing_info


--- a/backend/ee/onyx/server/tenants/models.py
+++ b/backend/ee/onyx/server/tenants/models.py
@@ -1,7 +1,8 @@
+from datetime import datetime
+
 from pydantic import BaseModel

-from onyx.configs.constants import NotificationType
-from onyx.server.settings.models import GatingType
+from onyx.server.settings.models import ApplicationStatus


 class CheckoutSessionCreationRequest(BaseModel):
@@ -15,15 +16,24 @@ class CreateTenantRequest(BaseModel):

 class ProductGatingRequest(BaseModel):
    tenant_id: str
-    product_gating: GatingType
-    notification: NotificationType | None = None
+    application_status: ApplicationStatus
+
+
+class SubscriptionStatusResponse(BaseModel):
+    subscribed: bool


 class BillingInformation(BaseModel):
+    stripe_subscription_id: str
+    status: str
+    current_period_start: datetime
+    current_period_end: datetime
+    number_of_seats: int
+    cancel_at_period_end: bool
+    canceled_at: datetime | None
+    trial_start: datetime | None
+    trial_end: datetime | None
    seats: int
-    subscription_status: str
-    billing_start: str
-    billing_end: str
    payment_method_enabled: bool


@@ -48,3 +58,12 @@ class TenantDeletionPayload(BaseModel):

 class AnonymousUserPath(BaseModel):
    anonymous_user_path: str | None
+
+
+class ProductGatingResponse(BaseModel):
+    """Result of a product-gating update request."""
+
+    # True when the new application status was stored, False on failure.
+    updated: bool
+    # Text of the exception when the update failed, otherwise None.
+    error: str | None
+
+
+class SubscriptionSessionResponse(BaseModel):
+    """Response carrying the id of a newly created checkout session."""
+
+    # camelCase is part of the wire format; do not rename.
+    sessionId: str
--- a/backend/ee/onyx/server/tenants/product_gating.py
+++ b/backend/ee/onyx/server/tenants/product_gating.py
@@ -0,0 +1,51 @@
+from typing import cast
+
+from ee.onyx.configs.app_configs import GATED_TENANTS_KEY
+from onyx.configs.constants import ONYX_CLOUD_TENANT_ID
+from onyx.redis.redis_pool import get_redis_client
+from onyx.redis.redis_pool import get_redis_replica_client
+from onyx.server.settings.models import ApplicationStatus
+from onyx.server.settings.store import load_settings
+from onyx.server.settings.store import store_settings
+from onyx.setup import setup_logger
+from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
+
+logger = setup_logger()
+
+
+def update_tenant_gating(tenant_id: str, status: ApplicationStatus) -> None:
+    """Record a tenant's application status in Redis.
+
+    Uses the Redis client obtained for `ONYX_CLOUD_TENANT_ID` (not the
+    tenant being updated). Two pieces of state are written:
+      - the key `tenant:{tenant_id}:status`, set to `status.value`;
+      - membership of `tenant_id` in the `GATED_TENANTS_KEY` set, which is
+        added for `GATED_ACCESS` and removed for every other status.
+    """
+    redis_client = get_redis_client(tenant_id=ONYX_CLOUD_TENANT_ID)
+
+    # Store the full status
+    status_key = f"tenant:{tenant_id}:status"
+    redis_client.set(status_key, status.value)
+
+    # Maintain the GATED_ACCESS set
+    if status == ApplicationStatus.GATED_ACCESS:
+        redis_client.sadd(GATED_TENANTS_KEY, tenant_id)
+    else:
+        redis_client.srem(GATED_TENANTS_KEY, tenant_id)
+
+
+def store_product_gating(tenant_id: str, application_status: ApplicationStatus) -> None:
+    """Persist `application_status` for `tenant_id` in settings and Redis.
+
+    `CURRENT_TENANT_ID_CONTEXTVAR` is set to `tenant_id` while the settings
+    are loaded, updated and stored (presumably the settings store resolves
+    the tenant from this context variable - confirm in
+    `onyx.server.settings.store`). The previous value is always restored
+    before returning, including when an error occurs.
+
+    Raises:
+        Exception: any failure from the settings store or Redis is logged
+            and re-raised unchanged.
+    """
+    token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
+    try:
+        settings = load_settings()
+        settings.application_status = application_status
+        store_settings(settings)
+
+        # Store gated tenant information in Redis
+        update_tenant_gating(tenant_id, application_status)
+
+    except Exception:
+        logger.exception("Failed to gate product")
+        raise
+
+    finally:
+        # Restore the caller's tenant context even on failure; otherwise the
+        # context would stay pinned to `tenant_id` after an exception.
+        CURRENT_TENANT_ID_CONTEXTVAR.reset(token)
+
+
+def get_gated_tenants() -> set[str]:
+    """Return the members of the `GATED_TENANTS_KEY` set.
+
+    Reads through the replica client obtained for `ONYX_CLOUD_TENANT_ID`.
+    """
+    redis_client = get_redis_replica_client(tenant_id=ONYX_CLOUD_TENANT_ID)
+    # NOTE(review): `cast` only affects type checking; if the client returns
+    # bytes members they are NOT decoded here - confirm the client's
+    # response decoding before comparing against `str` tenant ids.
+    return cast(set[str], redis_client.smembers(GATED_TENANTS_KEY))
--- a/backend/onyx/agents/agent_search/basic/graph_builder.py
+++ b/backend/onyx/agents/agent_search/basic/graph_builder.py
@@ -85,7 +85,7 @@ if __name__ == "__main__":

    graph = basic_graph_builder()
    compiled_graph = graph.compile()
-    input = BasicInput(_unused=True)
+    input = BasicInput(unused=True)
    primary_llm, fast_llm = get_default_llms()
    with get_session_context_manager() as db_session:
        config, _ = get_test_config(
--- a/backend/onyx/agents/agent_search/basic/states.py
+++ b/backend/onyx/agents/agent_search/basic/states.py
@@ -17,7 +17,7 @@ from onyx.agents.agent_search.orchestration.states import ToolChoiceUpdate
 class BasicInput(BaseModel):
    # Langgraph needs a nonempty input, but we pass in all static
    # data through a RunnableConfig.
-    _unused: bool = True
+    unused: bool = True


 ## Graph Output State
--- a/backend/onyx/agents/agent_search/core_state.py
+++ b/backend/onyx/agents/agent_search/core_state.py
@@ -9,7 +9,6 @@ class CoreState(BaseModel):
    This is the core state that is shared across all subgraphs.
    """

-    base_question: str = ""
    log_messages: Annotated[list[str], add] = []


@@ -18,4 +17,4 @@ class SubgraphCoreState(BaseModel):
    This is the core state that is shared across all subgraphs.
    """

-    log_messages: Annotated[list[str], add]
+    log_messages: Annotated[list[str], add] = []
--- a/backend/onyx/agents/agent_search/deep_search/initial/generate_individual_sub_answer/nodes/check_sub_answer.py
+++ b/backend/onyx/agents/agent_search/deep_search/initial/generate_individual_sub_answer/nodes/check_sub_answer.py
@@ -1,8 +1,8 @@
 from datetime import datetime
 from typing import cast

+from langchain_core.messages import BaseMessage
 from langchain_core.messages import HumanMessage
-from langchain_core.messages import merge_message_runs
 from langchain_core.runnables.config import RunnableConfig

 from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer.states import (
@@ -12,14 +12,43 @@ from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer
    SubQuestionAnswerCheckUpdate,
 )
 from onyx.agents.agent_search.models import GraphConfig
+from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
+    binary_string_test,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AGENT_LLM_RATELIMIT_MESSAGE,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AGENT_LLM_TIMEOUT_MESSAGE,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AGENT_POSITIVE_VALUE_STR,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import AgentLLMErrorType
+from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
+from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id
+from onyx.configs.agent_configs import AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK
+from onyx.llm.chat_llm import LLMRateLimitError
+from onyx.llm.chat_llm import LLMTimeoutError
 from onyx.prompts.agent_search import SUB_ANSWER_CHECK_PROMPT
 from onyx.prompts.agent_search import UNKNOWN_ANSWER
+from onyx.utils.logger import setup_logger
+from onyx.utils.timing import log_function_time
+
+logger = setup_logger()
+
+_llm_node_error_strings = LLMNodeErrorStrings(
+    timeout="LLM Timeout Error. The sub-answer will be treated as 'relevant'",
+    rate_limit="LLM Rate Limit Error. The sub-answer will be treated as 'relevant'",
+    general_error="General LLM Error. The sub-answer will be treated as 'relevant'",
+)


+@log_function_time(print_only=True)
 def check_sub_answer(
    state: AnswerQuestionState, config: RunnableConfig
 ) -> SubQuestionAnswerCheckUpdate:
@@ -53,14 +82,40 @@ def check_sub_answer(

    graph_config = cast(GraphConfig, config["metadata"]["config"])
    fast_llm = graph_config.tooling.fast_llm
-    response = list(
-        fast_llm.stream(
+    agent_error: AgentErrorLog | None = None
+    response: BaseMessage | None = None
+    try:
+        response = fast_llm.invoke(
            prompt=msg,
+            timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK,
        )
-    )

-    quality_str: str = merge_message_runs(response, chunk_separator="")[0].content
-    answer_quality = "yes" in quality_str.lower()
+        quality_str: str = cast(str, response.content)
+        answer_quality = binary_string_test(
+            text=quality_str, positive_value=AGENT_POSITIVE_VALUE_STR
+        )
+        log_result = f"Answer quality: {quality_str}"
+
+    except LLMTimeoutError:
+        agent_error = AgentErrorLog(
+            error_type=AgentLLMErrorType.TIMEOUT,
+            error_message=AGENT_LLM_TIMEOUT_MESSAGE,
+            error_result=_llm_node_error_strings.timeout,
+        )
+        answer_quality = True
+        log_result = agent_error.error_result
+        logger.error("LLM Timeout Error - check sub answer")
+
+    except LLMRateLimitError:
+        agent_error = AgentErrorLog(
+            error_type=AgentLLMErrorType.RATE_LIMIT,
+            error_message=AGENT_LLM_RATELIMIT_MESSAGE,
+            error_result=_llm_node_error_strings.rate_limit,
+        )
+
+        answer_quality = True
+        log_result = agent_error.error_result
+        logger.error("LLM Rate Limit Error - check sub answer")

    return SubQuestionAnswerCheckUpdate(
        answer_quality=answer_quality,
@@ -69,7 +124,7 @@ def check_sub_answer(
                graph_component="initial  - generate individual sub answer",
                node_name="check sub answer",
                node_start_time=node_start_time,
-                result=f"Answer quality: {quality_str}",
+                result=log_result,
            )
        ],
    )
--- a/backend/onyx/agents/agent_search/deep_search/initial/generate_individual_sub_answer/nodes/generate_sub_answer.py
+++ b/backend/onyx/agents/agent_search/deep_search/initial/generate_individual_sub_answer/nodes/generate_sub_answer.py
@@ -16,6 +16,23 @@ from onyx.agents.agent_search.models import GraphConfig
 from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
    build_sub_question_answer_prompt,
 )
+from onyx.agents.agent_search.shared_graph_utils.calculations import (
+    dedup_sort_inference_section_list,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AGENT_LLM_RATELIMIT_MESSAGE,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AGENT_LLM_TIMEOUT_MESSAGE,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AgentLLMErrorType,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    LLM_ANSWER_ERROR_MESSAGE,
+)
+from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
+from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
 from onyx.agents.agent_search.shared_graph_utils.utils import get_answer_citation_ids
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
@@ -30,12 +47,23 @@ from onyx.chat.models import StreamStopInfo
 from onyx.chat.models import StreamStopReason
 from onyx.chat.models import StreamType
 from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
+from onyx.configs.agent_configs import AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION
+from onyx.llm.chat_llm import LLMRateLimitError
+from onyx.llm.chat_llm import LLMTimeoutError
 from onyx.prompts.agent_search import NO_RECOVERED_DOCS
 from onyx.utils.logger import setup_logger
+from onyx.utils.timing import log_function_time

 logger = setup_logger()

+_llm_node_error_strings = LLMNodeErrorStrings(
+    timeout="LLM Timeout Error. A sub-answer could not be constructed and the sub-question will be ignored.",
+    rate_limit="LLM Rate Limit Error. A sub-answer could not be constructed and the sub-question will be ignored.",
+    general_error="General LLM Error. A sub-answer could not be constructed and the sub-question will be ignored.",
+)

+
+@log_function_time(print_only=True)
 def generate_sub_answer(
    state: AnswerQuestionState,
    config: RunnableConfig,
@@ -51,12 +79,17 @@ def generate_sub_answer(
    state.verified_reranked_documents
    level, question_num = parse_question_id(state.question_id)
    context_docs = state.context_documents[:AGENT_MAX_ANSWER_CONTEXT_DOCS]
+
+    context_docs = dedup_sort_inference_section_list(context_docs)
+
    persona_contextualized_prompt = get_persona_agent_prompt_expressions(
        graph_config.inputs.search_request.persona
    ).contextualized_prompt

    if len(context_docs) == 0:
        answer_str = NO_RECOVERED_DOCS
+        cited_documents: list = []
+        log_results = "No documents retrieved"
        write_custom_event(
            "sub_answers",
            AgentAnswerPiece(
@@ -79,41 +112,67 @@ def generate_sub_answer(

        response: list[str | list[str | dict[str, Any]]] = []
        dispatch_timings: list[float] = []
-        for message in fast_llm.stream(
-            prompt=msg,
-        ):
-            # TODO: in principle, the answer here COULD contain images, but we don't support that yet
-            content = message.content
-            if not isinstance(content, str):
-                raise ValueError(
-                    f"Expected content to be a string, but got {type(content)}"
+
+        agent_error: AgentErrorLog | None = None
+
+        try:
+            for message in fast_llm.stream(
+                prompt=msg,
+                timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION,
+            ):
+                # TODO: in principle, the answer here COULD contain images, but we don't support that yet
+                content = message.content
+                if not isinstance(content, str):
+                    raise ValueError(
+                        f"Expected content to be a string, but got {type(content)}"
+                    )
+                start_stream_token = datetime.now()
+                write_custom_event(
+                    "sub_answers",
+                    AgentAnswerPiece(
+                        answer_piece=content,
+                        level=level,
+                        level_question_num=question_num,
+                        answer_type="agent_sub_answer",
+                    ),
+                    writer,
                )
-            start_stream_token = datetime.now()
-            write_custom_event(
-                "sub_answers",
-                AgentAnswerPiece(
-                    answer_piece=content,
-                    level=level,
-                    level_question_num=question_num,
-                    answer_type="agent_sub_answer",
-                ),
-                writer,
-            )
-            end_stream_token = datetime.now()
-            dispatch_timings.append(
-                (end_stream_token - start_stream_token).microseconds
-            )
-            response.append(content)
+                end_stream_token = datetime.now()
+                dispatch_timings.append(
+                    (end_stream_token - start_stream_token).microseconds
+                )
+                response.append(content)

-        answer_str = merge_message_runs(response, chunk_separator="")[0].content
-        logger.debug(
-            f"Average dispatch time: {sum(dispatch_timings) / len(dispatch_timings)}"
-        )
+        except LLMTimeoutError:
+            agent_error = AgentErrorLog(
+                error_type=AgentLLMErrorType.TIMEOUT,
+                error_message=AGENT_LLM_TIMEOUT_MESSAGE,
+                error_result=_llm_node_error_strings.timeout,
+            )
+            logger.error("LLM Timeout Error - generate sub answer")
+        except LLMRateLimitError:
+            agent_error = AgentErrorLog(
+                error_type=AgentLLMErrorType.RATE_LIMIT,
+                error_message=AGENT_LLM_RATELIMIT_MESSAGE,
+                error_result=_llm_node_error_strings.rate_limit,
+            )
+            logger.error("LLM Rate Limit Error - generate sub answer")

-    answer_citation_ids = get_answer_citation_ids(answer_str)
-    cited_documents = [
-        context_docs[id] for id in answer_citation_ids if id < len(context_docs)
-    ]
+        if agent_error:
+            answer_str = LLM_ANSWER_ERROR_MESSAGE
+            cited_documents = []
+            log_results = (
+                agent_error.error_result
+                or "Sub-answer generation failed due to LLM error"
+            )
+
+        else:
+            answer_str = merge_message_runs(response, chunk_separator="")[0].content
+            answer_citation_ids = get_answer_citation_ids(answer_str)
+            cited_documents = [
+                context_docs[id] for id in answer_citation_ids if id < len(context_docs)
+            ]
+            log_results = None

    stop_event = StreamStopInfo(
        stop_reason=StreamStopReason.FINISHED,
@@ -131,7 +190,7 @@ def generate_sub_answer(
                graph_component="initial - generate individual sub answer",
                node_name="generate sub answer",
                node_start_time=node_start_time,
-                result="",
+                result=log_results or "",
            )
        ],
    )
--- a/backend/onyx/agents/agent_search/deep_search/initial/generate_individual_sub_answer/states.py
+++ b/backend/onyx/agents/agent_search/deep_search/initial/generate_individual_sub_answer/states.py
@@ -42,10 +42,8 @@ class SubQuestionRetrievalIngestionUpdate(LoggerUpdate, BaseModel):


 class SubQuestionAnsweringInput(SubgraphCoreState):
-    question: str = ""
-    question_id: str = (
-        ""  # 0_0 is original question, everything else is <level>_<question_num>.
-    )
+    question: str
+    question_id: str
    # level 0 is original question and first decomposition, level 1 is follow up, etc
    # question_num is a unique number per original question per level.

--- a/backend/onyx/agents/agent_search/deep_search/initial/generate_initial_answer/nodes/generate_initial_answer.py
+++ b/backend/onyx/agents/agent_search/deep_search/initial/generate_initial_answer/nodes/generate_initial_answer.py
@@ -26,14 +26,31 @@ from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
 from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
    trim_prompt_piece,
 )
+from onyx.agents.agent_search.shared_graph_utils.calculations import (
+    get_answer_generation_documents,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AGENT_LLM_RATELIMIT_MESSAGE,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AGENT_LLM_TIMEOUT_MESSAGE,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AgentLLMErrorType,
+)
+from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
 from onyx.agents.agent_search.shared_graph_utils.models import InitialAgentResultStats
+from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
 from onyx.agents.agent_search.shared_graph_utils.operators import (
-    dedup_inference_sections,
+    dedup_inference_section_list,
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    dispatch_main_answer_stop_info,
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import format_docs
+from onyx.agents.agent_search.shared_graph_utils.utils import (
+    get_deduplicated_structured_subquestion_documents,
+)
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
 )
@@ -42,12 +59,16 @@ from onyx.agents.agent_search.shared_graph_utils.utils import remove_document_ci
 from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
 from onyx.chat.models import AgentAnswerPiece
 from onyx.chat.models import ExtendedToolResponse
+from onyx.chat.models import StreamingError
 from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
+from onyx.configs.agent_configs import AGENT_MAX_STREAMED_DOCS_FOR_INITIAL_ANSWER
 from onyx.configs.agent_configs import AGENT_MIN_ORIG_QUESTION_DOCS
-from onyx.context.search.models import InferenceSection
-from onyx.prompts.agent_search import (
-    INITIAL_ANSWER_PROMPT_W_SUB_QUESTIONS,
+from onyx.configs.agent_configs import (
+    AGENT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION,
 )
+from onyx.llm.chat_llm import LLMRateLimitError
+from onyx.llm.chat_llm import LLMTimeoutError
+from onyx.prompts.agent_search import INITIAL_ANSWER_PROMPT_W_SUB_QUESTIONS
 from onyx.prompts.agent_search import (
    INITIAL_ANSWER_PROMPT_WO_SUB_QUESTIONS,
 )
@@ -56,8 +77,16 @@ from onyx.prompts.agent_search import (
 )
 from onyx.prompts.agent_search import UNKNOWN_ANSWER
 from onyx.tools.tool_implementations.search.search_tool import yield_search_responses
+from onyx.utils.timing import log_function_time
+
+_llm_node_error_strings = LLMNodeErrorStrings(
+    timeout="LLM Timeout Error. The initial answer could not be generated.",
+    rate_limit="LLM Rate Limit Error. The initial answer could not be generated.",
+    general_error="General LLM Error. The initial answer could not be generated.",
+)


+@log_function_time(print_only=True)
 def generate_initial_answer(
    state: SubQuestionRetrievalState,
    config: RunnableConfig,
@@ -73,15 +102,19 @@ def generate_initial_answer(
    question = graph_config.inputs.search_request.query
    prompt_enrichment_components = get_prompt_enrichment_components(graph_config)

-    sub_questions_cited_documents = state.cited_documents
+    # get all documents cited in sub-questions
+    structured_subquestion_docs = get_deduplicated_structured_subquestion_documents(
+        state.sub_question_results
+    )
+
    orig_question_retrieval_documents = state.orig_question_retrieved_documents

-    consolidated_context_docs: list[InferenceSection] = sub_questions_cited_documents
+    consolidated_context_docs = structured_subquestion_docs.cited_documents
    counter = 0
    for original_doc_number, original_doc in enumerate(
        orig_question_retrieval_documents
    ):
-        if original_doc_number not in sub_questions_cited_documents:
+        if original_doc_number not in structured_subquestion_docs.cited_documents:
            if (
                counter <= AGENT_MIN_ORIG_QUESTION_DOCS
                or len(consolidated_context_docs) < AGENT_MAX_ANSWER_CONTEXT_DOCS
@@ -90,15 +123,18 @@ def generate_initial_answer(
                counter += 1

    # sort docs by their scores - though the scores refer to different questions
-    relevant_docs = dedup_inference_sections(
-        consolidated_context_docs, consolidated_context_docs
-    )
+    relevant_docs = dedup_inference_section_list(consolidated_context_docs)

    sub_questions: list[str] = []
-    streamed_documents = (
-        relevant_docs
-        if len(relevant_docs) > 0
-        else state.orig_question_retrieved_documents[:15]
+
+    # Create the list of documents to stream out. Start with the
+    # ones that will be in the context (or, if len == 0, use docs
+    # that were retrieved for the original question)
+    answer_generation_documents = get_answer_generation_documents(
+        relevant_docs=relevant_docs,
+        context_documents=structured_subquestion_docs.context_documents,
+        original_question_docs=orig_question_retrieval_documents,
+        max_docs=AGENT_MAX_STREAMED_DOCS_FOR_INITIAL_ANSWER,
    )

    # Use the query info from the base document retrieval
@@ -108,11 +144,13 @@ def generate_initial_answer(
        graph_config.tooling.search_tool
    ), "search_tool must be provided for agentic search"

-    relevance_list = relevance_from_docs(relevant_docs)
+    relevance_list = relevance_from_docs(
+        answer_generation_documents.streaming_documents
+    )
    for tool_response in yield_search_responses(
        query=question,
-        reranked_sections=streamed_documents,
-        final_context_sections=streamed_documents,
+        reranked_sections=answer_generation_documents.streaming_documents,
+        final_context_sections=answer_generation_documents.context_documents,
        search_query_info=query_info,
        get_section_relevance=lambda: relevance_list,
        search_tool=graph_config.tooling.search_tool,
@@ -128,7 +166,7 @@ def generate_initial_answer(
            writer,
        )

-    if len(relevant_docs) == 0:
+    if len(answer_generation_documents.context_documents) == 0:
        write_custom_event(
            "initial_agent_answer",
            AgentAnswerPiece(
@@ -194,7 +232,7 @@ def generate_initial_answer(

        model = graph_config.tooling.fast_llm

-        doc_context = format_docs(relevant_docs)
+        doc_context = format_docs(answer_generation_documents.context_documents)
        doc_context = trim_prompt_piece(
            config=model.config,
            prompt_piece=doc_context,
@@ -224,30 +262,82 @@ def generate_initial_answer(

        streamed_tokens: list[str | list[str | dict[str, Any]]] = [""]
        dispatch_timings: list[float] = []
-        for message in model.stream(msg):
-            # TODO: in principle, the answer here COULD contain images, but we don't support that yet
-            content = message.content
-            if not isinstance(content, str):
-                raise ValueError(
-                    f"Expected content to be a string, but got {type(content)}"
-                )
-            start_stream_token = datetime.now()

+        agent_error: AgentErrorLog | None = None
+
+        try:
+            for message in model.stream(
+                msg,
+                timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION,
+            ):
+                # TODO: in principle, the answer here COULD contain images, but we don't support that yet
+                content = message.content
+                if not isinstance(content, str):
+                    raise ValueError(
+                        f"Expected content to be a string, but got {type(content)}"
+                    )
+                start_stream_token = datetime.now()
+
+                write_custom_event(
+                    "initial_agent_answer",
+                    AgentAnswerPiece(
+                        answer_piece=content,
+                        level=0,
+                        level_question_num=0,
+                        answer_type="agent_level_answer",
+                    ),
+                    writer,
+                )
+                end_stream_token = datetime.now()
+                dispatch_timings.append(
+                    (end_stream_token - start_stream_token).microseconds
+                )
+                streamed_tokens.append(content)
+
+        except LLMTimeoutError:
+            agent_error = AgentErrorLog(
+                error_type=AgentLLMErrorType.TIMEOUT,
+                error_message=AGENT_LLM_TIMEOUT_MESSAGE,
+                error_result=_llm_node_error_strings.timeout,
+            )
+            logger.error("LLM Timeout Error - generate initial answer")
+
+        except LLMRateLimitError:
+            agent_error = AgentErrorLog(
+                error_type=AgentLLMErrorType.RATE_LIMIT,
+                error_message=AGENT_LLM_RATELIMIT_MESSAGE,
+                error_result=_llm_node_error_strings.rate_limit,
+            )
+            logger.error("LLM Rate Limit Error - generate initial answer")
+
+        if agent_error:
            write_custom_event(
                "initial_agent_answer",
-                AgentAnswerPiece(
-                    answer_piece=content,
-                    level=0,
-                    level_question_num=0,
-                    answer_type="agent_level_answer",
+                StreamingError(
+                    error=AGENT_LLM_TIMEOUT_MESSAGE,
                ),
                writer,
            )
-            end_stream_token = datetime.now()
-            dispatch_timings.append(
-                (end_stream_token - start_stream_token).microseconds
+            return InitialAnswerUpdate(
+                initial_answer=None,
+                answer_error=AgentErrorLog(
+                    error_message=agent_error.error_message or "An LLM error occurred",
+                    error_type=agent_error.error_type,
+                    error_result=agent_error.error_result,
+                ),
+                initial_agent_stats=None,
+                generated_sub_questions=sub_questions,
+                agent_base_end_time=None,
+                agent_base_metrics=None,
+                log_messages=[
+                    get_langgraph_node_log_string(
+                        graph_component="initial - generate initial answer",
+                        node_name="generate initial answer",
+                        node_start_time=node_start_time,
+                        result=agent_error.error_result or "An LLM error occurred",
+                    )
+                ],
            )
-            streamed_tokens.append(content)

        logger.debug(
            f"Average dispatch time for initial answer: {sum(dispatch_timings) / len(dispatch_timings)}"
--- a/backend/onyx/agents/agent_search/deep_search/initial/generate_initial_answer/nodes/validate_initial_answer.py
+++ b/backend/onyx/agents/agent_search/deep_search/initial/generate_initial_answer/nodes/validate_initial_answer.py
@@ -10,8 +10,10 @@ from onyx.agents.agent_search.deep_search.main.states import (
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
 )
+from onyx.utils.timing import log_function_time


+@log_function_time(print_only=True)
 def validate_initial_answer(
    state: SubQuestionRetrievalState,
 ) -> InitialAnswerQualityUpdate:
@@ -25,7 +27,7 @@ def validate_initial_answer(
        f"--------{node_start_time}--------Checking for base answer validity - for not set True/False manually"
    )

-    verdict = True
+    verdict = True  # not actually required as already streamed out. Refinement will do similar

    return InitialAnswerQualityUpdate(
        initial_answer_quality_eval=verdict,
--- a/backend/onyx/agents/agent_search/deep_search/initial/generate_sub_answers/nodes/decompose_orig_question.py
+++ b/backend/onyx/agents/agent_search/deep_search/initial/generate_sub_answers/nodes/decompose_orig_question.py
@@ -23,6 +23,8 @@ from onyx.agents.agent_search.models import GraphConfig
 from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
    build_history_prompt,
 )
+from onyx.agents.agent_search.shared_graph_utils.models import BaseMessage_Content
+from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
 from onyx.agents.agent_search.shared_graph_utils.utils import dispatch_separated
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
@@ -33,17 +35,30 @@ from onyx.chat.models import StreamStopReason
 from onyx.chat.models import StreamType
 from onyx.chat.models import SubQuestionPiece
 from onyx.configs.agent_configs import AGENT_NUM_DOCS_FOR_DECOMPOSITION
+from onyx.configs.agent_configs import (
+    AGENT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION,
+)
+from onyx.llm.chat_llm import LLMRateLimitError
+from onyx.llm.chat_llm import LLMTimeoutError
 from onyx.prompts.agent_search import (
-    INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH,
+    INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH_ASSUMING_REFINEMENT,
 )
 from onyx.prompts.agent_search import (
-    INITIAL_QUESTION_DECOMPOSITION_PROMPT,
+    INITIAL_QUESTION_DECOMPOSITION_PROMPT_ASSUMING_REFINEMENT,
 )
 from onyx.utils.logger import setup_logger
+from onyx.utils.timing import log_function_time

 logger = setup_logger()

+_llm_node_error_strings = LLMNodeErrorStrings(
+    timeout="LLM Timeout Error. Sub-questions could not be generated.",
+    rate_limit="LLM Rate Limit Error. Sub-questions could not be generated.",
+    general_error="General LLM Error. Sub-questions could not be generated.",
+)

+
+@log_function_time(print_only=True)
 def decompose_orig_question(
    state: SubQuestionRetrievalState,
    config: RunnableConfig,
@@ -85,15 +100,15 @@ def decompose_orig_question(
            ]
        )

-        decomposition_prompt = (
-            INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH.format(
-                question=question, sample_doc_str=sample_doc_str, history=history
-            )
+        decomposition_prompt = INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH_ASSUMING_REFINEMENT.format(
+            question=question, sample_doc_str=sample_doc_str, history=history
        )

    else:
-        decomposition_prompt = INITIAL_QUESTION_DECOMPOSITION_PROMPT.format(
-            question=question, history=history
+        decomposition_prompt = (
+            INITIAL_QUESTION_DECOMPOSITION_PROMPT_ASSUMING_REFINEMENT.format(
+                question=question, history=history
+            )
        )

    # Start decomposition
@@ -112,32 +127,42 @@ def decompose_orig_question(
    )

    # dispatches custom events for subquestion tokens, adding in subquestion ids.
-    streamed_tokens = dispatch_separated(
-        model.stream(msg),
-        dispatch_subquestion(0, writer),
-        sep_callback=dispatch_subquestion_sep(0, writer),
-    )

-    stop_event = StreamStopInfo(
-        stop_reason=StreamStopReason.FINISHED,
-        stream_type=StreamType.SUB_QUESTIONS,
-        level=0,
-    )
-    write_custom_event("stream_finished", stop_event, writer)
+    streamed_tokens: list[BaseMessage_Content] = []

-    deomposition_response = merge_content(*streamed_tokens)
+    try:
+        streamed_tokens = dispatch_separated(
+            model.stream(
+                msg,
+                timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION,
+            ),
+            dispatch_subquestion(0, writer),
+            sep_callback=dispatch_subquestion_sep(0, writer),
+        )

-    # this call should only return strings. Commenting out for efficiency
-    # assert [type(tok) == str for tok in streamed_tokens]
+        decomposition_response = merge_content(*streamed_tokens)

-    # use no-op cast() instead of str() which runs code
-    # list_of_subquestions = clean_and_parse_list_string(cast(str, response))
-    list_of_subqs = cast(str, deomposition_response).split("\n")
+        list_of_subqs = cast(str, decomposition_response).split("\n")

-    decomp_list: list[str] = [sq.strip() for sq in list_of_subqs if sq.strip() != ""]
+        initial_sub_questions = [sq.strip() for sq in list_of_subqs if sq.strip() != ""]
+        log_result = f"decomposed original question into {len(initial_sub_questions)} subquestions"
+
+        stop_event = StreamStopInfo(
+            stop_reason=StreamStopReason.FINISHED,
+            stream_type=StreamType.SUB_QUESTIONS,
+            level=0,
+        )
+        write_custom_event("stream_finished", stop_event, writer)
+
+    except LLMTimeoutError as e:
+        logger.error("LLM Timeout Error - decompose orig question")
+        raise e  # fail loudly on this critical step
+    except LLMRateLimitError as e:
+        logger.error("LLM Rate Limit Error - decompose orig question")
+        raise e

    return InitialQuestionDecompositionUpdate(
-        initial_sub_questions=decomp_list,
+        initial_sub_questions=initial_sub_questions,
        agent_start_time=agent_start_time,
        agent_refined_start_time=None,
        agent_refined_end_time=None,
@@ -151,7 +176,7 @@ def decompose_orig_question(
                graph_component="initial - generate sub answers",
                node_name="decompose original question",
                node_start_time=node_start_time,
-                result=f"decomposed original question into {len(decomp_list)} subquestions",
+                result=log_result,
            )
        ],
    )
--- a/backend/onyx/agents/agent_search/deep_search/main/graph_builder.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/graph_builder.py
@@ -26,8 +26,8 @@ from onyx.agents.agent_search.deep_search.main.nodes.decide_refinement_need impo
 from onyx.agents.agent_search.deep_search.main.nodes.extract_entities_terms import (
    extract_entities_terms,
 )
-from onyx.agents.agent_search.deep_search.main.nodes.generate_refined_answer import (
-    generate_refined_answer,
+from onyx.agents.agent_search.deep_search.main.nodes.generate_validate_refined_answer import (
+    generate_validate_refined_answer,
 )
 from onyx.agents.agent_search.deep_search.main.nodes.ingest_refined_sub_answers import (
    ingest_refined_sub_answers,
@@ -126,8 +126,8 @@ def main_graph_builder(test_mode: bool = False) -> StateGraph:

    # Node to generate the refined answer
    graph.add_node(
-        node="generate_refined_answer",
-        action=generate_refined_answer,
+        node="generate_validate_refined_answer",
+        action=generate_validate_refined_answer,
    )

    # Early node to extract the entities and terms from the initial answer,
@@ -215,11 +215,11 @@ def main_graph_builder(test_mode: bool = False) -> StateGraph:

    graph.add_edge(
        start_key="ingest_refined_sub_answers",
-        end_key="generate_refined_answer",
+        end_key="generate_validate_refined_answer",
    )

    graph.add_edge(
-        start_key="generate_refined_answer",
+        start_key="generate_validate_refined_answer",
        end_key="compare_answers",
    )
    graph.add_edge(
@@ -252,9 +252,7 @@ if __name__ == "__main__":
            db_session, primary_llm, fast_llm, search_request
        )

-        inputs = MainInput(
-            base_question=graph_config.inputs.search_request.query, log_messages=[]
-        )
+        inputs = MainInput(log_messages=[])

        for thing in compiled_graph.stream(
            input=inputs,
--- a/backend/onyx/agents/agent_search/deep_search/main/nodes/compare_answers.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/nodes/compare_answers.py
@@ -1,6 +1,7 @@
 from datetime import datetime
 from typing import cast

+from langchain_core.messages import BaseMessage
 from langchain_core.messages import HumanMessage
 from langchain_core.runnables import RunnableConfig
 from langgraph.types import StreamWriter
@@ -10,16 +11,51 @@ from onyx.agents.agent_search.deep_search.main.states import (
 )
 from onyx.agents.agent_search.deep_search.main.states import MainState
 from onyx.agents.agent_search.models import GraphConfig
+from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
+    binary_string_test,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AGENT_LLM_RATELIMIT_MESSAGE,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AGENT_LLM_TIMEOUT_MESSAGE,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AGENT_POSITIVE_VALUE_STR,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AgentLLMErrorType,
+)
+from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
+from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
 from onyx.chat.models import RefinedAnswerImprovement
+from onyx.configs.agent_configs import AGENT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS
+from onyx.llm.chat_llm import LLMRateLimitError
+from onyx.llm.chat_llm import LLMTimeoutError
 from onyx.prompts.agent_search import (
    INITIAL_REFINED_ANSWER_COMPARISON_PROMPT,
 )
+from onyx.utils.logger import setup_logger
+from onyx.utils.timing import log_function_time
+
+logger = setup_logger()
+
+_llm_node_error_strings = LLMNodeErrorStrings(
+    timeout="The LLM timed out, and the answers could not be compared.",
+    rate_limit="The LLM encountered a rate limit, and the answers could not be compared.",
+    general_error="The LLM encountered an error, and the answers could not be compared.",
+)
+
+_ANSWER_QUALITY_NOT_SUFFICIENT_MESSAGE = (
+    "Answer quality is not sufficient, so stay with the initial answer."
+)


+@log_function_time(print_only=True)
 def compare_answers(
    state: MainState, config: RunnableConfig, writer: StreamWriter = lambda _: None
 ) -> InitialRefinedAnswerComparisonUpdate:
@@ -34,21 +70,75 @@ def compare_answers(
    initial_answer = state.initial_answer
    refined_answer = state.refined_answer

+    # if answer quality is not sufficient, then stay with the initial answer
+    if not state.refined_answer_quality:
+        write_custom_event(
+            "refined_answer_improvement",
+            RefinedAnswerImprovement(
+                refined_answer_improvement=False,
+            ),
+            writer,
+        )
+
+        return InitialRefinedAnswerComparisonUpdate(
+            refined_answer_improvement_eval=False,
+            log_messages=[
+                get_langgraph_node_log_string(
+                    graph_component="main",
+                    node_name="compare answers",
+                    node_start_time=node_start_time,
+                    result=_ANSWER_QUALITY_NOT_SUFFICIENT_MESSAGE,
+                )
+            ],
+        )
+
    compare_answers_prompt = INITIAL_REFINED_ANSWER_COMPARISON_PROMPT.format(
        question=question, initial_answer=initial_answer, refined_answer=refined_answer
    )

    msg = [HumanMessage(content=compare_answers_prompt)]

+    agent_error: AgentErrorLog | None = None
    # Get the rewritten queries in a defined format
    model = graph_config.tooling.fast_llm
-
+    resp: BaseMessage | None = None
+    refined_answer_improvement: bool | None = None
    # no need to stream this
-    resp = model.invoke(msg)
+    try:
+        resp = model.invoke(
+            msg, timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS
+        )

-    refined_answer_improvement = (
-        isinstance(resp.content, str) and "yes" in resp.content.lower()
-    )
+    except LLMTimeoutError:
+        agent_error = AgentErrorLog(
+            error_type=AgentLLMErrorType.TIMEOUT,
+            error_message=AGENT_LLM_TIMEOUT_MESSAGE,
+            error_result=_llm_node_error_strings.timeout,
+        )
+        logger.error("LLM Timeout Error - compare answers")
+        # continue as True in this support step
+    except LLMRateLimitError:
+        agent_error = AgentErrorLog(
+            error_type=AgentLLMErrorType.RATE_LIMIT,
+            error_message=AGENT_LLM_RATELIMIT_MESSAGE,
+            error_result=_llm_node_error_strings.rate_limit,
+        )
+        logger.error("LLM Rate Limit Error - compare answers")
+        # continue as True in this support step
+
+    if agent_error or resp is None:
+        refined_answer_improvement = True
+        if agent_error:
+            log_result = agent_error.error_result
+        else:
+            log_result = "An answer could not be generated."
+
+    else:
+        refined_answer_improvement = binary_string_test(
+            text=cast(str, resp.content),
+            positive_value=AGENT_POSITIVE_VALUE_STR,
+        )
+        log_result = f"Answer comparison: {refined_answer_improvement}"

    write_custom_event(
        "refined_answer_improvement",
@@ -65,7 +155,7 @@ def compare_answers(
                graph_component="main",
                node_name="compare answers",
                node_start_time=node_start_time,
-                result=f"Answer comparison: {refined_answer_improvement}",
+                result=log_result,
            )
        ],
    )
--- a/backend/onyx/agents/agent_search/deep_search/main/nodes/create_refined_sub_questions.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/nodes/create_refined_sub_questions.py
@@ -21,6 +21,18 @@ from onyx.agents.agent_search.models import GraphConfig
 from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
    build_history_prompt,
 )
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AGENT_LLM_RATELIMIT_MESSAGE,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AGENT_LLM_TIMEOUT_MESSAGE,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AgentLLMErrorType,
+)
+from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
+from onyx.agents.agent_search.shared_graph_utils.models import BaseMessage_Content
+from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
 from onyx.agents.agent_search.shared_graph_utils.utils import dispatch_separated
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    format_entity_term_extraction,
@@ -30,12 +42,31 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import make_question_id
 from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
+from onyx.chat.models import StreamingError
+from onyx.configs.agent_configs import (
+    AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION,
+)
+from onyx.llm.chat_llm import LLMRateLimitError
+from onyx.llm.chat_llm import LLMTimeoutError
 from onyx.prompts.agent_search import (
-    REFINEMENT_QUESTION_DECOMPOSITION_PROMPT,
+    REFINEMENT_QUESTION_DECOMPOSITION_PROMPT_W_INITIAL_SUBQUESTION_ANSWERS,
 )
 from onyx.tools.models import ToolCallKickoff
+from onyx.utils.logger import setup_logger
+from onyx.utils.timing import log_function_time
+
+logger = setup_logger()
+
+_ANSWERED_SUBQUESTIONS_DIVIDER = "\n\n---\n\n"
+
+_llm_node_error_strings = LLMNodeErrorStrings(
+    timeout="The LLM timed out. The sub-questions could not be generated.",
+    rate_limit="The LLM encountered a rate limit. The sub-questions could not be generated.",
+    general_error="The LLM encountered an error. The sub-questions could not be generated.",
+)


+@log_function_time(print_only=True)
 def create_refined_sub_questions(
    state: MainState, config: RunnableConfig, writer: StreamWriter = lambda _: None
 ) -> RefinedQuestionDecompositionUpdate:
@@ -72,8 +103,10 @@ def create_refined_sub_questions(

    initial_question_answers = state.sub_question_results

-    addressed_question_list = [
-        x.question for x in initial_question_answers if x.verified_high_quality
+    addressed_subquestions_with_answers = [
+        f"Subquestion: {x.question}\nSubanswer:\n{x.answer}"
+        for x in initial_question_answers
+        if x.verified_high_quality and x.answer
    ]

    failed_question_list = [
@@ -82,12 +115,14 @@ def create_refined_sub_questions(

    msg = [
        HumanMessage(
-            content=REFINEMENT_QUESTION_DECOMPOSITION_PROMPT.format(
+            content=REFINEMENT_QUESTION_DECOMPOSITION_PROMPT_W_INITIAL_SUBQUESTION_ANSWERS.format(
                question=question,
                history=history,
                entity_term_extraction_str=entity_term_extraction_str,
                base_answer=base_answer,
-                answered_sub_questions="\n - ".join(addressed_question_list),
+                answered_subquestions_with_answers=_ANSWERED_SUBQUESTIONS_DIVIDER.join(
+                    addressed_subquestions_with_answers
+                ),
                failed_sub_questions="\n - ".join(failed_question_list),
            ),
        )
@@ -96,29 +131,65 @@ def create_refined_sub_questions(
    # Grader
    model = graph_config.tooling.fast_llm

-    streamed_tokens = dispatch_separated(
-        model.stream(msg),
-        dispatch_subquestion(1, writer),
-        sep_callback=dispatch_subquestion_sep(1, writer),
-    )
-    response = merge_content(*streamed_tokens)
+    agent_error: AgentErrorLog | None = None
+    streamed_tokens: list[BaseMessage_Content] = []
+    try:
+        streamed_tokens = dispatch_separated(
+            model.stream(
+                msg,
+                timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION,
+            ),
+            dispatch_subquestion(1, writer),
+            sep_callback=dispatch_subquestion_sep(1, writer),
+        )
+    except LLMTimeoutError:
+        agent_error = AgentErrorLog(
+            error_type=AgentLLMErrorType.TIMEOUT,
+            error_message=AGENT_LLM_TIMEOUT_MESSAGE,
+            error_result=_llm_node_error_strings.timeout,
+        )
+        logger.error("LLM Timeout Error - create refined sub questions")

-    if isinstance(response, str):
-        parsed_response = [q for q in response.split("\n") if q.strip() != ""]
-    else:
-        raise ValueError("LLM response is not a string")
+    except LLMRateLimitError:
+        agent_error = AgentErrorLog(
+            error_type=AgentLLMErrorType.RATE_LIMIT,
+            error_message=AGENT_LLM_RATELIMIT_MESSAGE,
+            error_result=_llm_node_error_strings.rate_limit,
+        )
+        logger.error("LLM Rate Limit Error - create refined sub questions")

-    refined_sub_question_dict = {}
-    for sub_question_num, sub_question in enumerate(parsed_response):
-        refined_sub_question = RefinementSubQuestion(
-            sub_question=sub_question,
-            sub_question_id=make_question_id(1, sub_question_num + 1),
-            verified=False,
-            answered=False,
-            answer="",
+    if agent_error:
+        refined_sub_question_dict: dict[int, RefinementSubQuestion] = {}
+        log_result = agent_error.error_result
+        write_custom_event(
+            "refined_sub_question_creation_error",
+            StreamingError(
+                error="Your LLM was not able to create refined sub questions in time and timed out. Please try again.",
+            ),
+            writer,
        )

-        refined_sub_question_dict[sub_question_num + 1] = refined_sub_question
+    else:
+        response = merge_content(*streamed_tokens)
+
+        if isinstance(response, str):
+            parsed_response = [q for q in response.split("\n") if q.strip() != ""]
+        else:
+            raise ValueError("LLM response is not a string")
+
+        refined_sub_question_dict = {}
+        for sub_question_num, sub_question in enumerate(parsed_response):
+            refined_sub_question = RefinementSubQuestion(
+                sub_question=sub_question,
+                sub_question_id=make_question_id(1, sub_question_num + 1),
+                verified=False,
+                answered=False,
+                answer="",
+            )
+
+            refined_sub_question_dict[sub_question_num + 1] = refined_sub_question
+
+        log_result = f"Created {len(refined_sub_question_dict)} refined sub questions"

    return RefinedQuestionDecompositionUpdate(
        refined_sub_questions=refined_sub_question_dict,
@@ -128,7 +199,7 @@ def create_refined_sub_questions(
                graph_component="main",
                node_name="create refined sub questions",
                node_start_time=node_start_time,
-                result=f"Created {len(refined_sub_question_dict)} refined sub questions",
+                result=log_result,
            )
        ],
    )
--- a/backend/onyx/agents/agent_search/deep_search/main/nodes/decide_refinement_need.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/nodes/decide_refinement_need.py
@@ -11,8 +11,10 @@ from onyx.agents.agent_search.models import GraphConfig
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
 )
+from onyx.utils.timing import log_function_time


+@log_function_time(print_only=True)
 def decide_refinement_need(
    state: MainState, config: RunnableConfig
 ) -> RequireRefinemenEvalUpdate:
@@ -26,6 +28,19 @@ def decide_refinement_need(

    decision = True  # TODO: just for current testing purposes

+    if state.answer_error:
+        return RequireRefinemenEvalUpdate(
+            require_refined_answer_eval=False,
+            log_messages=[
+                get_langgraph_node_log_string(
+                    graph_component="main",
+                    node_name="decide refinement need",
+                    node_start_time=node_start_time,
+                    result="Timeout Error",
+                )
+            ],
+        )
+
    log_messages = [
        get_langgraph_node_log_string(
            graph_component="main",
--- a/backend/onyx/agents/agent_search/deep_search/main/nodes/extract_entities_terms.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/nodes/extract_entities_terms.py
@@ -21,11 +21,16 @@ from onyx.agents.agent_search.shared_graph_utils.utils import format_docs
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
 )
+from onyx.configs.agent_configs import (
+    AGENT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION,
+)
 from onyx.configs.constants import NUM_EXPLORATORY_DOCS
 from onyx.prompts.agent_search import ENTITY_TERM_EXTRACTION_PROMPT
 from onyx.prompts.agent_search import ENTITY_TERM_EXTRACTION_PROMPT_JSON_EXAMPLE
+from onyx.utils.timing import log_function_time


+@log_function_time(print_only=True)
 def extract_entities_terms(
    state: MainState, config: RunnableConfig
 ) -> EntityTermExtractionUpdate:
@@ -81,6 +86,7 @@ def extract_entities_terms(
    # Grader
    llm_response = fast_llm.invoke(
        prompt=msg,
+        timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION,
    )

    cleaned_response = (
--- a/backend/onyx/agents/agent_search/deep_search/main/nodes/generate_validate_refined_answer.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/nodes/generate_validate_refined_answer.py
@@ -11,27 +11,49 @@ from onyx.agents.agent_search.deep_search.main.models import (
    AgentRefinedMetrics,
 )
 from onyx.agents.agent_search.deep_search.main.operations import get_query_info
-from onyx.agents.agent_search.deep_search.main.operations import logger
 from onyx.agents.agent_search.deep_search.main.states import MainState
 from onyx.agents.agent_search.deep_search.main.states import (
    RefinedAnswerUpdate,
 )
 from onyx.agents.agent_search.models import GraphConfig
+from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
+    binary_string_test_after_answer_separator,
+)
 from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
    get_prompt_enrichment_components,
 )
 from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
    trim_prompt_piece,
 )
-from onyx.agents.agent_search.shared_graph_utils.models import InferenceSection
+from onyx.agents.agent_search.shared_graph_utils.calculations import (
+    get_answer_generation_documents,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import AGENT_ANSWER_SEPARATOR
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AGENT_LLM_RATELIMIT_MESSAGE,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AGENT_LLM_TIMEOUT_MESSAGE,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AGENT_POSITIVE_VALUE_STR,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AgentLLMErrorType,
+)
+from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
+from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
 from onyx.agents.agent_search.shared_graph_utils.models import RefinedAgentStats
 from onyx.agents.agent_search.shared_graph_utils.operators import (
-    dedup_inference_sections,
+    dedup_inference_section_list,
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    dispatch_main_answer_stop_info,
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import format_docs
+from onyx.agents.agent_search.shared_graph_utils.utils import (
+    get_deduplicated_structured_subquestion_documents,
+)
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
 )
@@ -43,26 +65,50 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
 from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
 from onyx.chat.models import AgentAnswerPiece
 from onyx.chat.models import ExtendedToolResponse
+from onyx.chat.models import StreamingError
 from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
+from onyx.configs.agent_configs import AGENT_MAX_STREAMED_DOCS_FOR_REFINED_ANSWER
 from onyx.configs.agent_configs import AGENT_MIN_ORIG_QUESTION_DOCS
+from onyx.configs.agent_configs import (
+    AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION,
+)
+from onyx.configs.agent_configs import (
+    AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_VALIDATION,
+)
+from onyx.llm.chat_llm import LLMRateLimitError
+from onyx.llm.chat_llm import LLMTimeoutError
 from onyx.prompts.agent_search import (
    REFINED_ANSWER_PROMPT_W_SUB_QUESTIONS,
 )
 from onyx.prompts.agent_search import (
    REFINED_ANSWER_PROMPT_WO_SUB_QUESTIONS,
 )
+from onyx.prompts.agent_search import (
+    REFINED_ANSWER_VALIDATION_PROMPT,
+)
 from onyx.prompts.agent_search import (
    SUB_QUESTION_ANSWER_TEMPLATE_REFINED,
 )
 from onyx.prompts.agent_search import UNKNOWN_ANSWER
 from onyx.tools.tool_implementations.search.search_tool import yield_search_responses
+from onyx.utils.logger import setup_logger
+from onyx.utils.timing import log_function_time
+
+logger = setup_logger()
+
+_llm_node_error_strings = LLMNodeErrorStrings(
+    timeout="The LLM timed out. The refined answer could not be generated.",
+    rate_limit="The LLM encountered a rate limit. The refined answer could not be generated.",
+    general_error="The LLM encountered an error. The refined answer could not be generated.",
+)


-def generate_refined_answer(
+@log_function_time(print_only=True)
+def generate_validate_refined_answer(
    state: MainState, config: RunnableConfig, writer: StreamWriter = lambda _: None
 ) -> RefinedAnswerUpdate:
    """
-    LangGraph node to generate the refined answer.
+    LangGraph node to generate the refined answer and validate it.
    """

    node_start_time = datetime.now()
@@ -76,19 +122,24 @@ def generate_refined_answer(
    )

    verified_reranked_documents = state.verified_reranked_documents
-    sub_questions_cited_documents = state.cited_documents
+
+    # get all documents cited in sub-questions
+    structured_subquestion_docs = get_deduplicated_structured_subquestion_documents(
+        state.sub_question_results
+    )
+
    original_question_verified_documents = (
        state.orig_question_verified_reranked_documents
    )
    original_question_retrieved_documents = state.orig_question_retrieved_documents

-    consolidated_context_docs: list[InferenceSection] = sub_questions_cited_documents
+    consolidated_context_docs = structured_subquestion_docs.cited_documents

    counter = 0
    for original_doc_number, original_doc in enumerate(
        original_question_verified_documents
    ):
-        if original_doc_number not in sub_questions_cited_documents:
+        if original_doc_number not in structured_subquestion_docs.cited_documents:
            if (
                counter <= AGENT_MIN_ORIG_QUESTION_DOCS
                or len(consolidated_context_docs)
@@ -99,14 +150,16 @@ def generate_refined_answer(
                counter += 1

    # sort docs by their scores - though the scores refer to different questions
-    relevant_docs = dedup_inference_sections(
-        consolidated_context_docs, consolidated_context_docs
-    )
+    relevant_docs = dedup_inference_section_list(consolidated_context_docs)

-    streaming_docs = (
-        relevant_docs
-        if len(relevant_docs) > 0
-        else original_question_retrieved_documents[:15]
+    # Create the list of documents to stream out. Start with the
+    # ones that will be in the context (or, if len == 0, use docs
+    # that were retrieved for the original question)
+    answer_generation_documents = get_answer_generation_documents(
+        relevant_docs=relevant_docs,
+        context_documents=structured_subquestion_docs.context_documents,
+        original_question_docs=original_question_retrieved_documents,
+        max_docs=AGENT_MAX_STREAMED_DOCS_FOR_REFINED_ANSWER,
    )

    query_info = get_query_info(state.orig_question_sub_query_retrieval_results)
@@ -114,11 +167,13 @@ def generate_refined_answer(
        graph_config.tooling.search_tool
    ), "search_tool must be provided for agentic search"
    # stream refined answer docs, or original question docs if no relevant docs are found
-    relevance_list = relevance_from_docs(relevant_docs)
+    relevance_list = relevance_from_docs(
+        answer_generation_documents.streaming_documents
+    )
    for tool_response in yield_search_responses(
        query=question,
-        reranked_sections=streaming_docs,
-        final_context_sections=streaming_docs,
+        reranked_sections=answer_generation_documents.streaming_documents,
+        final_context_sections=answer_generation_documents.context_documents,
        search_query_info=query_info,
        get_section_relevance=lambda: relevance_list,
        search_tool=graph_config.tooling.search_tool,
@@ -199,7 +254,7 @@ def generate_refined_answer(
    )

    model = graph_config.tooling.fast_llm
-    relevant_docs_str = format_docs(relevant_docs)
+    relevant_docs_str = format_docs(answer_generation_documents.context_documents)
    relevant_docs_str = trim_prompt_piece(
        model.config,
        relevant_docs_str,
@@ -231,28 +286,80 @@ def generate_refined_answer(

    streamed_tokens: list[str | list[str | dict[str, Any]]] = [""]
    dispatch_timings: list[float] = []
-    for message in model.stream(msg):
-        # TODO: in principle, the answer here COULD contain images, but we don't support that yet
-        content = message.content
-        if not isinstance(content, str):
-            raise ValueError(
-                f"Expected content to be a string, but got {type(content)}"
-            )
+    agent_error: AgentErrorLog | None = None

-        start_stream_token = datetime.now()
+    try:
+        for message in model.stream(
+            msg, timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION
+        ):
+            # TODO: in principle, the answer here COULD contain images, but we don't support that yet
+            content = message.content
+            if not isinstance(content, str):
+                raise ValueError(
+                    f"Expected content to be a string, but got {type(content)}"
+                )
+
+            start_stream_token = datetime.now()
+            write_custom_event(
+                "refined_agent_answer",
+                AgentAnswerPiece(
+                    answer_piece=content,
+                    level=1,
+                    level_question_num=0,
+                    answer_type="agent_level_answer",
+                ),
+                writer,
+            )
+            end_stream_token = datetime.now()
+            dispatch_timings.append(
+                (end_stream_token - start_stream_token).microseconds
+            )
+            streamed_tokens.append(content)
+
+    except LLMTimeoutError:
+        agent_error = AgentErrorLog(
+            error_type=AgentLLMErrorType.TIMEOUT,
+            error_message=AGENT_LLM_TIMEOUT_MESSAGE,
+            error_result=_llm_node_error_strings.timeout,
+        )
+        logger.error("LLM Timeout Error - generate refined answer")
+
+    except LLMRateLimitError:
+        agent_error = AgentErrorLog(
+            error_type=AgentLLMErrorType.RATE_LIMIT,
+            error_message=AGENT_LLM_RATELIMIT_MESSAGE,
+            error_result=_llm_node_error_strings.rate_limit,
+        )
+        logger.error("LLM Rate Limit Error - generate refined answer")
+
+    if agent_error:
        write_custom_event(
-            "refined_agent_answer",
-            AgentAnswerPiece(
-                answer_piece=content,
-                level=1,
-                level_question_num=0,
-                answer_type="agent_level_answer",
+            "initial_agent_answer",
+            StreamingError(
+                error=AGENT_LLM_TIMEOUT_MESSAGE,
            ),
            writer,
        )
-        end_stream_token = datetime.now()
-        dispatch_timings.append((end_stream_token - start_stream_token).microseconds)
-        streamed_tokens.append(content)
+
+        return RefinedAnswerUpdate(
+            refined_answer=None,
+            refined_answer_quality=False,  # TODO: replace this with the actual check value
+            refined_agent_stats=None,
+            agent_refined_end_time=None,
+            agent_refined_metrics=AgentRefinedMetrics(
+                refined_doc_boost_factor=0.0,
+                refined_question_boost_factor=0.0,
+                duration_s=None,
+            ),
+            log_messages=[
+                get_langgraph_node_log_string(
+                    graph_component="main",
+                    node_name="generate refined answer",
+                    node_start_time=node_start_time,
+                    result=agent_error.error_result or "An LLM error occurred",
+                )
+            ],
+        )

    logger.debug(
        f"Average dispatch time for refined answer: {sum(dispatch_timings) / len(dispatch_timings)}"
@@ -261,54 +368,43 @@ def generate_refined_answer(
    response = merge_content(*streamed_tokens)
    answer = cast(str, response)

+    # run a validation step for the refined answer only
+
+    msg = [
+        HumanMessage(
+            content=REFINED_ANSWER_VALIDATION_PROMPT.format(
+                question=question,
+                history=prompt_enrichment_components.history,
+                answered_sub_questions=sub_question_answer_str,
+                relevant_docs=relevant_docs_str,
+                proposed_answer=answer,
+                persona_specification=persona_contextualized_prompt,
+            )
+        )
+    ]
+
+    try:
+        validation_response = model.invoke(
+            msg, timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_VALIDATION
+        )
+        refined_answer_quality = binary_string_test_after_answer_separator(
+            text=cast(str, validation_response.content),
+            positive_value=AGENT_POSITIVE_VALUE_STR,
+            separator=AGENT_ANSWER_SEPARATOR,
+        )
+    except LLMTimeoutError:
+        refined_answer_quality = True
+        logger.error("LLM Timeout Error - validate refined answer")
+
+    except LLMRateLimitError:
+        refined_answer_quality = True
+        logger.error("LLM Rate Limit Error - validate refined answer")
+
    refined_agent_stats = RefinedAgentStats(
        revision_doc_efficiency=refined_doc_effectiveness,
        revision_question_efficiency=revision_question_efficiency,
    )

-    logger.debug(f"\n\n---INITIAL ANSWER ---\n\n Answer:\n Agent: {initial_answer}")
-    logger.debug("-" * 10)
-    logger.debug(f"\n\n---REVISED AGENT ANSWER ---\n\n Answer:\n Agent: {answer}")
-
-    logger.debug("-" * 100)
-
-    if state.initial_agent_stats:
-        initial_doc_boost_factor = state.initial_agent_stats.agent_effectiveness.get(
-            "utilized_chunk_ratio", "--"
-        )
-        initial_support_boost_factor = (
-            state.initial_agent_stats.agent_effectiveness.get("support_ratio", "--")
-        )
-        num_initial_verified_docs = state.initial_agent_stats.original_question.get(
-            "num_verified_documents", "--"
-        )
-        initial_verified_docs_avg_score = (
-            state.initial_agent_stats.original_question.get("verified_avg_score", "--")
-        )
-        initial_sub_questions_verified_docs = (
-            state.initial_agent_stats.sub_questions.get("num_verified_documents", "--")
-        )
-
-        logger.debug("INITIAL AGENT STATS")
-        logger.debug(f"Document Boost Factor: {initial_doc_boost_factor}")
-        logger.debug(f"Support Boost Factor: {initial_support_boost_factor}")
-        logger.debug(f"Originally Verified Docs: {num_initial_verified_docs}")
-        logger.debug(
-            f"Originally Verified Docs Avg Score: {initial_verified_docs_avg_score}"
-        )
-        logger.debug(
-            f"Sub-Questions Verified Docs: {initial_sub_questions_verified_docs}"
-        )
-    if refined_agent_stats:
-        logger.debug("-" * 10)
-        logger.debug("REFINED AGENT STATS")
-        logger.debug(
-            f"Revision Doc Factor: {refined_agent_stats.revision_doc_efficiency}"
-        )
-        logger.debug(
-            f"Revision Question Factor: {refined_agent_stats.revision_question_efficiency}"
-        )
-
    agent_refined_end_time = datetime.now()
    if state.agent_refined_start_time:
        agent_refined_duration = (
@@ -325,7 +421,7 @@ def generate_refined_answer(

    return RefinedAnswerUpdate(
        refined_answer=answer,
-        refined_answer_quality=True,  # TODO: replace this with the actual check value
+        refined_answer_quality=refined_answer_quality,
        refined_agent_stats=refined_agent_stats,
        agent_refined_end_time=agent_refined_end_time,
        agent_refined_metrics=agent_refined_metrics,
--- a/backend/onyx/agents/agent_search/deep_search/main/states.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/states.py
@@ -17,6 +17,7 @@ from onyx.agents.agent_search.orchestration.states import ToolCallUpdate
 from onyx.agents.agent_search.orchestration.states import ToolChoiceInput
 from onyx.agents.agent_search.orchestration.states import ToolChoiceUpdate
 from onyx.agents.agent_search.shared_graph_utils.models import AgentChunkRetrievalStats
+from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
 from onyx.agents.agent_search.shared_graph_utils.models import (
    EntityRelationshipTermExtraction,
 )
@@ -76,6 +77,7 @@ class InitialAnswerUpdate(LoggerUpdate):
    """

    initial_answer: str | None = None
+    answer_error: AgentErrorLog | None = None
    initial_agent_stats: InitialAgentResultStats | None = None
    generated_sub_questions: list[str] = []
    agent_base_end_time: datetime | None = None
@@ -88,6 +90,7 @@ class RefinedAnswerUpdate(RefinedAgentEndStats, LoggerUpdate):
    """

    refined_answer: str | None = None
+    answer_error: AgentErrorLog | None = None
    refined_agent_stats: RefinedAgentStats | None = None
    refined_answer_quality: bool = False

--- a/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/expand_queries.py
+++ b/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/expand_queries.py
@@ -16,16 +16,44 @@ from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states impor
    QueryExpansionUpdate,
 )
 from onyx.agents.agent_search.models import GraphConfig
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AGENT_LLM_RATELIMIT_MESSAGE,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AGENT_LLM_TIMEOUT_MESSAGE,
+)
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AgentLLMErrorType,
+)
+from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLog
+from onyx.agents.agent_search.shared_graph_utils.models import BaseMessage_Content
+from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
 from onyx.agents.agent_search.shared_graph_utils.utils import dispatch_separated
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_langgraph_node_log_string,
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id
+from onyx.configs.agent_configs import (
+    AGENT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION,
+)
+from onyx.llm.chat_llm import LLMRateLimitError
+from onyx.llm.chat_llm import LLMTimeoutError
 from onyx.prompts.agent_search import (
    QUERY_REWRITING_PROMPT,
 )
+from onyx.utils.logger import setup_logger
+from onyx.utils.timing import log_function_time
+
+logger = setup_logger()
+
+_llm_node_error_strings = LLMNodeErrorStrings(
+    timeout="Query rewriting failed due to LLM timeout - the original question will be used.",
+    rate_limit="Query rewriting failed due to LLM rate limit - the original question will be used.",
+    general_error="Query rewriting failed due to LLM error - the original question will be used.",
+)


+@log_function_time(print_only=True)
 def expand_queries(
    state: ExpandedRetrievalInput,
    config: RunnableConfig,
@@ -54,13 +82,43 @@ def expand_queries(
        )
    ]

-    llm_response_list = dispatch_separated(
-        llm.stream(prompt=msg), dispatch_subquery(level, question_num, writer)
-    )
+    agent_error: AgentErrorLog | None = None
+    llm_response_list: list[BaseMessage_Content] = []
+    llm_response = ""
+    rewritten_queries = []

-    llm_response = merge_message_runs(llm_response_list, chunk_separator="")[0].content
+    try:
+        llm_response_list = dispatch_separated(
+            llm.stream(
+                prompt=msg,
+                timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION,
+            ),
+            dispatch_subquery(level, question_num, writer),
+        )
+        llm_response = merge_message_runs(llm_response_list, chunk_separator="")[
+            0
+        ].content
+        rewritten_queries = llm_response.split("\n")
+        log_result = f"Number of expanded queries: {len(rewritten_queries)}"

-    rewritten_queries = llm_response.split("\n")
+    except LLMTimeoutError:
+        agent_error = AgentErrorLog(
+            error_type=AgentLLMErrorType.TIMEOUT,
+            error_message=AGENT_LLM_TIMEOUT_MESSAGE,
+            error_result=_llm_node_error_strings.timeout,
+        )
+        logger.error("LLM Timeout Error - expand queries")
+        log_result = agent_error.error_result
+
+    except LLMRateLimitError:
+        agent_error = AgentErrorLog(
+            error_type=AgentLLMErrorType.RATE_LIMIT,
+            error_message=AGENT_LLM_RATELIMIT_MESSAGE,
+            error_result=_llm_node_error_strings.rate_limit,
+        )
+        logger.error("LLM Rate Limit Error - expand queries")
+        log_result = agent_error.error_result
+    # NOTE: on LLM failure rewritten_queries stays empty here; presumably the (sub)question is used as the query downstream - TODO confirm

    return QueryExpansionUpdate(
        expanded_queries=rewritten_queries,
@@ -69,7 +127,7 @@ def expand_queries(
                graph_component="shared - expanded retrieval",
                node_name="expand queries",
                node_start_time=node_start_time,
-                result=f"Number of expanded queries: {len(rewritten_queries)}",
+                result=log_result,
            )
        ],
    )
--- a/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/rerank_documents.py
+++ b/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/rerank_documents.py
@@ -21,12 +21,15 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
 from onyx.configs.agent_configs import AGENT_RERANKING_MAX_QUERY_RETRIEVAL_RESULTS
 from onyx.configs.agent_configs import AGENT_RERANKING_STATS
 from onyx.context.search.models import InferenceSection
-from onyx.context.search.models import SearchRequest
-from onyx.context.search.pipeline import retrieval_preprocessing
+from onyx.context.search.models import RerankingDetails
 from onyx.context.search.postprocessing.postprocessing import rerank_sections
+from onyx.context.search.postprocessing.postprocessing import should_rerank
 from onyx.db.engine import get_session_context_manager
+from onyx.db.search_settings import get_current_search_settings
+from onyx.utils.timing import log_function_time


+@log_function_time(print_only=True)
 def rerank_documents(
    state: ExpandedRetrievalState, config: RunnableConfig
 ) -> DocRerankingUpdate:
@@ -39,6 +42,8 @@ def rerank_documents(

    # Rerank post retrieval and verification. First, create a search query
    # then create the list of reranked sections
+    # If no question defined/question is None in the state, use the original
+    # question from the search request as query

    graph_config = cast(GraphConfig, config["metadata"]["config"])
    question = (
@@ -47,39 +52,28 @@ def rerank_documents(
    assert (
        graph_config.tooling.search_tool
    ), "search_tool must be provided for agentic search"
-    with get_session_context_manager() as db_session:
-        # we ignore some of the user specified fields since this search is
-        # internal to agentic search, but we still want to pass through
-        # persona (for stuff like document sets) and rerank settings
-        # (to not make an unnecessary db call).
-        search_request = SearchRequest(
-            query=question,
-            persona=graph_config.inputs.search_request.persona,
-            rerank_settings=graph_config.inputs.search_request.rerank_settings,
-        )
-        _search_query = retrieval_preprocessing(
-            search_request=search_request,
-            user=graph_config.tooling.search_tool.user,  # bit of a hack
-            llm=graph_config.tooling.fast_llm,
-            db_session=db_session,
-        )

-    # skip section filtering
+    # Note: these values are passed in from the API; they are overrides and are typically None
+    rerank_settings = graph_config.inputs.search_request.rerank_settings

-    if (
-        _search_query.rerank_settings
-        and _search_query.rerank_settings.rerank_model_name
-        and _search_query.rerank_settings.num_rerank > 0
-        and len(verified_documents) > 0
-    ):
+    if rerank_settings is None:
+        with get_session_context_manager() as db_session:
+            search_settings = get_current_search_settings(db_session)
+            if not search_settings.disable_rerank_for_streaming:
+                rerank_settings = RerankingDetails.from_db_model(search_settings)
+
+    if should_rerank(rerank_settings) and len(verified_documents) > 0:
        if len(verified_documents) > 1:
            reranked_documents = rerank_sections(
-                _search_query,
-                verified_documents,
+                query_str=question,
+                # if runnable, then rerank_settings is not None
+                rerank_settings=cast(RerankingDetails, rerank_settings),
+                sections_to_rerank=verified_documents,
            )
        else:
-            num = "No" if len(verified_documents) == 0 else "One"
-            logger.warning(f"{num} verified document(s) found, skipping reranking")
+            logger.warning(
+                f"{len(verified_documents)} verified document(s) found, skipping reranking"
+            )
            reranked_documents = verified_documents
    else:
        logger.warning("No reranking settings found, using unranked documents")
--- a/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/retrieve_documents.py
+++ b/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/retrieve_documents.py
@@ -23,12 +23,15 @@ from onyx.configs.agent_configs import AGENT_RETRIEVAL_STATS
 from onyx.context.search.models import InferenceSection
 from onyx.db.engine import get_session_context_manager
 from onyx.tools.models import SearchQueryInfo
+from onyx.tools.models import SearchToolOverrideKwargs
 from onyx.tools.tool_implementations.search.search_tool import (
    SEARCH_RESPONSE_SUMMARY_ID,
 )
 from onyx.tools.tool_implementations.search.search_tool import SearchResponseSummary
+from onyx.utils.timing import log_function_time


+@log_function_time(print_only=True)
 def retrieve_documents(
    state: RetrievalInput, config: RunnableConfig
 ) -> DocRetrievalUpdate:
@@ -67,9 +70,12 @@ def retrieve_documents(
    with get_session_context_manager() as db_session:
        for tool_response in search_tool.run(
            query=query_to_retrieve,
-            force_no_rerank=True,
-            alternate_db_session=db_session,
-            retrieved_sections_callback=callback_container.append,
+            override_kwargs=SearchToolOverrideKwargs(
+                force_no_rerank=True,
+                alternate_db_session=db_session,
+                retrieved_sections_callback=callback_container.append,
+                skip_query_analysis=not state.base_search,
+            ),
        ):
            # get retrieved docs to send to the rest of the graph
            if tool_response.id == SEARCH_RESPONSE_SUMMARY_ID:
--- a/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/verify_documents.py
+++ b/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/verify_documents.py
@@ -1,5 +1,7 @@
+from datetime import datetime
 from typing import cast

+from langchain_core.messages import BaseMessage
 from langchain_core.messages import HumanMessage
 from langchain_core.runnables.config import RunnableConfig

@@ -10,14 +12,38 @@ from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states impor
    DocVerificationUpdate,
 )
 from onyx.agents.agent_search.models import GraphConfig
+from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
+    binary_string_test,
+)
 from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
    trim_prompt_piece,
 )
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AGENT_POSITIVE_VALUE_STR,
+)
+from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
+from onyx.agents.agent_search.shared_graph_utils.utils import (
+    get_langgraph_node_log_string,
+)
+from onyx.configs.agent_configs import AGENT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION
+from onyx.llm.chat_llm import LLMRateLimitError
+from onyx.llm.chat_llm import LLMTimeoutError
 from onyx.prompts.agent_search import (
    DOCUMENT_VERIFICATION_PROMPT,
 )
+from onyx.utils.logger import setup_logger
+from onyx.utils.timing import log_function_time
+
+logger = setup_logger()
+
+_llm_node_error_strings = LLMNodeErrorStrings(
+    timeout="The LLM timed out. The document could not be verified. The document will be treated as 'relevant'",
+    rate_limit="The LLM encountered a rate limit. The document could not be verified. The document will be treated as 'relevant'",
+    general_error="The LLM encountered an error. The document could not be verified. The document will be treated as 'relevant'",
+)


+@log_function_time(print_only=True)
 def verify_documents(
    state: DocVerificationInput, config: RunnableConfig
 ) -> DocVerificationUpdate:
@@ -26,12 +52,14 @@ def verify_documents(

    Args:
        state (DocVerificationInput): The current state
-        config (RunnableConfig): Configuration containing ProSearchConfig
+        config (RunnableConfig): Configuration containing AgentSearchConfig

    Updates:
        verified_documents: list[InferenceSection]
    """

+    node_start_time = datetime.now()
+
    question = state.question
    retrieved_document_to_verify = state.retrieved_document_to_verify
    document_content = retrieved_document_to_verify.combined_content
@@ -51,12 +79,40 @@ def verify_documents(
        )
    ]

-    response = fast_llm.invoke(msg)
+    response: BaseMessage | None = None

-    verified_documents = []
-    if isinstance(response.content, str) and "yes" in response.content.lower():
-        verified_documents.append(retrieved_document_to_verify)
+    verified_documents = [
+        retrieved_document_to_verify
+    ]  # default is to treat document as relevant
+
+    try:
+        response = fast_llm.invoke(
+            msg, timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION
+        )
+
+        assert isinstance(response.content, str)
+        if not binary_string_test(
+            text=response.content, positive_value=AGENT_POSITIVE_VALUE_STR
+        ):
+            verified_documents = []
+
+    except LLMTimeoutError:
+        # In this case, we decide to continue and don't raise an error, as
+        # there is little harm in letting some less relevant docs through.
+        logger.error("LLM Timeout Error - verify documents")
+
+    except LLMRateLimitError:
+        # In this case, we decide to continue and don't raise an error, as
+        # there is little harm in letting some less relevant docs through.
+        logger.error("LLM Rate Limit Error - verify documents")

    return DocVerificationUpdate(
        verified_documents=verified_documents,
+        log_messages=[
+            get_langgraph_node_log_string(
+                graph_component="shared - expanded retrieval",
+                node_name="verify documents",
+                node_start_time=node_start_time,
+            )
+        ],
    )
--- a/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/states.py
+++ b/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/states.py
@@ -21,9 +21,13 @@ from onyx.context.search.models import InferenceSection


 class ExpandedRetrievalInput(SubgraphCoreState):
-    question: str = ""
-    base_search: bool = False
+    # Exception to the 'no default value' rule for LangGraph input states:
+    # here, a sub_question_id of None implies usage for the
+    # original question. This is sometimes needed for nested sub-graphs.
+
    sub_question_id: str | None = None
+    question: str
+    base_search: bool


 ## Update/Return States
@@ -34,7 +38,7 @@ class QueryExpansionUpdate(LoggerUpdate, BaseModel):
    log_messages: list[str] = []


-class DocVerificationUpdate(BaseModel):
+class DocVerificationUpdate(LoggerUpdate, BaseModel):
    verified_documents: Annotated[list[InferenceSection], dedup_inference_sections] = []


@@ -88,4 +92,4 @@ class DocVerificationInput(ExpandedRetrievalInput):


 class RetrievalInput(ExpandedRetrievalInput):
-    query_to_retrieve: str = ""
+    query_to_retrieve: str
--- a/backend/onyx/agents/agent_search/run_graph.py
+++ b/backend/onyx/agents/agent_search/run_graph.py
@@ -12,7 +12,7 @@ from onyx.agents.agent_search.deep_search.main.graph_builder import (
    main_graph_builder as main_graph_builder_a,
 )
 from onyx.agents.agent_search.deep_search.main.states import (
-    MainInput as MainInput_a,
+    MainInput as MainInput,
 )
 from onyx.agents.agent_search.models import GraphConfig
 from onyx.agents.agent_search.shared_graph_utils.utils import get_test_config
@@ -21,6 +21,7 @@ from onyx.chat.models import AnswerPacket
 from onyx.chat.models import AnswerStream
 from onyx.chat.models import ExtendedToolResponse
 from onyx.chat.models import RefinedAnswerImprovement
+from onyx.chat.models import StreamingError
 from onyx.chat.models import StreamStopInfo
 from onyx.chat.models import SubQueryPiece
 from onyx.chat.models import SubQuestionPiece
@@ -33,6 +34,7 @@ from onyx.llm.factory import get_default_llms
 from onyx.tools.tool_runner import ToolCallKickoff
 from onyx.utils.logger import setup_logger

+
 logger = setup_logger()

 _COMPILED_GRAPH: CompiledStateGraph | None = None
@@ -72,13 +74,15 @@ def _parse_agent_event(
            return cast(AnswerPacket, event["data"])
        elif event["name"] == "refined_answer_improvement":
            return cast(RefinedAnswerImprovement, event["data"])
+        elif event["name"] == "refined_sub_question_creation_error":
+            return cast(StreamingError, event["data"])
    return None


 def manage_sync_streaming(
    compiled_graph: CompiledStateGraph,
    config: GraphConfig,
-    graph_input: BasicInput | MainInput_a,
+    graph_input: BasicInput | MainInput,
 ) -> Iterable[StreamEvent]:
    message_id = config.persistence.message_id if config.persistence else None
    for event in compiled_graph.stream(
@@ -92,7 +96,7 @@ def manage_sync_streaming(
 def run_graph(
    compiled_graph: CompiledStateGraph,
    config: GraphConfig,
-    input: BasicInput | MainInput_a,
+    input: BasicInput | MainInput,
 ) -> AnswerStream:
    config.behavior.perform_initial_search_decomposition = (
        INITIAL_SEARCH_DECOMPOSITION_ENABLED
@@ -123,9 +127,7 @@ def run_main_graph(
 ) -> AnswerStream:
    compiled_graph = load_compiled_graph()

-    input = MainInput_a(
-        base_question=config.inputs.search_request.query, log_messages=[]
-    )
+    input = MainInput(log_messages=[])

    # Agent search is not a Tool per se, but this is helpful for the frontend
    yield ToolCallKickoff(
@@ -140,7 +142,7 @@ def run_basic_graph(
 ) -> AnswerStream:
    graph = basic_graph_builder()
    compiled_graph = graph.compile()
-    input = BasicInput()
+    input = BasicInput(unused=True)
    return run_graph(compiled_graph, config, input)


@@ -172,9 +174,7 @@ if __name__ == "__main__":
            # search_request.persona = get_persona_by_id(1, None, db_session)
            # config.perform_initial_search_path_decision = False
            config.behavior.perform_initial_search_decomposition = True
-            input = MainInput_a(
-                base_question=config.inputs.search_request.query, log_messages=[]
-            )
+            input = MainInput(log_messages=[])

            tool_responses: list = []
            for output in run_graph(compiled_graph, config, input):
--- a/backend/onyx/agents/agent_search/shared_graph_utils/agent_prompt_ops.py
+++ b/backend/onyx/agents/agent_search/shared_graph_utils/agent_prompt_ops.py
@@ -7,6 +7,7 @@ from onyx.agents.agent_search.models import GraphConfig
 from onyx.agents.agent_search.shared_graph_utils.models import (
    AgentPromptEnrichmentComponents,
 )
+from onyx.agents.agent_search.shared_graph_utils.utils import format_docs
 from onyx.agents.agent_search.shared_graph_utils.utils import (
    get_persona_agent_prompt_expressions,
 )
@@ -40,13 +41,7 @@ def build_sub_question_answer_prompt(

    date_str = build_date_time_string()

-    # TODO: This should include document metadata and title
-    docs_format_list = [
-        f"Document Number: [D{doc_num + 1}]\nContent: {doc.combined_content}\n\n"
-        for doc_num, doc in enumerate(docs)
-    ]
-
-    docs_str = "\n\n".join(docs_format_list)
+    docs_str = format_docs(docs)

    docs_str = trim_prompt_piece(
        config,
@@ -150,3 +145,38 @@ def get_prompt_enrichment_components(
        history=history,
        date_str=date_str,
    )
+
+
+def binary_string_test(text: str, positive_value: str = "yes") -> bool:
+    """
+    Tests if a string contains a positive value (case-insensitive).
+
+    Args:
+        text: The string to test
+        positive_value: The value to look for (defaults to "yes")
+
+    Returns:
+        True if the positive value is found in the text
+    """
+    return positive_value.lower() in text.lower()
+
+
+def binary_string_test_after_answer_separator(
+    text: str, positive_value: str = "yes", separator: str = "Answer:"
+) -> bool:
+    """
+    Tests if the text after the last `separator` contains a positive value (case-insensitive).
+
+    Args:
+        text: The string to test
+        positive_value: The value to look for (defaults to "yes")
+
+    Returns:
+        True if the positive value follows the last separator; False if the separator is absent
+    """
+
+    if separator not in text:
+        return False
+    relevant_text = text.split(f"{separator}")[-1]
+
+    return binary_string_test(relevant_text, positive_value)
--- a/backend/onyx/agents/agent_search/shared_graph_utils/calculations.py
+++ b/backend/onyx/agents/agent_search/shared_graph_utils/calculations.py
@@ -1,7 +1,11 @@
 import numpy as np

+from onyx.agents.agent_search.shared_graph_utils.models import AnswerGenerationDocuments
 from onyx.agents.agent_search.shared_graph_utils.models import RetrievalFitScoreMetrics
 from onyx.agents.agent_search.shared_graph_utils.models import RetrievalFitStats
+from onyx.agents.agent_search.shared_graph_utils.operators import (
+    dedup_inference_section_list,
+)
 from onyx.chat.models import SectionRelevancePiece
 from onyx.context.search.models import InferenceSection
 from onyx.utils.logger import setup_logger
@@ -96,3 +100,106 @@ def get_fit_scores(
    )

    return fit_eval
+
+
+def get_answer_generation_documents(
+    relevant_docs: list[InferenceSection],
+    context_documents: list[InferenceSection],
+    original_question_docs: list[InferenceSection],
+    max_docs: int,
+) -> AnswerGenerationDocuments:
+    """
+    Create a deduplicated list of documents to stream, prioritizing relevant docs.
+
+    Args:
+        relevant_docs: Primary documents to include
+        context_documents: Additional context documents to append
+        original_question_docs: Original question documents to append
+        max_docs: Maximum number of documents to return
+
+    Returns:
+        AnswerGenerationDocuments with streaming and context documents, each limited to max_docs
+    """
+    # get relevant_doc ids
+    relevant_doc_ids = [doc.center_chunk.document_id for doc in relevant_docs]
+
+    # Start with a copy of the relevant docs so the appends below do not alter the caller's list
+    streaming_documents = relevant_docs.copy()
+
+    # Use a set for O(1) lookups of document IDs
+    seen_doc_ids = {doc.center_chunk.document_id for doc in streaming_documents}
+
+    # Combine additional documents to check in one iteration
+    additional_docs = context_documents + original_question_docs
+    for doc_idx, doc in enumerate(additional_docs):
+        doc_id = doc.center_chunk.document_id
+        if doc_id not in seen_doc_ids:
+            streaming_documents.append(doc)
+            seen_doc_ids.add(doc_id)
+
+    streaming_documents = dedup_inference_section_list(streaming_documents)
+
+    relevant_streaming_docs = [
+        doc
+        for doc in streaming_documents
+        if doc.center_chunk.document_id in relevant_doc_ids
+    ]
+    relevant_streaming_docs = dedup_sort_inference_section_list(relevant_streaming_docs)
+
+    additional_streaming_docs = [
+        doc
+        for doc in streaming_documents
+        if doc.center_chunk.document_id not in relevant_doc_ids
+    ]
+    additional_streaming_docs = dedup_sort_inference_section_list(
+        additional_streaming_docs
+    )
+
+    for doc in additional_streaming_docs:
+        if doc.center_chunk.score:
+            doc.center_chunk.score += -2.0
+        else:
+            doc.center_chunk.score = -2.0
+
+    sorted_streaming_documents = relevant_streaming_docs + additional_streaming_docs
+
+    return AnswerGenerationDocuments(
+        streaming_documents=sorted_streaming_documents[:max_docs],
+        context_documents=relevant_streaming_docs[:max_docs],
+    )
+
+
+def dedup_sort_inference_section_list(
+    sections: list[InferenceSection],
+) -> list[InferenceSection]:
+    """Deduplicates InferenceSections by document_id and sorts by score.
+
+    Args:
+        sections: List of InferenceSections to deduplicate and sort
+
+    Returns:
+        Deduplicated list of InferenceSections sorted by score in descending order
+    """
+    # dedupe/merge with existing framework
+    sections = dedup_inference_section_list(sections)
+
+    # Use dict to deduplicate by document_id, keeping highest scored version
+    unique_sections: dict[str, InferenceSection] = {}
+    for section in sections:
+        doc_id = section.center_chunk.document_id
+        if doc_id not in unique_sections:
+            unique_sections[doc_id] = section
+            continue
+
+        # Keep version with higher score
+        existing_score = unique_sections[doc_id].center_chunk.score or 0
+        new_score = section.center_chunk.score or 0
+        if new_score > existing_score:
+            unique_sections[doc_id] = section
+
+    # Sort by score in descending order, handling None scores
+    sorted_sections = sorted(
+        unique_sections.values(), key=lambda x: x.center_chunk.score or 0, reverse=True
+    )
+
+    return sorted_sections
--- a/backend/onyx/agents/agent_search/shared_graph_utils/constants.py
+++ b/backend/onyx/agents/agent_search/shared_graph_utils/constants.py
@@ -0,0 +1,19 @@
+from enum import Enum
+
+AGENT_LLM_TIMEOUT_MESSAGE = "The agent timed out. Please try again."
+AGENT_LLM_ERROR_MESSAGE = "The agent encountered an error. Please try again."
+AGENT_LLM_RATELIMIT_MESSAGE = (
+    "The agent encountered a rate limit error. Please try again."
+)
+LLM_ANSWER_ERROR_MESSAGE = "The question was not answered due to an LLM error."
+
+AGENT_POSITIVE_VALUE_STR = "yes"
+AGENT_NEGATIVE_VALUE_STR = "no"
+
+AGENT_ANSWER_SEPARATOR = "Answer:"
+
+
+class AgentLLMErrorType(str, Enum):
+    TIMEOUT = "timeout"
+    RATE_LIMIT = "rate_limit"
+    GENERAL_ERROR = "general_error"
--- a/backend/onyx/agents/agent_search/shared_graph_utils/models.py
+++ b/backend/onyx/agents/agent_search/shared_graph_utils/models.py
@@ -1,3 +1,5 @@
+from typing import Any
+
 from pydantic import BaseModel

 from onyx.agents.agent_search.deep_search.main.models import (
@@ -56,6 +58,12 @@ class InitialAgentResultStats(BaseModel):
    agent_effectiveness: dict[str, float | int | None]


+class AgentErrorLog(BaseModel):
+    error_message: str
+    error_type: str
+    error_result: str
+
+
 class RefinedAgentStats(BaseModel):
    revision_doc_efficiency: float | None
    revision_question_efficiency: float | None
@@ -110,6 +118,11 @@ class SubQuestionAnswerResults(BaseModel):
    sub_question_retrieval_stats: AgentChunkRetrievalStats


+class StructuredSubquestionDocuments(BaseModel):
+    cited_documents: list[InferenceSection]
+    context_documents: list[InferenceSection]
+
+
 class CombinedAgentMetrics(BaseModel):
    timings: AgentTimings
    base_metrics: AgentBaseMetrics | None
@@ -126,3 +139,17 @@ class AgentPromptEnrichmentComponents(BaseModel):
    persona_prompts: PersonaPromptExpressions
    history: str
    date_str: str
+
+
+class LLMNodeErrorStrings(BaseModel):
+    timeout: str = "LLM Timeout Error"
+    rate_limit: str = "LLM Rate Limit Error"
+    general_error: str = "General LLM Error"
+
+
+class AnswerGenerationDocuments(BaseModel):
+    streaming_documents: list[InferenceSection]
+    context_documents: list[InferenceSection]
+
+
+BaseMessage_Content = str | list[str | dict[str, Any]]
--- a/backend/onyx/agents/agent_search/shared_graph_utils/operators.py
+++ b/backend/onyx/agents/agent_search/shared_graph_utils/operators.py
@@ -12,6 +12,13 @@ def dedup_inference_sections(
    return deduped


+def dedup_inference_section_list(
+    list: list[InferenceSection],
+) -> list[InferenceSection]:
+    deduped = _merge_sections(list)
+    return deduped
+
+
 def dedup_question_answer_results(
    question_answer_results_1: list[SubQuestionAnswerResults],
    question_answer_results_2: list[SubQuestionAnswerResults],
--- a/backend/onyx/agents/agent_search/shared_graph_utils/utils.py
+++ b/backend/onyx/agents/agent_search/shared_graph_utils/utils.py
@@ -20,10 +20,18 @@ from onyx.agents.agent_search.models import GraphInputs
 from onyx.agents.agent_search.models import GraphPersistence
 from onyx.agents.agent_search.models import GraphSearchConfig
 from onyx.agents.agent_search.models import GraphTooling
+from onyx.agents.agent_search.shared_graph_utils.models import BaseMessage_Content
 from onyx.agents.agent_search.shared_graph_utils.models import (
    EntityRelationshipTermExtraction,
 )
 from onyx.agents.agent_search.shared_graph_utils.models import PersonaPromptExpressions
+from onyx.agents.agent_search.shared_graph_utils.models import (
+    StructuredSubquestionDocuments,
+)
+from onyx.agents.agent_search.shared_graph_utils.models import SubQuestionAnswerResults
+from onyx.agents.agent_search.shared_graph_utils.operators import (
+    dedup_inference_section_list,
+)
 from onyx.chat.models import AnswerPacket
 from onyx.chat.models import AnswerStyleConfig
 from onyx.chat.models import CitationConfig
@@ -34,6 +42,9 @@ from onyx.chat.models import StreamStopInfo
 from onyx.chat.models import StreamStopReason
 from onyx.chat.models import StreamType
 from onyx.chat.prompt_builder.answer_prompt_builder import AnswerPromptBuilder
+from onyx.configs.agent_configs import (
+    AGENT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION,
+)
 from onyx.configs.chat_configs import CHAT_TARGET_CHUNK_PERCENTAGE
 from onyx.configs.chat_configs import MAX_CHUNKS_FED_TO_CHAT
 from onyx.configs.constants import DEFAULT_PERSONA_ID
@@ -46,6 +57,8 @@ from onyx.context.search.models import SearchRequest
 from onyx.db.engine import get_session_context_manager
 from onyx.db.persona import get_persona_by_id
 from onyx.db.persona import Persona
+from onyx.llm.chat_llm import LLMRateLimitError
+from onyx.llm.chat_llm import LLMTimeoutError
 from onyx.llm.interfaces import LLM
 from onyx.prompts.agent_search import (
    ASSISTANT_SYSTEM_PROMPT_DEFAULT,
@@ -58,6 +71,7 @@ from onyx.prompts.agent_search import (
 )
 from onyx.prompts.prompt_utils import handle_onyx_date_awareness
 from onyx.tools.force import ForceUseTool
+from onyx.tools.models import SearchToolOverrideKwargs
 from onyx.tools.tool_constructor import SearchToolConfig
 from onyx.tools.tool_implementations.search.search_tool import (
    SEARCH_RESPONSE_SUMMARY_ID,
@@ -65,8 +79,9 @@ from onyx.tools.tool_implementations.search.search_tool import (
 from onyx.tools.tool_implementations.search.search_tool import SearchResponseSummary
 from onyx.tools.tool_implementations.search.search_tool import SearchTool
 from onyx.tools.utils import explicit_tool_calling_supported
+from onyx.utils.logger import setup_logger

-BaseMessage_Content = str | list[str | dict[str, Any]]
+logger = setup_logger()


 # Post-processing
@@ -218,7 +233,10 @@ def get_test_config(
        using_tool_calling_llm=using_tool_calling_llm,
    )

-    chat_session_id = os.environ.get("ONYX_AS_CHAT_SESSION_ID")
+    chat_session_id = (
+        os.environ.get("ONYX_AS_CHAT_SESSION_ID")
+        or "00000000-0000-0000-0000-000000000000"
+    )
    assert (
        chat_session_id is not None
    ), "ONYX_AS_CHAT_SESSION_ID must be set for backend tests"
@@ -341,8 +359,12 @@ def retrieve_search_docs(
    with get_session_context_manager() as db_session:
        for tool_response in search_tool.run(
            query=question,
-            force_no_rerank=True,
-            alternate_db_session=db_session,
+            override_kwargs=SearchToolOverrideKwargs(
+                force_no_rerank=True,
+                alternate_db_session=db_session,
+                retrieved_sections_callback=None,
+                skip_query_analysis=False,
+            ),
        ):
            # get retrieved docs to send to the rest of the graph
            if tool_response.id == SEARCH_RESPONSE_SUMMARY_ID:
@@ -372,8 +394,24 @@ def summarize_history(
        )
    )

-    history_response = llm.invoke(history_context_prompt)
+    try:
+        history_response = llm.invoke(
+            history_context_prompt,
+            timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION,
+        )
+    except LLMTimeoutError:
+        logger.error("LLM Timeout Error - summarize history")
+        return (
+            history  # this is what is done at this point anyway, so we default to this
+        )
+    except LLMRateLimitError:
+        logger.error("LLM Rate Limit Error - summarize history")
+        return (
+            history  # this is what is done at this point anyway, so we default to this
+        )
+
    assert isinstance(history_response.content, str)
+
    return history_response.content


@@ -439,3 +477,27 @@ def remove_document_citations(text: str) -> str:
    #   \d+  - one or more digits
    #   \]   - literal ] character
    return re.sub(r"\[(?:D|Q)?\d+\]", "", text)
+
+
+def get_deduplicated_structured_subquestion_documents(
+    sub_question_results: list[SubQuestionAnswerResults],
+) -> StructuredSubquestionDocuments:
+    """
+    Extract and deduplicate all cited and context documents from sub-question results.
+
+    Args:
+        sub_question_results: List of sub-question results containing cited and context documents
+
+    Returns:
+        StructuredSubquestionDocuments with the deduplicated cited and context documents
+    """
+    cited_docs = [
+        doc for result in sub_question_results for doc in result.cited_documents
+    ]
+    context_docs = [
+        doc for result in sub_question_results for doc in result.context_documents
+    ]
+    return StructuredSubquestionDocuments(
+        cited_documents=dedup_inference_section_list(cited_docs),
+        context_documents=dedup_inference_section_list(context_docs),
+    )
--- a/backend/onyx/auth/email_utils.py
+++ b/backend/onyx/auth/email_utils.py
@@ -1,7 +1,7 @@
 import smtplib
+from datetime import datetime
 from email.mime.multipart import MIMEMultipart
 from email.mime.text import MIMEText
-from textwrap import dedent

 from onyx.configs.app_configs import EMAIL_CONFIGURED
 from onyx.configs.app_configs import EMAIL_FROM
@@ -13,23 +13,150 @@ from onyx.configs.app_configs import WEB_DOMAIN
 from onyx.configs.constants import TENANT_ID_COOKIE_NAME
 from onyx.db.models import User

+HTML_EMAIL_TEMPLATE = """\
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8">
+  <meta name="viewport" content="width=device-width" />
+  <title>{title}</title>
+  <style>
+    body, table, td, a {{
+      font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
+      text-size-adjust: 100%;
+      margin: 0;
+      padding: 0;
+      -webkit-font-smoothing: antialiased;
+      -webkit-text-size-adjust: none;
+    }}
+    body {{
+      background-color: #f7f7f7;
+      color: #333;
+    }}
+    .body-content {{
+      color: #333;
+    }}
+    .email-container {{
+      width: 100%;
+      max-width: 600px;
+      margin: 0 auto;
+      background-color: #ffffff;
+      border-radius: 6px;
+      overflow: hidden;
+      border: 1px solid #eaeaea;
+    }}
+    .header {{
+      background-color: #000000;
+      padding: 20px;
+      text-align: center;
+    }}
+    .header img {{
+      max-width: 140px;
+    }}
+    .body-content {{
+      padding: 20px 30px;
+    }}
+    .title {{
+      font-size: 20px;
+      font-weight: bold;
+      margin: 0 0 10px;
+    }}
+    .message {{
+      font-size: 16px;
+      line-height: 1.5;
+      margin: 0 0 20px;
+    }}
+    .cta-button {{
+      display: inline-block;
+      padding: 12px 20px;
+      background-color: #000000;
+      color: #ffffff !important;
+      text-decoration: none;
+      border-radius: 4px;
+      font-weight: 500;
+    }}
+    .footer {{
+      font-size: 13px;
+      color: #6A7280;
+      text-align: center;
+      padding: 20px;
+    }}
+    .footer a {{
+      color: #6b7280;
+      text-decoration: underline;
+    }}
+  </style>
+</head>
+<body>
+  <table role="presentation" class="email-container" cellpadding="0" cellspacing="0">
+    <tr>
+      <td class="header">
+        <img
+          style="background-color: #ffffff; border-radius: 8px;"
+          src="https://www.onyx.app/logos/customer/onyx.png"
+          alt="Onyx Logo"
+        >
+      </td>
+    </tr>
+    <tr>
+      <td class="body-content">
+        <h1 class="title">{heading}</h1>
+        <div class="message">
+          {message}
+        </div>
+        {cta_block}
+      </td>
+    </tr>
+    <tr>
+      <td class="footer">
+        © {year} Onyx. All rights reserved.
+        <br>
+        Have questions? Join our Slack community <a href="https://join.slack.com/t/onyx-dot-app/shared_invite/zt-2twesxdr6-5iQitKZQpgq~hYIZ~dv3KA">here</a>.
+      </td>
+    </tr>
+  </table>
+</body>
+</html>
+"""
+
+
+def build_html_email(
+    heading: str, message: str, cta_text: str | None = None, cta_link: str | None = None
+) -> str:
+    if cta_text and cta_link:
+        cta_block = f'<a class="cta-button" href="{cta_link}">{cta_text}</a>'
+    else:
+        cta_block = ""
+    return HTML_EMAIL_TEMPLATE.format(
+        title=heading,
+        heading=heading,
+        message=message,
+        cta_block=cta_block,
+        year=datetime.now().year,
+    )
+

 def send_email(
    user_email: str,
    subject: str,
-    body: str,
+    html_body: str,
+    text_body: str,
    mail_from: str = EMAIL_FROM,
 ) -> None:
    if not EMAIL_CONFIGURED:
        raise ValueError("Email is not configured.")

-    msg = MIMEMultipart()
+    msg = MIMEMultipart("alternative")
    msg["Subject"] = subject
    msg["To"] = user_email
    if mail_from:
        msg["From"] = mail_from

-    msg.attach(MIMEText(body))
+    part_text = MIMEText(text_body, "plain")
+    part_html = MIMEText(html_body, "html")
+
+    msg.attach(part_text)
+    msg.attach(part_html)

    try:
        with smtplib.SMTP(SMTP_SERVER, SMTP_PORT) as s:
@@ -40,26 +167,44 @@ def send_email(
        raise e


+def send_subscription_cancellation_email(user_email: str) -> None:
+    # Example usage of the reusable HTML template via build_html_email
+    subject = "Your Onyx Subscription Has Been Canceled"
+    heading = "Subscription Canceled"
+    message = (
+        "<p>We’re sorry to see you go.</p>"
+        "<p>Your subscription has been canceled and will end on your next billing date.</p>"
+        "<p>If you change your mind, you can always come back!</p>"
+    )
+    cta_text = "Renew Subscription"
+    cta_link = "https://www.onyx.app/pricing"
+    html_content = build_html_email(heading, message, cta_text, cta_link)
+    text_content = (
+        "We're sorry to see you go.\n"
+        "Your subscription has been canceled and will end on your next billing date.\n"
+        "If you change your mind, visit https://www.onyx.app/pricing"
+    )
+    send_email(user_email, subject, html_content, text_content)
+
+
 def send_user_email_invite(user_email: str, current_user: User) -> None:
    subject = "Invitation to Join Onyx Organization"
-    body = dedent(
-        f"""\
-        Hello,
-
-        You have been invited to join an organization on Onyx.
-
-        To join the organization, please visit the following link:
-
-        {WEB_DOMAIN}/auth/signup?email={user_email}
-
-        You'll be asked to set a password or login with Google to complete your registration.
-
-        Best regards,
-        The Onyx Team
-    """
+    heading = "You've Been Invited!"
+    message = (
+        f"<p>You have been invited by {current_user.email} to join an organization on Onyx.</p>"
+        "<p>To join the organization, please click the button below to set a password "
+        "or login with Google and complete your registration.</p>"
    )
-
-    send_email(user_email, subject, body, current_user.email)
+    cta_text = "Join Organization"
+    cta_link = f"{WEB_DOMAIN}/auth/signup?email={user_email}"
+    html_content = build_html_email(heading, message, cta_text, cta_link)
+    text_content = (
+        f"You have been invited by {current_user.email} to join an organization on Onyx.\n"
+        "To join the organization, please visit the following link:\n"
+        f"{WEB_DOMAIN}/auth/signup?email={user_email}\n"
+        "You'll be asked to set a password or login with Google to complete your registration."
+    )
+    send_email(user_email, subject, html_content, text_content)


 def send_forgot_password_email(
@@ -68,13 +213,15 @@ def send_forgot_password_email(
    mail_from: str = EMAIL_FROM,
    tenant_id: str | None = None,
 ) -> None:
+    # Builds a forgot password email with or without fancy HTML
    subject = "Onyx Forgot Password"
    link = f"{WEB_DOMAIN}/auth/reset-password?token={token}"
    if tenant_id:
        link += f"&{TENANT_ID_COOKIE_NAME}={tenant_id}"
-        # Keep search param same name as cookie for simplicity
-    body = f"Click the following link to reset your password: {link}"
-    send_email(user_email, subject, body, mail_from)
+    message = f"<p>Click the following link to reset your password:</p><p>{link}</p>"
+    html_content = build_html_email("Reset Your Password", message)
+    text_content = f"Click the following link to reset your password: {link}"
+    send_email(user_email, subject, html_content, text_content, mail_from)


 def send_user_verification_email(
@@ -82,7 +229,12 @@ def send_user_verification_email(
    token: str,
    mail_from: str = EMAIL_FROM,
 ) -> None:
+    # Builds a verification email
    subject = "Onyx Email Verification"
    link = f"{WEB_DOMAIN}/auth/verify-email?token={token}"
-    body = f"Click the following link to verify your email address: {link}"
-    send_email(user_email, subject, body, mail_from)
+    message = (
+        f"<p>Click the following link to verify your email address:</p><p>{link}</p>"
+    )
+    html_content = build_html_email("Verify Your Email", message)
+    text_content = f"Click the following link to verify your email address: {link}"
+    send_email(user_email, subject, html_content, text_content, mail_from)
--- a/backend/onyx/background/celery/apps/primary.py
+++ b/backend/onyx/background/celery/apps/primary.py
@@ -144,7 +144,6 @@ def on_worker_init(sender: Worker, **kwargs: Any) -> None:
    # As currently designed, when this worker starts as "primary", we reinitialize redis
    # to a clean state (for our purposes, anyway)
    r.delete(OnyxRedisLocks.CHECK_VESPA_SYNC_BEAT_LOCK)
-    r.delete(OnyxRedisLocks.MONITOR_VESPA_SYNC_BEAT_LOCK)

    r.delete(OnyxRedisConstants.ACTIVE_FENCES)

--- a/backend/onyx/background/celery/tasks/beat_schedule.py
+++ b/backend/onyx/background/celery/tasks/beat_schedule.py
@@ -19,6 +19,7 @@ BEAT_EXPIRES_DEFAULT = 15 * 60  # 15 minutes (in seconds)

 # hack to slow down task dispatch in the cloud until
 # we have a better implementation (backpressure, etc)
+# Note that DynamicTenantScheduler can adjust the runtime value for this via Redis
 CLOUD_BEAT_MULTIPLIER_DEFAULT = 8.0

 # tasks that run in either self-hosted on cloud
@@ -56,16 +57,7 @@ beat_task_templates.extend(
        {
            "name": "check-for-pruning",
            "task": OnyxCeleryTask.CHECK_FOR_PRUNING,
-            "schedule": timedelta(hours=1),
-            "options": {
-                "priority": OnyxCeleryPriority.MEDIUM,
-                "expires": BEAT_EXPIRES_DEFAULT,
-            },
-        },
-        {
-            "name": "monitor-vespa-sync",
-            "task": OnyxCeleryTask.MONITOR_VESPA_SYNC,
-            "schedule": timedelta(seconds=5),
+            "schedule": timedelta(seconds=20),
            "options": {
                "priority": OnyxCeleryPriority.MEDIUM,
                "expires": BEAT_EXPIRES_DEFAULT,
@@ -141,14 +133,14 @@ def make_cloud_generator_task(task: dict[str, Any]) -> dict[str, Any]:
    return cloud_task


-# tasks that only run in the cloud
+# tasks that only run in the cloud and are system wide
 # the name attribute must start with ONYX_CLOUD_CELERY_TASK_PREFIX = "cloud" to be seen
 # by the DynamicTenantScheduler as system wide task and not a per tenant task
-beat_system_tasks: list[dict] = [
+beat_cloud_tasks: list[dict] = [
    # cloud specific tasks
    {
-        "name": f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_check-alembic",
-        "task": OnyxCeleryTask.CLOUD_CHECK_ALEMBIC,
+        "name": f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_monitor-alembic",
+        "task": OnyxCeleryTask.CLOUD_MONITOR_ALEMBIC,
        "schedule": timedelta(hours=1),
        "options": {
            "queue": OnyxCeleryQueues.MONITORING,
@@ -156,11 +148,37 @@ beat_system_tasks: list[dict] = [
            "expires": BEAT_EXPIRES_DEFAULT,
        },
    },
+    {
+        "name": f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_monitor-celery-queues",
+        "task": OnyxCeleryTask.CLOUD_MONITOR_CELERY_QUEUES,
+        "schedule": timedelta(seconds=30),
+        "options": {
+            "queue": OnyxCeleryQueues.MONITORING,
+            "priority": OnyxCeleryPriority.HIGH,
+            "expires": BEAT_EXPIRES_DEFAULT,
+        },
+    },
 ]

+# tasks that only run self hosted
 tasks_to_schedule: list[dict] = []
 if not MULTI_TENANT:
-    tasks_to_schedule = beat_task_templates
+    tasks_to_schedule.extend(
+        [
+            {
+                "name": "monitor-celery-queues",
+                "task": OnyxCeleryTask.MONITOR_CELERY_QUEUES,
+                "schedule": timedelta(seconds=10),
+                "options": {
+                    "priority": OnyxCeleryPriority.MEDIUM,
+                    "expires": BEAT_EXPIRES_DEFAULT,
+                    "queue": OnyxCeleryQueues.MONITORING,
+                },
+            },
+        ]
+    )
+
+    tasks_to_schedule.extend(beat_task_templates)


 def generate_cloud_tasks(
@@ -180,23 +198,24 @@ def generate_cloud_tasks(
    if beat_multiplier <= 0:
        raise ValueError("beat_multiplier must be positive!")

-    # start with the incoming beat tasks
-    cloud_tasks: list[dict] = copy.deepcopy(beat_tasks)
+    cloud_tasks: list[dict] = []

-    # generate our cloud tasks from the templates
+    # generate our tenant aware cloud tasks from the templates
    for beat_template in beat_templates:
        cloud_task = make_cloud_generator_task(beat_template)
        cloud_tasks.append(cloud_task)

-    # factor in the cloud multiplier
+    # factor in the cloud multiplier for the above
    for cloud_task in cloud_tasks:
        cloud_task["schedule"] = cloud_task["schedule"] * beat_multiplier

+    # add the fixed cloud/system beat tasks. No multiplier for these.
+    cloud_tasks.extend(copy.deepcopy(beat_tasks))
    return cloud_tasks


 def get_cloud_tasks_to_schedule(beat_multiplier: float) -> list[dict[str, Any]]:
-    return generate_cloud_tasks(beat_system_tasks, beat_task_templates, beat_multiplier)
+    return generate_cloud_tasks(beat_cloud_tasks, beat_task_templates, beat_multiplier)


 def get_tasks_to_schedule() -> list[dict[str, Any]]:
--- a/backend/onyx/background/celery/tasks/connector_deletion/tasks.py
+++ b/backend/onyx/background/celery/tasks/connector_deletion/tasks.py
@@ -1,10 +1,14 @@
+import traceback
 from datetime import datetime
 from datetime import timezone
+from typing import Any
+from typing import cast

 from celery import Celery
 from celery import shared_task
 from celery import Task
 from celery.exceptions import SoftTimeLimitExceeded
+from redis import Redis
 from redis.lock import Lock as RedisLock
 from sqlalchemy.orm import Session

@@ -12,18 +16,35 @@ from onyx.background.celery.apps.app_base import task_logger
 from onyx.configs.app_configs import JOB_TIMEOUT
 from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
 from onyx.configs.constants import OnyxCeleryTask
+from onyx.configs.constants import OnyxRedisConstants
 from onyx.configs.constants import OnyxRedisLocks
+from onyx.db.connector import fetch_connector_by_id
+from onyx.db.connector_credential_pair import add_deletion_failure_message
+from onyx.db.connector_credential_pair import (
+    delete_connector_credential_pair__no_commit,
+)
 from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
 from onyx.db.connector_credential_pair import get_connector_credential_pairs
+from onyx.db.document import get_document_ids_for_connector_credential_pair
+from onyx.db.document_set import delete_document_set_cc_pair_relationship__no_commit
 from onyx.db.engine import get_session_with_tenant
 from onyx.db.enums import ConnectorCredentialPairStatus
+from onyx.db.enums import SyncStatus
 from onyx.db.enums import SyncType
+from onyx.db.index_attempt import delete_index_attempts
 from onyx.db.search_settings import get_all_search_settings
 from onyx.db.sync_record import cleanup_sync_records
 from onyx.db.sync_record import insert_sync_record
+from onyx.db.sync_record import update_sync_record_status
 from onyx.redis.redis_connector import RedisConnector
+from onyx.redis.redis_connector_delete import RedisConnectorDelete
 from onyx.redis.redis_connector_delete import RedisConnectorDeletePayload
 from onyx.redis.redis_pool import get_redis_client
+from onyx.redis.redis_pool import get_redis_replica_client
+from onyx.utils.variable_functionality import (
+    fetch_versioned_implementation_with_fallback,
+)
+from onyx.utils.variable_functionality import noop_fallback


 class TaskDependencyError(RuntimeError):
@@ -42,6 +63,7 @@ def check_for_connector_deletion_task(
    self: Task, *, tenant_id: str | None
 ) -> bool | None:
    r = get_redis_client(tenant_id=tenant_id)
+    r_replica = get_redis_replica_client(tenant_id=tenant_id)

    lock_beat: RedisLock = r.lock(
        OnyxRedisLocks.CHECK_CONNECTOR_DELETION_BEAT_LOCK,
@@ -77,6 +99,18 @@ def check_for_connector_deletion_task(
                    # clear the stop signal if it exists ... no longer needed
                    redis_connector.stop.set_fence(False)

+        lock_beat.reacquire()
+        keys = cast(set[Any], r_replica.smembers(OnyxRedisConstants.ACTIVE_FENCES))
+        for key in keys:
+            key_bytes = cast(bytes, key)
+
+            if not r.exists(key_bytes):
+                r.srem(OnyxRedisConstants.ACTIVE_FENCES, key_bytes)
+                continue
+
+            key_str = key_bytes.decode("utf-8")
+            if key_str.startswith(RedisConnectorDelete.FENCE_PREFIX):
+                monitor_connector_deletion_taskset(tenant_id, key_bytes, r)
    except SoftTimeLimitExceeded:
        task_logger.info(
            "Soft time limit exceeded, task is being terminated gracefully."
@@ -212,3 +246,185 @@ def try_generate_document_cc_pair_cleanup_tasks(
        redis_connector.delete.set_fence(fence_payload)

    return tasks_generated
+
+
+def monitor_connector_deletion_taskset(
+    tenant_id: str | None, key_bytes: bytes, r: Redis
+) -> None:
+    """Check a connector deletion fence and finalize the deletion when done.
+
+    The cc_pair id is parsed from the fence key. Depending on the fence state:
+
+    - payload missing, or ``num_tasks`` not set yet: return without changes.
+    - deletion tasks still remaining: mark the sync record IN_PROGRESS.
+    - no tasks remaining: delete the related Postgres rows (index attempts,
+      document set and user group relationships, the cc_pair itself, and the
+      connector when it has no credentials left), mark the sync record
+      SUCCESS and reset the fence.
+
+    Args:
+        tenant_id: passed to RedisConnector and the DB session; may be None.
+        key_bytes: raw Redis key of the deletion fence (UTF-8 encoded).
+        r: Redis client supplied by the caller; not used in this body.
+
+    Raises:
+        Exception: any error during finalization is recorded as a deletion
+            failure message and a FAILED sync record, then re-raised.
+    """
+    fence_key = key_bytes.decode("utf-8")
+    cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)
+    if cc_pair_id_str is None:
+        task_logger.warning(f"could not parse cc_pair_id from {fence_key}")
+        return
+
+    cc_pair_id = int(cc_pair_id_str)
+
+    redis_connector = RedisConnector(tenant_id, cc_pair_id)
+
+    fence_data = redis_connector.delete.payload
+    if not fence_data:
+        task_logger.warning(
+            f"Connector deletion - fence payload invalid: cc_pair={cc_pair_id}"
+        )
+        return
+
+    if fence_data.num_tasks is None:
+        # the fence is setting up but isn't ready yet
+        return
+
+    remaining = redis_connector.delete.get_remaining()
+    task_logger.info(
+        f"Connector deletion progress: cc_pair={cc_pair_id} remaining={remaining} initial={fence_data.num_tasks}"
+    )
+    if remaining > 0:
+        # tasks are still outstanding: record progress and stop here
+        with get_session_with_tenant(tenant_id) as db_session:
+            update_sync_record_status(
+                db_session=db_session,
+                entity_id=cc_pair_id,
+                sync_type=SyncType.CONNECTOR_DELETION,
+                sync_status=SyncStatus.IN_PROGRESS,
+                num_docs_synced=remaining,
+            )
+        return
+
+    with get_session_with_tenant(tenant_id) as db_session:
+        cc_pair = get_connector_credential_pair_from_id(
+            db_session=db_session,
+            cc_pair_id=cc_pair_id,
+        )
+        if not cc_pair:
+            task_logger.warning(
+                f"Connector deletion - cc_pair not found: cc_pair={cc_pair_id}"
+            )
+            return
+
+        # Read the ids once so later log lines don't need to touch the ORM
+        # instance after its row is deleted or the session is closed.
+        connector_id = cc_pair.connector_id
+        credential_id = cc_pair.credential_id
+
+        try:
+            doc_ids = get_document_ids_for_connector_credential_pair(
+                db_session, connector_id, credential_id
+            )
+            if len(doc_ids) > 0:
+                # NOTE(rkuo): if this happens, documents somehow got added while
+                # deletion was in progress. Likely a bug gating off pruning and indexing
+                # work before deletion starts.
+                task_logger.warning(
+                    "Connector deletion - documents still found after taskset completion. "
+                    "Clearing the current deletion attempt and allowing deletion to restart: "
+                    f"cc_pair={cc_pair_id} "
+                    f"docs_deleted={fence_data.num_tasks} "
+                    f"docs_remaining={len(doc_ids)}"
+                )
+
+                # We don't want to wave off why we get into this state, but resetting
+                # our attempt and letting the deletion restart is a good way to recover
+                redis_connector.delete.reset()
+                raise RuntimeError(
+                    "Connector deletion - documents still found after taskset completion"
+                )
+
+            # clean up the rest of the related Postgres entities
+            # index attempts
+            delete_index_attempts(
+                db_session=db_session,
+                cc_pair_id=cc_pair_id,
+            )
+
+            # document sets
+            delete_document_set_cc_pair_relationship__no_commit(
+                db_session=db_session,
+                connector_id=connector_id,
+                credential_id=credential_id,
+            )
+
+            # user groups
+            cleanup_user_groups = fetch_versioned_implementation_with_fallback(
+                "onyx.db.user_group",
+                "delete_user_group_cc_pair_relationship__no_commit",
+                noop_fallback,
+            )
+            cleanup_user_groups(
+                cc_pair_id=cc_pair_id,
+                db_session=db_session,
+            )
+
+            # finally, delete the cc-pair
+            delete_connector_credential_pair__no_commit(
+                db_session=db_session,
+                connector_id=connector_id,
+                credential_id=credential_id,
+            )
+            # if there are no credentials left, delete the connector
+            connector = fetch_connector_by_id(
+                db_session=db_session,
+                connector_id=connector_id,
+            )
+            # Session.delete(None) raises, so only delete a connector that was found
+            if connector is not None and len(connector.credentials) == 0:
+                task_logger.info(
+                    "Connector deletion - Found no credentials left for connector, deleting connector"
+                )
+                db_session.delete(connector)
+            db_session.commit()
+
+            update_sync_record_status(
+                db_session=db_session,
+                entity_id=cc_pair_id,
+                sync_type=SyncType.CONNECTOR_DELETION,
+                sync_status=SyncStatus.SUCCESS,
+                num_docs_synced=fence_data.num_tasks,
+            )
+
+        except Exception as e:
+            db_session.rollback()
+            stack_trace = traceback.format_exc()
+            error_message = f"Error: {str(e)}\n\nStack Trace:\n{stack_trace}"
+            add_deletion_failure_message(db_session, cc_pair_id, error_message)
+
+            update_sync_record_status(
+                db_session=db_session,
+                entity_id=cc_pair_id,
+                sync_type=SyncType.CONNECTOR_DELETION,
+                sync_status=SyncStatus.FAILED,
+                num_docs_synced=fence_data.num_tasks,
+            )
+
+            task_logger.exception(
+                f"Connector deletion exceptioned: "
+                f"cc_pair={cc_pair_id} connector={connector_id} credential={credential_id}"
+            )
+            raise
+
+    task_logger.info(
+        f"Connector deletion succeeded: "
+        f"cc_pair={cc_pair_id} "
+        f"connector={connector_id} "
+        f"credential={credential_id} "
+        f"docs_deleted={fence_data.num_tasks}"
+    )
+
+    redis_connector.delete.reset()
--- a/backend/onyx/background/celery/tasks/doc_permission_syncing/tasks.py
+++ b/backend/onyx/background/celery/tasks/doc_permission_syncing/tasks.py
@@ -175,6 +175,24 @@ def check_for_doc_permissions_sync(self: Task, *, tenant_id: str | None) -> bool
                )

            r.set(OnyxRedisSignals.BLOCK_VALIDATE_PERMISSION_SYNC_FENCES, 1, ex=300)
+
+        # use a lookup table to find active fences. We still have to verify the fence
+        # exists since it is an optimization and not the source of truth.
+        lock_beat.reacquire()
+        keys = cast(set[Any], r_replica.smembers(OnyxRedisConstants.ACTIVE_FENCES))
+        for key in keys:
+            key_bytes = cast(bytes, key)
+
+            if not r.exists(key_bytes):
+                r.srem(OnyxRedisConstants.ACTIVE_FENCES, key_bytes)
+                continue
+
+            key_str = key_bytes.decode("utf-8")
+            if key_str.startswith(RedisConnectorPermissionSync.FENCE_PREFIX):
+                with get_session_with_tenant(tenant_id) as db_session:
+                    monitor_ccpair_permissions_taskset(
+                        tenant_id, key_bytes, r, db_session
+                    )
    except SoftTimeLimitExceeded:
        task_logger.info(
            "Soft time limit exceeded, task is being terminated gracefully."
@@ -349,6 +367,7 @@ def connector_permission_sync_generator_task(
        OnyxRedisLocks.CONNECTOR_DOC_PERMISSIONS_SYNC_LOCK_PREFIX
        + f"_{redis_connector.id}",
        timeout=CELERY_PERMISSIONS_SYNC_LOCK_TIMEOUT,
+        thread_local=False,
    )

    acquired = lock.acquire(blocking=False)
@@ -756,7 +775,7 @@ class PermissionSyncCallback(IndexingHeartbeatInterface):
            raise


-"""Monitoring CCPair permissions utils, called in monitor_vespa_sync"""
+"""Monitoring CCPair permissions utils"""


 def monitor_ccpair_permissions_taskset(
--- a/backend/onyx/background/celery/tasks/external_group_syncing/tasks.py
+++ b/backend/onyx/background/celery/tasks/external_group_syncing/tasks.py
@@ -26,11 +26,11 @@ from ee.onyx.external_permissions.sync_params import (
 from onyx.background.celery.apps.app_base import task_logger
 from onyx.background.celery.celery_redis import celery_find_task
 from onyx.background.celery.celery_redis import celery_get_unacked_task_ids
+from onyx.background.error_logging import emit_background_error
 from onyx.configs.app_configs import JOB_TIMEOUT
 from onyx.configs.constants import CELERY_EXTERNAL_GROUP_SYNC_LOCK_TIMEOUT
 from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
 from onyx.configs.constants import CELERY_TASK_WAIT_FOR_FENCE_TIMEOUT
-from onyx.configs.constants import DANSWER_REDIS_FUNCTION_LOCK_PREFIX
 from onyx.configs.constants import OnyxCeleryPriority
 from onyx.configs.constants import OnyxCeleryQueues
 from onyx.configs.constants import OnyxCeleryTask
@@ -72,18 +72,26 @@ def _is_external_group_sync_due(cc_pair: ConnectorCredentialPair) -> bool:
    """Returns boolean indicating if external group sync is due."""

    if cc_pair.access_type != AccessType.SYNC:
-        return False
-
-    # skip external group sync if not active
-    if cc_pair.status != ConnectorCredentialPairStatus.ACTIVE:
+        task_logger.error(
+            f"Recieved non-sync CC Pair {cc_pair.id} for external "
+            f"group sync. Actual access type: {cc_pair.access_type}"
+        )
        return False

    if cc_pair.status == ConnectorCredentialPairStatus.DELETING:
+        task_logger.debug(
+            f"Skipping group sync for CC Pair {cc_pair.id} - "
+            f"CC Pair is being deleted"
+        )
        return False

    # If there is not group sync function for the connector, we don't run the sync
    # This is fine because all sources dont necessarily have a concept of groups
    if not GROUP_PERMISSIONS_FUNC_MAP.get(cc_pair.connector.source):
+        task_logger.debug(
+            f"Skipping group sync for CC Pair {cc_pair.id} - "
+            f"no group sync function for {cc_pair.connector.source}"
+        )
        return False

    # If the last sync is None, it has never been run so we run the sync
@@ -125,6 +133,9 @@ def check_for_external_group_sync(self: Task, *, tenant_id: str | None) -> bool

    # these tasks should never overlap
    if not lock_beat.acquire(blocking=False):
+        task_logger.warning(
+            f"Failed to acquire beat lock for external group sync: {tenant_id}"
+        )
        return None

    try:
@@ -205,20 +216,12 @@ def try_creating_external_group_sync_task(

    redis_connector = RedisConnector(tenant_id, cc_pair_id)

-    LOCK_TIMEOUT = 30
-
-    lock: RedisLock = r.lock(
-        DANSWER_REDIS_FUNCTION_LOCK_PREFIX + "try_generate_external_group_sync_tasks",
-        timeout=LOCK_TIMEOUT,
-    )
-
-    acquired = lock.acquire(blocking_timeout=LOCK_TIMEOUT / 2)
-    if not acquired:
-        return None
-
    try:
        # Dont kick off a new sync if the previous one is still running
        if redis_connector.external_group_sync.fenced:
+            logger.warning(
+                f"Skipping external group sync for CC Pair {cc_pair_id} - already running."
+            )
            return None

        redis_connector.external_group_sync.generator_clear()
@@ -269,9 +272,6 @@ def try_creating_external_group_sync_task(
            f"Unexpected exception while trying to create external group sync task: cc_pair={cc_pair_id}"
        )
        return None
-    finally:
-        if lock.owned():
-            lock.release()

    return payload_id

@@ -304,22 +304,26 @@ def connector_external_group_sync_generator_task(
    start = time.monotonic()
    while True:
        if time.monotonic() - start > CELERY_TASK_WAIT_FOR_FENCE_TIMEOUT:
-            raise ValueError(
+            msg = (
                f"connector_external_group_sync_generator_task - timed out waiting for fence to be ready: "
                f"fence={redis_connector.external_group_sync.fence_key}"
            )
+            emit_background_error(msg, cc_pair_id=cc_pair_id)
+            raise ValueError(msg)

        if not redis_connector.external_group_sync.fenced:  # The fence must exist
-            raise ValueError(
+            msg = (
                f"connector_external_group_sync_generator_task - fence not found: "
                f"fence={redis_connector.external_group_sync.fence_key}"
            )
+            emit_background_error(msg, cc_pair_id=cc_pair_id)
+            raise ValueError(msg)

        payload = redis_connector.external_group_sync.payload  # The payload must exist
        if not payload:
-            raise ValueError(
-                "connector_external_group_sync_generator_task: payload invalid or not found"
-            )
+            msg = "connector_external_group_sync_generator_task: payload invalid or not found"
+            emit_background_error(msg, cc_pair_id=cc_pair_id)
+            raise ValueError(msg)

        if payload.celery_task_id is None:
            logger.info(
@@ -344,9 +348,9 @@ def connector_external_group_sync_generator_task(

    acquired = lock.acquire(blocking=False)
    if not acquired:
-        task_logger.warning(
-            f"External group sync task already running, exiting...: cc_pair={cc_pair_id}"
-        )
+        msg = f"External group sync task already running, exiting...: cc_pair={cc_pair_id}"
+        emit_background_error(msg, cc_pair_id=cc_pair_id)
+        task_logger.error(msg)
        return None

    try:
@@ -367,9 +371,9 @@ def connector_external_group_sync_generator_task(

            ext_group_sync_func = GROUP_PERMISSIONS_FUNC_MAP.get(source_type)
            if ext_group_sync_func is None:
-                raise ValueError(
-                    f"No external group sync func found for {source_type} for cc_pair: {cc_pair_id}"
-                )
+                msg = f"No external group sync func found for {source_type} for cc_pair: {cc_pair_id}"
+                emit_background_error(msg, cc_pair_id=cc_pair_id)
+                raise ValueError(msg)

            logger.info(
                f"Syncing external groups for {source_type} for cc_pair: {cc_pair_id}"
@@ -400,9 +404,9 @@ def connector_external_group_sync_generator_task(
                sync_status=SyncStatus.SUCCESS,
            )
    except Exception as e:
-        task_logger.exception(
-            f"External group sync exceptioned: cc_pair={cc_pair_id} payload_id={payload.id}"
-        )
+        msg = f"External group sync exceptioned: cc_pair={cc_pair_id} payload_id={payload.id}"
+        task_logger.exception(msg)
+        emit_background_error(msg + f"\n\n{e}", cc_pair_id=cc_pair_id)

        with get_session_with_tenant(tenant_id) as db_session:
            update_sync_record_status(
@@ -492,9 +496,11 @@ def validate_external_group_sync_fence(
    fence_key = key_bytes.decode("utf-8")
    cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)
    if cc_pair_id_str is None:
-        task_logger.warning(
+        msg = (
            f"validate_external_group_sync_fence - could not parse id from {fence_key}"
        )
+        emit_background_error(msg)
+        task_logger.error(msg)
        return

    cc_pair_id = int(cc_pair_id_str)
@@ -509,12 +515,14 @@ def validate_external_group_sync_fence(
    try:
        payload = redis_connector.external_group_sync.payload
    except ValidationError:
-        task_logger.exception(
+        msg = (
            "validate_external_group_sync_fence - "
            "Resetting fence because fence schema is out of date: "
            f"cc_pair={cc_pair_id} "
            f"fence={fence_key}"
        )
+        task_logger.exception(msg)
+        emit_background_error(msg, cc_pair_id=cc_pair_id)

        redis_connector.external_group_sync.reset()
        return
@@ -551,12 +559,15 @@ def validate_external_group_sync_fence(
    # return

    # celery tasks don't exist and the active signal has expired, possibly due to a crash. Clean it up.
-    logger.warning(
-        "validate_external_group_sync_fence - "
-        "Resetting fence because no associated celery tasks were found: "
-        f"cc_pair={cc_pair_id} "
-        f"fence={fence_key} "
-        f"payload_id={payload.id}"
+    emit_background_error(
+        message=(
+            "validate_external_group_sync_fence - "
+            "Resetting fence because no associated celery tasks were found: "
+            f"cc_pair={cc_pair_id} "
+            f"fence={fence_key} "
+            f"payload_id={payload.id}"
+        ),
+        cc_pair_id=cc_pair_id,
    )

    redis_connector.external_group_sync.reset()
--- a/backend/onyx/background/celery/tasks/indexing/tasks.py
+++ b/backend/onyx/background/celery/tasks/indexing/tasks.py
@@ -6,13 +6,18 @@ from datetime import datetime
 from datetime import timezone
 from http import HTTPStatus
 from time import sleep
+from typing import Any
+from typing import cast

 import sentry_sdk
 from celery import shared_task
 from celery import Task
 from celery.exceptions import SoftTimeLimitExceeded
+from celery.result import AsyncResult
+from celery.states import READY_STATES
 from redis import Redis
 from redis.lock import Lock as RedisLock
+from sqlalchemy.orm import Session

 from onyx.background.celery.apps.app_base import task_logger
 from onyx.background.celery.celery_utils import httpx_init_vespa_pool
@@ -30,6 +35,7 @@ from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
 from onyx.configs.constants import CELERY_INDEXING_LOCK_TIMEOUT
 from onyx.configs.constants import CELERY_TASK_WAIT_FOR_FENCE_TIMEOUT
 from onyx.configs.constants import OnyxCeleryTask
+from onyx.configs.constants import OnyxRedisConstants
 from onyx.configs.constants import OnyxRedisLocks
 from onyx.configs.constants import OnyxRedisSignals
 from onyx.db.connector import mark_ccpair_with_indexing_trigger
@@ -37,6 +43,7 @@ from onyx.db.connector_credential_pair import fetch_connector_credential_pairs
 from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
 from onyx.db.engine import get_session_with_tenant
 from onyx.db.enums import IndexingMode
+from onyx.db.enums import IndexingStatus
 from onyx.db.index_attempt import get_index_attempt
 from onyx.db.index_attempt import get_last_attempt_for_cc_pair
 from onyx.db.index_attempt import mark_attempt_canceled
@@ -47,9 +54,12 @@ from onyx.db.swap_index import check_index_swap
 from onyx.natural_language_processing.search_nlp_models import EmbeddingModel
 from onyx.natural_language_processing.search_nlp_models import warm_up_bi_encoder
 from onyx.redis.redis_connector import RedisConnector
+from onyx.redis.redis_connector_index import RedisConnectorIndex
 from onyx.redis.redis_pool import get_redis_client
 from onyx.redis.redis_pool import get_redis_replica_client
 from onyx.redis.redis_pool import redis_lock_dump
+from onyx.redis.redis_pool import SCAN_ITER_COUNT_DEFAULT
+from onyx.redis.redis_utils import is_fence
 from onyx.utils.logger import setup_logger
 from onyx.utils.variable_functionality import global_version
 from shared_configs.configs import INDEXING_MODEL_SERVER_HOST
@@ -60,6 +70,150 @@ from shared_configs.configs import SENTRY_DSN
 logger = setup_logger()


+def monitor_ccpair_indexing_taskset(
+    tenant_id: str | None, key_bytes: bytes, r: Redis, db_session: Session
+) -> None:
+    # parse the fence key; if it is malformed or the fence no longer exists, there's nothing to do
+    fence_key = key_bytes.decode("utf-8")
+    composite_id = RedisConnector.get_id_from_fence_key(fence_key)
+    if composite_id is None:
+        task_logger.warning(
+            f"Connector indexing: could not parse composite_id from {fence_key}"
+        )
+        return
+
+    # parse out metadata and initialize the helper class with it
+    parts = composite_id.split("/")
+    if len(parts) != 2:
+        return
+
+    cc_pair_id = int(parts[0])
+    search_settings_id = int(parts[1])
+
+    redis_connector = RedisConnector(tenant_id, cc_pair_id)
+    redis_connector_index = redis_connector.new_index(search_settings_id)
+    if not redis_connector_index.fenced:
+        return
+
+    payload = redis_connector_index.payload
+    if not payload:
+        return
+
+    elapsed_started_str = None
+    if payload.started:
+        elapsed_started = datetime.now(timezone.utc) - payload.started
+        elapsed_started_str = f"{elapsed_started.total_seconds():.2f}"
+
+    elapsed_submitted = datetime.now(timezone.utc) - payload.submitted
+
+    progress = redis_connector_index.get_progress()
+    if progress is not None:
+        task_logger.info(
+            f"Connector indexing progress: "
+            f"attempt={payload.index_attempt_id} "
+            f"cc_pair={cc_pair_id} "
+            f"search_settings={search_settings_id} "
+            f"progress={progress} "
+            f"elapsed_submitted={elapsed_submitted.total_seconds():.2f} "
+            f"elapsed_started={elapsed_started_str}"
+        )
+
+    if payload.index_attempt_id is None or payload.celery_task_id is None:
+        # the task is still setting up
+        return
+
+    # never use any blocking methods on the result from inside a task!
+    result: AsyncResult = AsyncResult(payload.celery_task_id)
+
+    # inner/outer/inner double check pattern to avoid race conditions when checking for
+    # bad state
+
+    # Verify: if the generator isn't complete, the task must not be in READY state
+    # inner = get_completion / generator_complete not signaled
+    # outer = result.state in READY state
+    status_int = redis_connector_index.get_completion()
+    if status_int is None:  # inner signal not set ... possible error
+        task_state = result.state
+        if (
+            task_state in READY_STATES
+        ):  # outer signal in terminal state ... possible error
+            # Now double check!
+            if redis_connector_index.get_completion() is None:
+                # inner signal still not set (and cannot change once the outer result.state is READY)
+                # Task is finished but generator complete isn't set.
+                # We have a problem! Worker may have crashed.
+                task_result = str(result.result)
+                task_traceback = str(result.traceback)
+
+                msg = (
+                    f"Connector indexing aborted or exceptioned: "
+                    f"attempt={payload.index_attempt_id} "
+                    f"celery_task={payload.celery_task_id} "
+                    f"cc_pair={cc_pair_id} "
+                    f"search_settings={search_settings_id} "
+                    f"elapsed_submitted={elapsed_submitted.total_seconds():.2f} "
+                    f"result.state={task_state} "
+                    f"result.result={task_result} "
+                    f"result.traceback={task_traceback}"
+                )
+                task_logger.warning(msg)
+
+                try:
+                    index_attempt = get_index_attempt(
+                        db_session, payload.index_attempt_id
+                    )
+                    if index_attempt:
+                        if (
+                            index_attempt.status != IndexingStatus.CANCELED
+                            and index_attempt.status != IndexingStatus.FAILED
+                        ):
+                            mark_attempt_failed(
+                                index_attempt_id=payload.index_attempt_id,
+                                db_session=db_session,
+                                failure_reason=msg,
+                            )
+                except Exception:
+                    task_logger.exception(
+                        "Connector indexing - Transient exception marking index attempt as failed: "
+                        f"attempt={payload.index_attempt_id} "
+                        f"tenant={tenant_id} "
+                        f"cc_pair={cc_pair_id} "
+                        f"search_settings={search_settings_id}"
+                    )
+
+                redis_connector_index.reset()
+        return
+
+    if redis_connector_index.watchdog_signaled():
+        # if the generator is complete, don't clean up until the watchdog has exited
+        task_logger.info(
+            f"Connector indexing - Delaying finalization until watchdog has exited: "
+            f"attempt={payload.index_attempt_id} "
+            f"cc_pair={cc_pair_id} "
+            f"search_settings={search_settings_id} "
+            f"progress={progress} "
+            f"elapsed_submitted={elapsed_submitted.total_seconds():.2f} "
+            f"elapsed_started={elapsed_started_str}"
+        )
+
+        return
+
+    status_enum = HTTPStatus(status_int)
+
+    task_logger.info(
+        f"Connector indexing finished: "
+        f"attempt={payload.index_attempt_id} "
+        f"cc_pair={cc_pair_id} "
+        f"search_settings={search_settings_id} "
+        f"progress={progress} "
+        f"status={status_enum.name} "
+        f"elapsed_submitted={elapsed_submitted.total_seconds():.2f} "
+        f"elapsed_started={elapsed_started_str}"
+    )
+
+    redis_connector_index.reset()
+
+
@shared_task(
    name=OnyxCeleryTask.CHECK_FOR_INDEXING,
    soft_time_limit=300,
@@ -91,6 +245,25 @@ def check_for_indexing(self: Task, *, tenant_id: str | None) -> int | None:
    try:
        locked = True

+        # SPECIAL 0/3: sync lookup table for active fences
+        # we want to run this less frequently than the overall task
+        if not redis_client.exists(OnyxRedisSignals.BLOCK_BUILD_FENCE_LOOKUP_TABLE):
+            # build a lookup table of existing fences
+            # this is just a migration concern and should be unnecessary once
+            # lookup tables are rolled out
+            for key_bytes in redis_client_replica.scan_iter(
+                count=SCAN_ITER_COUNT_DEFAULT
+            ):
+                if is_fence(key_bytes) and not redis_client.sismember(
+                    OnyxRedisConstants.ACTIVE_FENCES, key_bytes
+                ):
+                    logger.warning(f"Adding {key_bytes} to the lookup table.")
+                    redis_client.sadd(OnyxRedisConstants.ACTIVE_FENCES, key_bytes)
+
+            redis_client.set(OnyxRedisSignals.BLOCK_BUILD_FENCE_LOOKUP_TABLE, 1, ex=300)
+
+        # 1/3: KICKOFF
+
        # check for search settings swap
        with get_session_with_tenant(tenant_id=tenant_id) as db_session:
            old_search_settings = check_index_swap(db_session=db_session)
@@ -197,6 +370,8 @@ def check_for_indexing(self: Task, *, tenant_id: str | None) -> int | None:

        lock_beat.reacquire()

+        # 2/3: VALIDATE
+
        # Fail any index attempts in the DB that don't have fences
        # This shouldn't ever happen!
        with get_session_with_tenant(tenant_id) as db_session:
@@ -236,6 +411,26 @@ def check_for_indexing(self: Task, *, tenant_id: str | None) -> int | None:
                task_logger.exception("Exception while validating indexing fences")

            redis_client.set(OnyxRedisSignals.BLOCK_VALIDATE_INDEXING_FENCES, 1, ex=60)
+
+        # 3/3: FINALIZE
+        lock_beat.reacquire()
+        keys = cast(
+            set[Any], redis_client_replica.smembers(OnyxRedisConstants.ACTIVE_FENCES)
+        )
+        for key in keys:
+            key_bytes = cast(bytes, key)
+
+            if not redis_client.exists(key_bytes):
+                redis_client.srem(OnyxRedisConstants.ACTIVE_FENCES, key_bytes)
+                continue
+
+            key_str = key_bytes.decode("utf-8")
+            if key_str.startswith(RedisConnectorIndex.FENCE_PREFIX):
+                with get_session_with_tenant(tenant_id) as db_session:
+                    monitor_ccpair_indexing_taskset(
+                        tenant_id, key_bytes, redis_client_replica, db_session
+                    )
+
    except SoftTimeLimitExceeded:
        task_logger.info(
            "Soft time limit exceeded, task is being terminated gracefully."
--- a/backend/onyx/background/celery/tasks/monitoring/tasks.py
+++ b/backend/onyx/background/celery/tasks/monitoring/tasks.py
@@ -17,7 +17,8 @@ from sqlalchemy import text
 from sqlalchemy.orm import Session

 from onyx.background.celery.apps.app_base import task_logger
-from onyx.background.celery.tasks.vespa.tasks import celery_get_queue_length
+from onyx.background.celery.celery_redis import celery_get_queue_length
+from onyx.background.celery.celery_redis import celery_get_unacked_task_ids
 from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
 from onyx.configs.constants import ONYX_CLOUD_TENANT_ID
 from onyx.configs.constants import OnyxCeleryQueues
@@ -189,9 +190,9 @@ def _build_connector_start_latency_metric(
        desired_start_time = cc_pair.connector.time_created
    else:
        if not cc_pair.connector.refresh_freq:
-            task_logger.error(
-                "Found non-initial index attempt for connector "
-                "without refresh_freq. This should never happen."
+            task_logger.debug(
+                "Connector has no refresh_freq and this is a non-initial index attempt. "
+                "Assuming user manually triggered indexing, so we'll skip start latency metric."
            )
            return None

@@ -722,7 +723,7 @@ def monitor_background_processes(self: Task, *, tenant_id: str | None) -> None:


@shared_task(
-    name=OnyxCeleryTask.CLOUD_CHECK_ALEMBIC,
+    name=OnyxCeleryTask.CLOUD_MONITOR_ALEMBIC,
 )
 def cloud_check_alembic() -> bool | None:
    """A task to verify that all tenants are on the same alembic revision.
@@ -852,3 +853,55 @@ def cloud_check_alembic() -> bool | None:
        f"cloud_check_alembic finished: num_tenants={len(tenant_ids)} elapsed={time_elapsed:.2f}"
    )
    return True
+
+
+@shared_task(
+    name=OnyxCeleryTask.CLOUD_MONITOR_CELERY_QUEUES, ignore_result=True, bind=True
+)
+def cloud_monitor_celery_queues(
+    self: Task,
+) -> None:
+    return monitor_celery_queues_helper(self)
+
+
+@shared_task(name=OnyxCeleryTask.MONITOR_CELERY_QUEUES, ignore_result=True, bind=True)
+def monitor_celery_queues(self: Task, *, tenant_id: str | None) -> None:
+    return monitor_celery_queues_helper(self)
+
+
+def monitor_celery_queues_helper(
+    task: Task,
+) -> None:
+    """Log the length of every Celery queue; shared helper for the queue-monitoring tasks."""
+
+    r_celery = task.app.broker_connection().channel().client  # type: ignore
+    n_celery = celery_get_queue_length("celery", r_celery)
+    n_indexing = celery_get_queue_length(OnyxCeleryQueues.CONNECTOR_INDEXING, r_celery)
+    n_sync = celery_get_queue_length(OnyxCeleryQueues.VESPA_METADATA_SYNC, r_celery)
+    n_deletion = celery_get_queue_length(OnyxCeleryQueues.CONNECTOR_DELETION, r_celery)
+    n_pruning = celery_get_queue_length(OnyxCeleryQueues.CONNECTOR_PRUNING, r_celery)
+    n_permissions_sync = celery_get_queue_length(
+        OnyxCeleryQueues.CONNECTOR_DOC_PERMISSIONS_SYNC, r_celery
+    )
+    n_external_group_sync = celery_get_queue_length(
+        OnyxCeleryQueues.CONNECTOR_EXTERNAL_GROUP_SYNC, r_celery
+    )
+    n_permissions_upsert = celery_get_queue_length(
+        OnyxCeleryQueues.DOC_PERMISSIONS_UPSERT, r_celery
+    )
+
+    n_indexing_prefetched = celery_get_unacked_task_ids(
+        OnyxCeleryQueues.CONNECTOR_INDEXING, r_celery
+    )
+
+    task_logger.info(
+        f"Queue lengths: celery={n_celery} "
+        f"indexing={n_indexing} "
+        f"indexing_prefetched={len(n_indexing_prefetched)} "
+        f"sync={n_sync} "
+        f"deletion={n_deletion} "
+        f"pruning={n_pruning} "
+        f"permissions_sync={n_permissions_sync} "
+        f"external_group_sync={n_external_group_sync} "
+        f"permissions_upsert={n_permissions_upsert} "
+    )
--- a/backend/onyx/background/celery/tasks/pruning/tasks.py
+++ b/backend/onyx/background/celery/tasks/pruning/tasks.py
@@ -122,34 +122,39 @@ def check_for_pruning(self: Task, *, tenant_id: str | None) -> bool | None:
        return None

    try:
-        cc_pair_ids: list[int] = []
-        with get_session_with_tenant(tenant_id) as db_session:
-            cc_pairs = get_connector_credential_pairs(db_session)
-            for cc_pair_entry in cc_pairs:
-                cc_pair_ids.append(cc_pair_entry.id)
+        # the entire task needs to run frequently in order to finalize pruning

-        for cc_pair_id in cc_pair_ids:
-            lock_beat.reacquire()
+        # but pruning only kicks off once per hour
+        if not r.exists(OnyxRedisSignals.BLOCK_PRUNING):
+            cc_pair_ids: list[int] = []
            with get_session_with_tenant(tenant_id) as db_session:
-                cc_pair = get_connector_credential_pair_from_id(
-                    db_session=db_session,
-                    cc_pair_id=cc_pair_id,
-                )
-                if not cc_pair:
-                    continue
+                cc_pairs = get_connector_credential_pairs(db_session)
+                for cc_pair_entry in cc_pairs:
+                    cc_pair_ids.append(cc_pair_entry.id)

-                if not _is_pruning_due(cc_pair):
-                    continue
+            for cc_pair_id in cc_pair_ids:
+                lock_beat.reacquire()
+                with get_session_with_tenant(tenant_id) as db_session:
+                    cc_pair = get_connector_credential_pair_from_id(
+                        db_session=db_session,
+                        cc_pair_id=cc_pair_id,
+                    )
+                    if not cc_pair:
+                        continue

-                payload_id = try_creating_prune_generator_task(
-                    self.app, cc_pair, db_session, r, tenant_id
-                )
-                if not payload_id:
-                    continue
+                    if not _is_pruning_due(cc_pair):
+                        continue

-                task_logger.info(
-                    f"Pruning queued: cc_pair={cc_pair.id} id={payload_id}"
-                )
+                    payload_id = try_creating_prune_generator_task(
+                        self.app, cc_pair, db_session, r, tenant_id
+                    )
+                    if not payload_id:
+                        continue
+
+                    task_logger.info(
+                        f"Pruning queued: cc_pair={cc_pair.id} id={payload_id}"
+                    )
+            r.set(OnyxRedisSignals.BLOCK_PRUNING, 1, ex=3600)

        # we want to run this less frequently than the overall task
        lock_beat.reacquire()
@@ -163,6 +168,22 @@ def check_for_pruning(self: Task, *, tenant_id: str | None) -> bool | None:
                task_logger.exception("Exception while validating pruning fences")

            r.set(OnyxRedisSignals.BLOCK_VALIDATE_PRUNING_FENCES, 1, ex=300)
+
+        # use a lookup table to find active fences. We still have to verify the fence
+        # exists since the lookup table is an optimization and not the source of truth.
+        lock_beat.reacquire()
+        keys = cast(set[Any], r_replica.smembers(OnyxRedisConstants.ACTIVE_FENCES))
+        for key in keys:
+            key_bytes = cast(bytes, key)
+
+            if not r.exists(key_bytes):
+                r.srem(OnyxRedisConstants.ACTIVE_FENCES, key_bytes)
+                continue
+
+            key_str = key_bytes.decode("utf-8")
+            if key_str.startswith(RedisConnectorPrune.FENCE_PREFIX):
+                with get_session_with_tenant(tenant_id) as db_session:
+                    monitor_ccpair_pruning_taskset(tenant_id, key_bytes, r, db_session)
    except SoftTimeLimitExceeded:
        task_logger.info(
            "Soft time limit exceeded, task is being terminated gracefully."
@@ -481,7 +502,7 @@ def connector_pruning_generator_task(
    )


-"""Monitoring pruning utils, called in monitor_vespa_sync"""
+"""Monitoring pruning utils"""


 def monitor_ccpair_pruning_taskset(
--- a/backend/onyx/background/celery/tasks/shared/tasks.py
+++ b/backend/onyx/background/celery/tasks/shared/tasks.py
@@ -8,6 +8,7 @@ from celery.exceptions import SoftTimeLimitExceeded
 from redis.lock import Lock as RedisLock
 from tenacity import RetryError

+from ee.onyx.server.tenants.product_gating import get_gated_tenants
 from onyx.access.access import get_access_for_document
 from onyx.background.celery.apps.app_base import task_logger
 from onyx.background.celery.tasks.beat_schedule import BEAT_EXPIRES_DEFAULT
@@ -252,7 +253,11 @@ def cloud_beat_task_generator(

    try:
        tenant_ids = get_all_tenant_ids()
+        gated_tenants = get_gated_tenants()
        for tenant_id in tenant_ids:
+            if tenant_id in gated_tenants:
+                continue
+
            current_time = time.monotonic()
            if current_time - last_lock_time >= (CELERY_GENERIC_BEAT_LOCK_TIMEOUT / 4):
                lock_beat.reacquire()
@@ -270,6 +275,7 @@ def cloud_beat_task_generator(
                queue=queue,
                priority=priority,
                expires=expires,
+                ignore_result=True,
            )
    except SoftTimeLimitExceeded:
        task_logger.info(
--- a/backend/onyx/background/celery/tasks/vespa/tasks.py
+++ b/backend/onyx/background/celery/tasks/vespa/tasks.py
@@ -1,9 +1,5 @@
-import random
 import time
-import traceback
 from collections.abc import Callable
-from datetime import datetime
-from datetime import timezone
 from http import HTTPStatus
 from typing import Any
 from typing import cast
@@ -13,8 +9,6 @@ from celery import Celery
 from celery import shared_task
 from celery import Task
 from celery.exceptions import SoftTimeLimitExceeded
-from celery.result import AsyncResult
-from celery.states import READY_STATES
 from redis import Redis
 from redis.lock import Lock as RedisLock
 from sqlalchemy.orm import Session
@@ -22,47 +16,27 @@ from tenacity import RetryError

 from onyx.access.access import get_access_for_document
 from onyx.background.celery.apps.app_base import task_logger
-from onyx.background.celery.celery_redis import celery_get_queue_length
-from onyx.background.celery.celery_redis import celery_get_unacked_task_ids
-from onyx.background.celery.tasks.doc_permission_syncing.tasks import (
-    monitor_ccpair_permissions_taskset,
-)
-from onyx.background.celery.tasks.pruning.tasks import monitor_ccpair_pruning_taskset
 from onyx.background.celery.tasks.shared.RetryDocumentIndex import RetryDocumentIndex
 from onyx.background.celery.tasks.shared.tasks import LIGHT_SOFT_TIME_LIMIT
 from onyx.background.celery.tasks.shared.tasks import LIGHT_TIME_LIMIT
 from onyx.configs.app_configs import JOB_TIMEOUT
 from onyx.configs.app_configs import VESPA_SYNC_MAX_TASKS
 from onyx.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT
-from onyx.configs.constants import OnyxCeleryQueues
 from onyx.configs.constants import OnyxCeleryTask
 from onyx.configs.constants import OnyxRedisConstants
 from onyx.configs.constants import OnyxRedisLocks
-from onyx.configs.constants import OnyxRedisSignals
-from onyx.db.connector import fetch_connector_by_id
-from onyx.db.connector_credential_pair import add_deletion_failure_message
-from onyx.db.connector_credential_pair import (
-    delete_connector_credential_pair__no_commit,
-)
-from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
 from onyx.db.connector_credential_pair import get_connector_credential_pairs
 from onyx.db.document import count_documents_by_needs_sync
 from onyx.db.document import get_document
-from onyx.db.document import get_document_ids_for_connector_credential_pair
 from onyx.db.document import mark_document_as_synced
 from onyx.db.document_set import delete_document_set
-from onyx.db.document_set import delete_document_set_cc_pair_relationship__no_commit
 from onyx.db.document_set import fetch_document_sets
 from onyx.db.document_set import fetch_document_sets_for_document
 from onyx.db.document_set import get_document_set_by_id
 from onyx.db.document_set import mark_document_set_as_synced
 from onyx.db.engine import get_session_with_tenant
-from onyx.db.enums import IndexingStatus
 from onyx.db.enums import SyncStatus
 from onyx.db.enums import SyncType
-from onyx.db.index_attempt import delete_index_attempts
-from onyx.db.index_attempt import get_index_attempt
-from onyx.db.index_attempt import mark_attempt_failed
 from onyx.db.models import DocumentSet
 from onyx.db.models import UserGroup
 from onyx.db.search_settings import get_active_search_settings
@@ -72,20 +46,14 @@ from onyx.db.sync_record import update_sync_record_status
 from onyx.document_index.factory import get_default_document_index
 from onyx.document_index.interfaces import VespaDocumentFields
 from onyx.httpx.httpx_pool import HttpxPool
-from onyx.redis.redis_connector import RedisConnector
 from onyx.redis.redis_connector_credential_pair import RedisConnectorCredentialPair
 from onyx.redis.redis_connector_credential_pair import (
    RedisGlobalConnectorCredentialPair,
 )
-from onyx.redis.redis_connector_delete import RedisConnectorDelete
-from onyx.redis.redis_connector_doc_perm_sync import RedisConnectorPermissionSync
-from onyx.redis.redis_connector_index import RedisConnectorIndex
-from onyx.redis.redis_connector_prune import RedisConnectorPrune
 from onyx.redis.redis_document_set import RedisDocumentSet
 from onyx.redis.redis_pool import get_redis_client
 from onyx.redis.redis_pool import get_redis_replica_client
 from onyx.redis.redis_pool import redis_lock_dump
-from onyx.redis.redis_pool import SCAN_ITER_COUNT_DEFAULT
 from onyx.redis.redis_usergroup import RedisUserGroup
 from onyx.utils.logger import setup_logger
 from onyx.utils.variable_functionality import fetch_versioned_implementation
@@ -94,7 +62,6 @@ from onyx.utils.variable_functionality import (
 )
 from onyx.utils.variable_functionality import global_version
 from onyx.utils.variable_functionality import noop_fallback
-from shared_configs.configs import MULTI_TENANT

 logger = setup_logger()

@@ -111,9 +78,14 @@ logger = setup_logger()
 def check_for_vespa_sync_task(self: Task, *, tenant_id: str | None) -> bool | None:
    """Runs periodically to check if any document needs syncing.
    Generates sets of tasks for Celery if syncing is needed."""
+
+    # Useful for debugging timing issues with reacquisitions. TODO: remove once more generalized logging is in place
+    task_logger.info("check_for_vespa_sync_task started")
+
    time_start = time.monotonic()

    r = get_redis_client(tenant_id=tenant_id)
+    r_replica = get_redis_replica_client(tenant_id=tenant_id)

    lock_beat: RedisLock = r.lock(
        OnyxRedisLocks.CHECK_VESPA_SYNC_BEAT_LOCK,
@@ -125,6 +97,7 @@ def check_for_vespa_sync_task(self: Task, *, tenant_id: str | None) -> bool | No
        return None

    try:
+        # 1/3: KICKOFF
        with get_session_with_tenant(tenant_id) as db_session:
            try_generate_stale_document_sync_tasks(
                self.app, VESPA_SYNC_MAX_TASKS, db_session, r, lock_beat, tenant_id
@@ -151,9 +124,8 @@ def check_for_vespa_sync_task(self: Task, *, tenant_id: str | None) -> bool | No
        # endregion

        # check if any user groups are not synced
+        lock_beat.reacquire()
        if global_version.is_ee_version():
-            lock_beat.reacquire()
-
            try:
                fetch_user_groups = fetch_versioned_implementation(
                    "onyx.db.user_group", "fetch_user_groups"
@@ -179,6 +151,35 @@ def check_for_vespa_sync_task(self: Task, *, tenant_id: str | None) -> bool | No
                            self.app, usergroup_id, db_session, r, lock_beat, tenant_id
                        )

+        # 2/3: VALIDATE: TODO
+
+        # 3/3: FINALIZE
+        lock_beat.reacquire()
+        keys = cast(set[Any], r_replica.smembers(OnyxRedisConstants.ACTIVE_FENCES))
+        for key in keys:
+            key_bytes = cast(bytes, key)
+
+            if not r.exists(key_bytes):
+                r.srem(OnyxRedisConstants.ACTIVE_FENCES, key_bytes)
+                continue
+
+            key_str = key_bytes.decode("utf-8")
+            if key_str == RedisGlobalConnectorCredentialPair.FENCE_KEY:
+                monitor_connector_taskset(r)
+            elif key_str.startswith(RedisDocumentSet.FENCE_PREFIX):
+                with get_session_with_tenant(tenant_id) as db_session:
+                    monitor_document_set_taskset(tenant_id, key_bytes, r, db_session)
+            elif key_str.startswith(RedisUserGroup.FENCE_PREFIX):
+                monitor_usergroup_taskset = (
+                    fetch_versioned_implementation_with_fallback(
+                        "onyx.background.celery.tasks.vespa.tasks",
+                        "monitor_usergroup_taskset",
+                        noop_fallback,
+                    )
+                )
+                with get_session_with_tenant(tenant_id) as db_session:
+                    monitor_usergroup_taskset(tenant_id, key_bytes, r, db_session)
+
    except SoftTimeLimitExceeded:
        task_logger.info(
            "Soft time limit exceeded, task is being terminated gracefully."
@@ -495,484 +496,23 @@ def monitor_document_set_taskset(
            task_logger.info(
                f"Successfully synced document set: document_set={document_set_id}"
            )
-        update_sync_record_status(
-            db_session=db_session,
-            entity_id=document_set_id,
-            sync_type=SyncType.DOCUMENT_SET,
-            sync_status=SyncStatus.SUCCESS,
-            num_docs_synced=initial_count,
-        )
-
-    rds.reset()
-
-
-def monitor_connector_deletion_taskset(
-    tenant_id: str | None, key_bytes: bytes, r: Redis
-) -> None:
-    fence_key = key_bytes.decode("utf-8")
-    cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key)
-    if cc_pair_id_str is None:
-        task_logger.warning(f"could not parse cc_pair_id from {fence_key}")
-        return
-
-    cc_pair_id = int(cc_pair_id_str)
-
-    redis_connector = RedisConnector(tenant_id, cc_pair_id)
-
-    fence_data = redis_connector.delete.payload
-    if not fence_data:
-        task_logger.warning(
-            f"Connector deletion - fence payload invalid: cc_pair={cc_pair_id}"
-        )
-        return
-
-    if fence_data.num_tasks is None:
-        # the fence is setting up but isn't ready yet
-        return
-
-    remaining = redis_connector.delete.get_remaining()
-    task_logger.info(
-        f"Connector deletion progress: cc_pair={cc_pair_id} remaining={remaining} initial={fence_data.num_tasks}"
-    )
-    if remaining > 0:
-        with get_session_with_tenant(tenant_id) as db_session:
-            update_sync_record_status(
-                db_session=db_session,
-                entity_id=cc_pair_id,
-                sync_type=SyncType.CONNECTOR_DELETION,
-                sync_status=SyncStatus.IN_PROGRESS,
-                num_docs_synced=remaining,
-            )
-        return
-
-    with get_session_with_tenant(tenant_id) as db_session:
-        cc_pair = get_connector_credential_pair_from_id(
-            db_session=db_session,
-            cc_pair_id=cc_pair_id,
-        )
-        if not cc_pair:
-            task_logger.warning(
-                f"Connector deletion - cc_pair not found: cc_pair={cc_pair_id}"
-            )
-            return

        try:
-            doc_ids = get_document_ids_for_connector_credential_pair(
-                db_session, cc_pair.connector_id, cc_pair.credential_id
-            )
-            if len(doc_ids) > 0:
-                # NOTE(rkuo): if this happens, documents somehow got added while
-                # deletion was in progress. Likely a bug gating off pruning and indexing
-                # work before deletion starts.
-                task_logger.warning(
-                    "Connector deletion - documents still found after taskset completion. "
-                    "Clearing the current deletion attempt and allowing deletion to restart: "
-                    f"cc_pair={cc_pair_id} "
-                    f"docs_deleted={fence_data.num_tasks} "
-                    f"docs_remaining={len(doc_ids)}"
-                )
-
-                # We don't want to waive off why we get into this state, but resetting
-                # our attempt and letting the deletion restart is a good way to recover
-                redis_connector.delete.reset()
-                raise RuntimeError(
-                    "Connector deletion - documents still found after taskset completion"
-                )
-
-            # clean up the rest of the related Postgres entities
-            # index attempts
-            delete_index_attempts(
-                db_session=db_session,
-                cc_pair_id=cc_pair_id,
-            )
-
-            # document sets
-            delete_document_set_cc_pair_relationship__no_commit(
-                db_session=db_session,
-                connector_id=cc_pair.connector_id,
-                credential_id=cc_pair.credential_id,
-            )
-
-            # user groups
-            cleanup_user_groups = fetch_versioned_implementation_with_fallback(
-                "onyx.db.user_group",
-                "delete_user_group_cc_pair_relationship__no_commit",
-                noop_fallback,
-            )
-            cleanup_user_groups(
-                cc_pair_id=cc_pair_id,
-                db_session=db_session,
-            )
-
-            # finally, delete the cc-pair
-            delete_connector_credential_pair__no_commit(
-                db_session=db_session,
-                connector_id=cc_pair.connector_id,
-                credential_id=cc_pair.credential_id,
-            )
-            # if there are no credentials left, delete the connector
-            connector = fetch_connector_by_id(
-                db_session=db_session,
-                connector_id=cc_pair.connector_id,
-            )
-            if not connector or not len(connector.credentials):
-                task_logger.info(
-                    "Connector deletion - Found no credentials left for connector, deleting connector"
-                )
-                db_session.delete(connector)
-            db_session.commit()
-
            update_sync_record_status(
                db_session=db_session,
-                entity_id=cc_pair_id,
-                sync_type=SyncType.CONNECTOR_DELETION,
+                entity_id=document_set_id,
+                sync_type=SyncType.DOCUMENT_SET,
                sync_status=SyncStatus.SUCCESS,
-                num_docs_synced=fence_data.num_tasks,
+                num_docs_synced=initial_count,
            )
-
-        except Exception as e:
-            db_session.rollback()
-            stack_trace = traceback.format_exc()
-            error_message = f"Error: {str(e)}\n\nStack Trace:\n{stack_trace}"
-            add_deletion_failure_message(db_session, cc_pair_id, error_message)
-
-            update_sync_record_status(
-                db_session=db_session,
-                entity_id=cc_pair_id,
-                sync_type=SyncType.CONNECTOR_DELETION,
-                sync_status=SyncStatus.FAILED,
-                num_docs_synced=fence_data.num_tasks,
-            )
-
+        except Exception:
            task_logger.exception(
-                f"Connector deletion exceptioned: "
-                f"cc_pair={cc_pair_id} connector={cc_pair.connector_id} credential={cc_pair.credential_id}"
-            )
-            raise e
-
-    task_logger.info(
-        f"Connector deletion succeeded: "
-        f"cc_pair={cc_pair_id} "
-        f"connector={cc_pair.connector_id} "
-        f"credential={cc_pair.credential_id} "
-        f"docs_deleted={fence_data.num_tasks}"
-    )
-
-    redis_connector.delete.reset()
-
-
-def monitor_ccpair_indexing_taskset(
-    tenant_id: str | None, key_bytes: bytes, r: Redis, db_session: Session
-) -> None:
-    # if the fence doesn't exist, there's nothing to do
-    fence_key = key_bytes.decode("utf-8")
-    composite_id = RedisConnector.get_id_from_fence_key(fence_key)
-    if composite_id is None:
-        task_logger.warning(
-            f"Connector indexing: could not parse composite_id from {fence_key}"
-        )
-        return
-
-    # parse out metadata and initialize the helper class with it
-    parts = composite_id.split("/")
-    if len(parts) != 2:
-        return
-
-    cc_pair_id = int(parts[0])
-    search_settings_id = int(parts[1])
-
-    redis_connector = RedisConnector(tenant_id, cc_pair_id)
-    redis_connector_index = redis_connector.new_index(search_settings_id)
-    if not redis_connector_index.fenced:
-        return
-
-    payload = redis_connector_index.payload
-    if not payload:
-        return
-
-    elapsed_started_str = None
-    if payload.started:
-        elapsed_started = datetime.now(timezone.utc) - payload.started
-        elapsed_started_str = f"{elapsed_started.total_seconds():.2f}"
-
-    elapsed_submitted = datetime.now(timezone.utc) - payload.submitted
-
-    progress = redis_connector_index.get_progress()
-    if progress is not None:
-        task_logger.info(
-            f"Connector indexing progress: "
-            f"attempt={payload.index_attempt_id} "
-            f"cc_pair={cc_pair_id} "
-            f"search_settings={search_settings_id} "
-            f"progress={progress} "
-            f"elapsed_submitted={elapsed_submitted.total_seconds():.2f} "
-            f"elapsed_started={elapsed_started_str}"
-        )
-
-    if payload.index_attempt_id is None or payload.celery_task_id is None:
-        # the task is still setting up
-        return
-
-    # never use any blocking methods on the result from inside a task!
-    result: AsyncResult = AsyncResult(payload.celery_task_id)
-
-    # inner/outer/inner double check pattern to avoid race conditions when checking for
-    # bad state
-
-    # Verify: if the generator isn't complete, the task must not be in READY state
-    # inner = get_completion / generator_complete not signaled
-    # outer = result.state in READY state
-    status_int = redis_connector_index.get_completion()
-    if status_int is None:  # inner signal not set ... possible error
-        task_state = result.state
-        if (
-            task_state in READY_STATES
-        ):  # outer signal in terminal state ... possible error
-            # Now double check!
-            if redis_connector_index.get_completion() is None:
-                # inner signal still not set (and cannot change when outer result_state is READY)
-                # Task is finished but generator complete isn't set.
-                # We have a problem! Worker may have crashed.
-                task_result = str(result.result)
-                task_traceback = str(result.traceback)
-
-                msg = (
-                    f"Connector indexing aborted or exceptioned: "
-                    f"attempt={payload.index_attempt_id} "
-                    f"celery_task={payload.celery_task_id} "
-                    f"cc_pair={cc_pair_id} "
-                    f"search_settings={search_settings_id} "
-                    f"elapsed_submitted={elapsed_submitted.total_seconds():.2f} "
-                    f"result.state={task_state} "
-                    f"result.result={task_result} "
-                    f"result.traceback={task_traceback}"
-                )
-                task_logger.warning(msg)
-
-                try:
-                    index_attempt = get_index_attempt(
-                        db_session, payload.index_attempt_id
-                    )
-                    if index_attempt:
-                        if (
-                            index_attempt.status != IndexingStatus.CANCELED
-                            and index_attempt.status != IndexingStatus.FAILED
-                        ):
-                            mark_attempt_failed(
-                                index_attempt_id=payload.index_attempt_id,
-                                db_session=db_session,
-                                failure_reason=msg,
-                            )
-                except Exception:
-                    task_logger.exception(
-                        "Connector indexing - Transient exception marking index attempt as failed: "
-                        f"attempt={payload.index_attempt_id} "
-                        f"tenant={tenant_id} "
-                        f"cc_pair={cc_pair_id} "
-                        f"search_settings={search_settings_id}"
-                    )
-
-                redis_connector_index.reset()
-        return
-
-    if redis_connector_index.watchdog_signaled():
-        # if the generator is complete, don't clean up until the watchdog has exited
-        task_logger.info(
-            f"Connector indexing - Delaying finalization until watchdog has exited: "
-            f"attempt={payload.index_attempt_id} "
-            f"cc_pair={cc_pair_id} "
-            f"search_settings={search_settings_id} "
-            f"progress={progress} "
-            f"elapsed_submitted={elapsed_submitted.total_seconds():.2f} "
-            f"elapsed_started={elapsed_started_str}"
-        )
-
-        return
-
-    status_enum = HTTPStatus(status_int)
-
-    task_logger.info(
-        f"Connector indexing finished: "
-        f"attempt={payload.index_attempt_id} "
-        f"cc_pair={cc_pair_id} "
-        f"search_settings={search_settings_id} "
-        f"progress={progress} "
-        f"status={status_enum.name} "
-        f"elapsed_submitted={elapsed_submitted.total_seconds():.2f} "
-        f"elapsed_started={elapsed_started_str}"
-    )
-
-    redis_connector_index.reset()
-
-
-@shared_task(
-    name=OnyxCeleryTask.MONITOR_VESPA_SYNC,
-    ignore_result=True,
-    soft_time_limit=300,
-    bind=True,
-)
-def monitor_vespa_sync(self: Task, tenant_id: str | None) -> bool | None:
-    """This is a celery beat task that monitors and finalizes various long running tasks.
-
-    The name monitor_vespa_sync is a bit of a misnomer since it checks many different tasks
-    now. Should change that at some point.
-
-    It scans for fence values and then gets the counts of any associated tasksets.
-    For many tasks, the count is 0, that means all tasks finished and we should clean up.
-
-    This task lock timeout is CELERY_METADATA_SYNC_BEAT_LOCK_TIMEOUT seconds, so don't
-    do anything too expensive in this function!
-
-    Returns True if the task actually did work, False if it exited early to prevent overlap
-    """
-    task_logger.info(f"monitor_vespa_sync starting: tenant={tenant_id}")
-
-    time_start = time.monotonic()
-
-    r = get_redis_client(tenant_id=tenant_id)
-
-    # Replica usage notes
-    #
-    # False negatives are OK. (aka fail to to see a key that exists on the master).
-    # We simply skip the monitoring work and it will be caught on the next pass.
-    #
-    # False positives are not OK, and are possible if we clear a fence on the master and
-    # then read from the replica. In this case, monitoring work could be done on a fence
-    # that no longer exists. To avoid this, we scan from the replica, but double check
-    # the result on the master.
-    r_replica = get_redis_replica_client(tenant_id=tenant_id)
-
-    lock_beat: RedisLock = r.lock(
-        OnyxRedisLocks.MONITOR_VESPA_SYNC_BEAT_LOCK,
-        timeout=CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT,
-    )
-
-    # prevent overlapping tasks
-    if not lock_beat.acquire(blocking=False):
-        return None
-
-    try:
-        # print current queue lengths
-        time.monotonic()
-        # we don't need every tenant polling redis for this info.
-        if not MULTI_TENANT or random.randint(1, 10) == 10:
-            r_celery = self.app.broker_connection().channel().client  # type: ignore
-            n_celery = celery_get_queue_length("celery", r_celery)
-            n_indexing = celery_get_queue_length(
-                OnyxCeleryQueues.CONNECTOR_INDEXING, r_celery
-            )
-            n_sync = celery_get_queue_length(
-                OnyxCeleryQueues.VESPA_METADATA_SYNC, r_celery
-            )
-            n_deletion = celery_get_queue_length(
-                OnyxCeleryQueues.CONNECTOR_DELETION, r_celery
-            )
-            n_pruning = celery_get_queue_length(
-                OnyxCeleryQueues.CONNECTOR_PRUNING, r_celery
-            )
-            n_permissions_sync = celery_get_queue_length(
-                OnyxCeleryQueues.CONNECTOR_DOC_PERMISSIONS_SYNC, r_celery
-            )
-            n_external_group_sync = celery_get_queue_length(
-                OnyxCeleryQueues.CONNECTOR_EXTERNAL_GROUP_SYNC, r_celery
-            )
-            n_permissions_upsert = celery_get_queue_length(
-                OnyxCeleryQueues.DOC_PERMISSIONS_UPSERT, r_celery
+                "update_sync_record_status exceptioned. "
+                f"document_set_id={document_set_id} "
+                "Resetting document set regardless."
            )

-            prefetched = celery_get_unacked_task_ids(
-                OnyxCeleryQueues.CONNECTOR_INDEXING, r_celery
-            )
-
-            task_logger.info(
-                f"Queue lengths: celery={n_celery} "
-                f"indexing={n_indexing} "
-                f"indexing_prefetched={len(prefetched)} "
-                f"sync={n_sync} "
-                f"deletion={n_deletion} "
-                f"pruning={n_pruning} "
-                f"permissions_sync={n_permissions_sync} "
-                f"external_group_sync={n_external_group_sync} "
-                f"permissions_upsert={n_permissions_upsert} "
-            )
-
-        # we want to run this less frequently than the overall task
-        if not r.exists(OnyxRedisSignals.BLOCK_BUILD_FENCE_LOOKUP_TABLE):
-            # build a lookup table of existing fences
-            # this is just a migration concern and should be unnecessary once
-            # lookup tables are rolled out
-            for key_bytes in r_replica.scan_iter(count=SCAN_ITER_COUNT_DEFAULT):
-                if is_fence(key_bytes) and not r.sismember(
-                    OnyxRedisConstants.ACTIVE_FENCES, key_bytes
-                ):
-                    logger.warning(f"Adding {key_bytes} to the lookup table.")
-                    r.sadd(OnyxRedisConstants.ACTIVE_FENCES, key_bytes)
-
-            r.set(OnyxRedisSignals.BLOCK_BUILD_FENCE_LOOKUP_TABLE, 1, ex=300)
-
-        # use a lookup table to find active fences. We still have to verify the fence
-        # exists since it is an optimization and not the source of truth.
-        keys = cast(set[Any], r_replica.smembers(OnyxRedisConstants.ACTIVE_FENCES))
-        for key in keys:
-            key_bytes = cast(bytes, key)
-
-            if not r.exists(key_bytes):
-                r.srem(OnyxRedisConstants.ACTIVE_FENCES, key_bytes)
-                continue
-
-            key_str = key_bytes.decode("utf-8")
-            if key_str == RedisGlobalConnectorCredentialPair.FENCE_KEY:
-                monitor_connector_taskset(r)
-            elif key_str.startswith(RedisDocumentSet.FENCE_PREFIX):
-                with get_session_with_tenant(tenant_id) as db_session:
-                    monitor_document_set_taskset(tenant_id, key_bytes, r, db_session)
-            elif key_str.startswith(RedisUserGroup.FENCE_PREFIX):
-                monitor_usergroup_taskset = (
-                    fetch_versioned_implementation_with_fallback(
-                        "onyx.background.celery.tasks.vespa.tasks",
-                        "monitor_usergroup_taskset",
-                        noop_fallback,
-                    )
-                )
-                with get_session_with_tenant(tenant_id) as db_session:
-                    monitor_usergroup_taskset(tenant_id, key_bytes, r, db_session)
-            elif key_str.startswith(RedisConnectorDelete.FENCE_PREFIX):
-                monitor_connector_deletion_taskset(tenant_id, key_bytes, r)
-            elif key_str.startswith(RedisConnectorPrune.FENCE_PREFIX):
-                with get_session_with_tenant(tenant_id) as db_session:
-                    monitor_ccpair_pruning_taskset(tenant_id, key_bytes, r, db_session)
-            elif key_str.startswith(RedisConnectorIndex.FENCE_PREFIX):
-                with get_session_with_tenant(tenant_id) as db_session:
-                    monitor_ccpair_indexing_taskset(tenant_id, key_bytes, r, db_session)
-            elif key_str.startswith(RedisConnectorPermissionSync.FENCE_PREFIX):
-                with get_session_with_tenant(tenant_id) as db_session:
-                    monitor_ccpair_permissions_taskset(
-                        tenant_id, key_bytes, r, db_session
-                    )
-            else:
-                pass
-    except SoftTimeLimitExceeded:
-        task_logger.info(
-            "Soft time limit exceeded, task is being terminated gracefully."
-        )
-        return False
-    except Exception:
-        task_logger.exception("monitor_vespa_sync exceptioned.")
-        return False
-    finally:
-        if lock_beat.owned():
-            lock_beat.release()
-        else:
-            task_logger.error(
-                "monitor_vespa_sync - Lock not owned on completion: "
-                f"tenant={tenant_id}"
-                # f"timings={timings}"
-            )
-            redis_lock_dump(lock_beat, r)
-
-    time_elapsed = time.monotonic() - time_start
-    task_logger.info(f"monitor_vespa_sync finished: elapsed={time_elapsed:.2f}")
-    return True
+    rds.reset()


@shared_task(
@@ -1072,23 +612,3 @@ def vespa_metadata_sync_task(
        self.retry(exc=e, countdown=countdown)

    return True
-
-
-def is_fence(key_bytes: bytes) -> bool:
-    key_str = key_bytes.decode("utf-8")
-    if key_str == RedisGlobalConnectorCredentialPair.FENCE_KEY:
-        return True
-    if key_str.startswith(RedisDocumentSet.FENCE_PREFIX):
-        return True
-    if key_str.startswith(RedisUserGroup.FENCE_PREFIX):
-        return True
-    if key_str.startswith(RedisConnectorDelete.FENCE_PREFIX):
-        return True
-    if key_str.startswith(RedisConnectorPrune.FENCE_PREFIX):
-        return True
-    if key_str.startswith(RedisConnectorIndex.FENCE_PREFIX):
-        return True
-    if key_str.startswith(RedisConnectorPermissionSync.FENCE_PREFIX):
-        return True
-
-    return False
--- a/backend/onyx/background/error_logging.py
+++ b/backend/onyx/background/error_logging.py
@@ -0,0 +1,13 @@
+from onyx.db.background_error import create_background_error
+from onyx.db.engine import get_session_with_tenant
+
+
+def emit_background_error(
+    message: str,
+    cc_pair_id: int | None = None,
+) -> None:
+    """Currently just saves a row in the background_errors table.
+
+    In the future, could create notifications based on the severity."""
+    with get_session_with_tenant() as db_session:
+        create_background_error(db_session, message, cc_pair_id)
--- a/backend/onyx/configs/agent_configs.py
+++ b/backend/onyx/configs/agent_configs.py
@@ -3,6 +3,24 @@ import os
 INITIAL_SEARCH_DECOMPOSITION_ENABLED = True
 ALLOW_REFINEMENT = True

+AGENT_DEFAULT_RETRIEVAL_HITS = 15
+AGENT_DEFAULT_RERANKING_HITS = 10
+AGENT_DEFAULT_SUB_QUESTION_MAX_CONTEXT_HITS = 8
+AGENT_DEFAULT_NUM_DOCS_FOR_INITIAL_DECOMPOSITION = 3
+AGENT_DEFAULT_NUM_DOCS_FOR_REFINED_DECOMPOSITION = 5
+
+AGENT_DEFAULT_MAX_STREAMED_DOCS_FOR_INITIAL_ANSWER = 25
+AGENT_DEFAULT_MAX_STREAMED_DOCS_FOR_REFINED_ANSWER = 35
+
+
+AGENT_DEFAULT_EXPLORATORY_SEARCH_RESULTS = 5
+AGENT_DEFAULT_MIN_ORIG_QUESTION_DOCS = 3
+AGENT_DEFAULT_MAX_ANSWER_CONTEXT_DOCS = 10
+AGENT_DEFAULT_MAX_STATIC_HISTORY_WORD_LENGTH = 2000
+
+INITIAL_SEARCH_DECOMPOSITION_ENABLED = True
+ALLOW_REFINEMENT = True
+
 AGENT_DEFAULT_RETRIEVAL_HITS = 15
 AGENT_DEFAULT_RERANKING_HITS = 10
 AGENT_DEFAULT_SUB_QUESTION_MAX_CONTEXT_HITS = 8
@@ -13,9 +31,21 @@ AGENT_DEFAULT_MIN_ORIG_QUESTION_DOCS = 3
 AGENT_DEFAULT_MAX_ANSWER_CONTEXT_DOCS = 10
 AGENT_DEFAULT_MAX_STATIC_HISTORY_WORD_LENGTH = 2000

-#####
-# Agent Configs
-#####
+AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_GENERAL_GENERATION = 30  # in seconds
+
+AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION = 10  # in seconds
+AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION = 25  # in seconds
+AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION = 4  # in seconds
+AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION = 1  # in seconds
+AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION = 3  # in seconds
+AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION = 12  # in seconds
+AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK = 8  # in seconds
+AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION = 25  # in seconds
+
+AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION = 6  # in seconds
+AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION = 25  # in seconds
+AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_VALIDATION = 8  # in seconds
+AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS = 8  # in seconds


 AGENT_RETRIEVAL_STATS = (
@@ -77,4 +107,151 @@ AGENT_MAX_STATIC_HISTORY_WORD_LENGTH = int(
    or AGENT_DEFAULT_MAX_STATIC_HISTORY_WORD_LENGTH
 )  # 2000

+AGENT_MAX_STREAMED_DOCS_FOR_INITIAL_ANSWER = int(
+    os.environ.get("AGENT_MAX_STREAMED_DOCS_FOR_INITIAL_ANSWER")
+    or AGENT_DEFAULT_MAX_STREAMED_DOCS_FOR_INITIAL_ANSWER
+)  # 25
+
+AGENT_MAX_STREAMED_DOCS_FOR_REFINED_ANSWER = int(
+    os.environ.get("AGENT_MAX_STREAMED_DOCS_FOR_REFINED_ANSWER")
+    or AGENT_DEFAULT_MAX_STREAMED_DOCS_FOR_REFINED_ANSWER
+)  # 35
+
+
+AGENT_RETRIEVAL_STATS = (
+    os.environ.get("AGENT_RETRIEVAL_STATS") != "False"
+)  # default True; only the exact string "False" disables retrieval stats
+
+
+AGENT_MAX_QUERY_RETRIEVAL_RESULTS = int(
+    os.environ.get("AGENT_MAX_QUERY_RETRIEVAL_RESULTS") or AGENT_DEFAULT_RETRIEVAL_HITS
+)  # 15
+
+AGENT_MAX_QUERY_RETRIEVAL_RESULTS = int(
+    os.environ.get("AGENT_MAX_QUERY_RETRIEVAL_RESULTS") or AGENT_DEFAULT_RETRIEVAL_HITS
+)  # 15
+
+# Reranking agent configs
+# Reranking stats - no influence on flow outside of stats collection
+AGENT_RERANKING_STATS = (
+    os.environ.get("AGENT_RERANKING_STATS") == "True"
+)  # default False; only the exact string "True" enables reranking stats
+
+AGENT_MAX_QUERY_RETRIEVAL_RESULTS = int(
+    os.environ.get("AGENT_MAX_QUERY_RETRIEVAL_RESULTS") or AGENT_DEFAULT_RETRIEVAL_HITS
+)  # 15
+
+AGENT_RERANKING_MAX_QUERY_RETRIEVAL_RESULTS = int(
+    os.environ.get("AGENT_RERANKING_MAX_QUERY_RETRIEVAL_RESULTS")
+    or AGENT_DEFAULT_RERANKING_HITS
+)  # 10
+
+AGENT_NUM_DOCS_FOR_DECOMPOSITION = int(
+    os.environ.get("AGENT_NUM_DOCS_FOR_DECOMPOSITION")
+    or AGENT_DEFAULT_NUM_DOCS_FOR_INITIAL_DECOMPOSITION
+)  # 3
+
+AGENT_NUM_DOCS_FOR_REFINED_DECOMPOSITION = int(
+    os.environ.get("AGENT_NUM_DOCS_FOR_REFINED_DECOMPOSITION")
+    or AGENT_DEFAULT_NUM_DOCS_FOR_REFINED_DECOMPOSITION
+)  # 5
+
+AGENT_EXPLORATORY_SEARCH_RESULTS = int(
+    os.environ.get("AGENT_EXPLORATORY_SEARCH_RESULTS")
+    or AGENT_DEFAULT_EXPLORATORY_SEARCH_RESULTS
+)  # 5
+
+AGENT_MIN_ORIG_QUESTION_DOCS = int(
+    os.environ.get("AGENT_MIN_ORIG_QUESTION_DOCS")
+    or AGENT_DEFAULT_MIN_ORIG_QUESTION_DOCS
+)  # 3
+
+AGENT_MAX_ANSWER_CONTEXT_DOCS = int(
+    os.environ.get("AGENT_MAX_ANSWER_CONTEXT_DOCS")
+    or AGENT_DEFAULT_SUB_QUESTION_MAX_CONTEXT_HITS
+)  # 8
+
+
+AGENT_MAX_STATIC_HISTORY_WORD_LENGTH = int(
+    os.environ.get("AGENT_MAX_STATIC_HISTORY_WORD_LENGTH")
+    or AGENT_DEFAULT_MAX_STATIC_HISTORY_WORD_LENGTH
+)  # 2000
+
+
+AGENT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION = int(
+    os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION")
+    or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION
+)  # 25
+
+
+AGENT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION = int(
+    os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION")
+    or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION
+)  # 1
+
+AGENT_TIMEOUT_OVERRIDE_LLM_GENERAL_GENERATION = int(
+    os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_GENERAL_GENERATION")
+    or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_GENERAL_GENERATION
+)  # 30
+
+
+AGENT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION = int(
+    os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION")
+    or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION
+)  # 3
+
+
+AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION = int(
+    os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION")
+    or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION
+)  # 12
+
+
+AGENT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION = int(
+    os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION")
+    or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION
+)  # 25
+
+
+AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION = int(
+    os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION")
+    or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION
+)  # 25
+
+
+AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK = int(
+    os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK")
+    or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK
+)  # 8
+
+
+AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION = int(
+    os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION")
+    or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION
+)  # 6
+
+
+AGENT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION = int(
+    os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION")
+    or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION
+)  # 4
+
+
+AGENT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION = int(
+    os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION")
+    or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION
+)  # 10
+
+
+AGENT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS = int(
+    os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS")
+    or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS
+)  # 8
+
+
+AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_VALIDATION = int(
+    os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_VALIDATION")
+    or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_VALIDATION
+)  # 8
+
 GRAPH_VERSION_NAME: str = "a"
--- a/backend/onyx/configs/constants.py
+++ b/backend/onyx/configs/constants.py
@@ -107,9 +107,9 @@ CELERY_TASK_WAIT_FOR_FENCE_TIMEOUT = 5 * 60  # 5 min

 # needs to be long enough to cover the maximum time it takes to download an object
 # if we can get callbacks as object bytes download, we could lower this a lot.
-CELERY_PRUNING_LOCK_TIMEOUT = 300  # 5 min
+CELERY_PRUNING_LOCK_TIMEOUT = 3600  # 1 hour (in seconds)

-CELERY_PERMISSIONS_SYNC_LOCK_TIMEOUT = 300  # 5 min
+CELERY_PERMISSIONS_SYNC_LOCK_TIMEOUT = 3600  # 1 hour (in seconds)

 CELERY_EXTERNAL_GROUP_SYNC_LOCK_TIMEOUT = 300  # 5 min

@@ -125,6 +125,7 @@ class DocumentSource(str, Enum):
    GMAIL = "gmail"
    REQUESTTRACKER = "requesttracker"
    GITHUB = "github"
+    GITBOOK = "gitbook"
    GITLAB = "gitlab"
    GURU = "guru"
    BOOKSTACK = "bookstack"
@@ -298,7 +299,6 @@ class OnyxRedisLocks:
    CHECK_CONNECTOR_EXTERNAL_GROUP_SYNC_BEAT_LOCK = (
        "da_lock:check_connector_external_group_sync_beat"
    )
-    MONITOR_VESPA_SYNC_BEAT_LOCK = "da_lock:monitor_vespa_sync_beat"
    MONITOR_BACKGROUND_PROCESSES_LOCK = "da_lock:monitor_background_processes"

    CONNECTOR_DOC_PERMISSIONS_SYNC_LOCK_PREFIX = (
@@ -324,6 +324,7 @@ class OnyxRedisSignals:
    BLOCK_VALIDATE_PERMISSION_SYNC_FENCES = (
        "signal:block_validate_permission_sync_fences"
    )
+    BLOCK_PRUNING = "signal:block_pruning"
    BLOCK_VALIDATE_PRUNING_FENCES = "signal:block_validate_pruning_fences"
    BLOCK_BUILD_FENCE_LOOKUP_TABLE = "signal:block_build_fence_lookup_table"

@@ -354,7 +355,10 @@ class OnyxCeleryTask:
    DEFAULT = "celery"

    CLOUD_BEAT_TASK_GENERATOR = f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_generate_beat_tasks"
-    CLOUD_CHECK_ALEMBIC = f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_check_alembic"
+    CLOUD_MONITOR_ALEMBIC = f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_monitor_alembic"
+    CLOUD_MONITOR_CELERY_QUEUES = (
+        f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_monitor_celery_queues"
+    )

    CHECK_FOR_CONNECTOR_DELETION = "check_for_connector_deletion_task"
    CHECK_FOR_VESPA_SYNC_TASK = "check_for_vespa_sync_task"
@@ -364,8 +368,8 @@ class OnyxCeleryTask:
    CHECK_FOR_EXTERNAL_GROUP_SYNC = "check_for_external_group_sync"
    CHECK_FOR_LLM_MODEL_UPDATE = "check_for_llm_model_update"

-    MONITOR_VESPA_SYNC = "monitor_vespa_sync"
    MONITOR_BACKGROUND_PROCESSES = "monitor_background_processes"
+    MONITOR_CELERY_QUEUES = "monitor_celery_queues"

    KOMBU_MESSAGE_CLEANUP_TASK = "kombu_message_cleanup_task"
    CONNECTOR_PERMISSION_SYNC_GENERATOR_TASK = (
--- a/backend/onyx/connectors/bookstack/connector.py
+++ b/backend/onyx/connectors/bookstack/connector.py
@@ -61,6 +61,7 @@ class BookstackConnector(LoadConnector, PollConnector):
            )

        batch = bookstack_client.get(endpoint, params=params).get("data", [])
+
        doc_batch = [transformer(bookstack_client, item) for item in batch]

        return doc_batch, len(batch)
@@ -197,20 +198,31 @@ class BookstackConnector(LoadConnector, PollConnector):
        for endpoint, transform in transform_by_endpoint.items():
            start_ind = 0
            while True:
-                doc_batch, num_results = self._get_doc_batch(
-                    batch_size=self.batch_size,
-                    bookstack_client=self.bookstack_client,
-                    endpoint=endpoint,
-                    transformer=transform,
-                    start_ind=start_ind,
-                    start=start,
-                    end=end,
-                )
-                start_ind += num_results
-                if doc_batch:
-                    yield doc_batch
+                try:
+                    doc_batch, num_results = self._get_doc_batch(
+                        batch_size=self.batch_size,
+                        bookstack_client=self.bookstack_client,
+                        endpoint=endpoint,
+                        transformer=transform,
+                        start_ind=start_ind,
+                        start=start,
+                        end=end,
+                    )
+                    start_ind += num_results
+                    if doc_batch:
+                        yield doc_batch

-                if num_results < self.batch_size:
+                    if num_results < self.batch_size:
+                        break
+                    else:
+                        time.sleep(0.2)
+                except Exception as e:
+                    # Handle case where user hasn't properly set up permissions for the API key and we
+                    # fail on a specific resource (e.g. /books, /chapters, etc.)
+
+                    if (
+                        "BookStack Client request failed with status 403: Forbidden"
+                        in str(e)
+                    ):
+                        raise
                    break
-                else:
-                    time.sleep(0.2)
--- a/backend/onyx/connectors/factory.py
+++ b/backend/onyx/connectors/factory.py
@@ -20,6 +20,7 @@ from onyx.connectors.egnyte.connector import EgnyteConnector
 from onyx.connectors.file.connector import LocalFileConnector
 from onyx.connectors.fireflies.connector import FirefliesConnector
 from onyx.connectors.freshdesk.connector import FreshdeskConnector
+from onyx.connectors.gitbook.connector import GitbookConnector
 from onyx.connectors.github.connector import GithubConnector
 from onyx.connectors.gitlab.connector import GitlabConnector
 from onyx.connectors.gmail.connector import GmailConnector
@@ -71,6 +72,7 @@ def identify_connector_class(
        DocumentSource.GITHUB: GithubConnector,
        DocumentSource.GMAIL: GmailConnector,
        DocumentSource.GITLAB: GitlabConnector,
+        DocumentSource.GITBOOK: GitbookConnector,
        DocumentSource.GOOGLE_DRIVE: GoogleDriveConnector,
        DocumentSource.BOOKSTACK: BookstackConnector,
        DocumentSource.CONFLUENCE: ConfluenceConnector,
--- a/backend/onyx/connectors/gitbook/__init__.py
+++ b/backend/onyx/connectors/gitbook/__init__.py
--- a/backend/onyx/connectors/gitbook/connector.py
+++ b/backend/onyx/connectors/gitbook/connector.py
@@ -0,0 +1,279 @@
+from datetime import datetime
+from datetime import timezone
+from typing import Any
+from urllib.parse import urljoin
+
+import requests
+
+from onyx.configs.app_configs import INDEX_BATCH_SIZE
+from onyx.configs.constants import DocumentSource
+from onyx.connectors.interfaces import GenerateDocumentsOutput
+from onyx.connectors.interfaces import LoadConnector
+from onyx.connectors.interfaces import PollConnector
+from onyx.connectors.interfaces import SecondsSinceUnixEpoch
+from onyx.connectors.models import ConnectorMissingCredentialError
+from onyx.connectors.models import Document
+from onyx.connectors.models import Section
+from onyx.utils.logger import setup_logger
+
+
+logger = setup_logger()
+
+GITBOOK_API_BASE = "https://api.gitbook.com/v1/"
+
+
+class GitbookApiClient:
+    """Minimal read-only HTTP client for the GitBook REST API.
+
+    Every request goes to GITBOOK_API_BASE and carries a bearer-token
+    Authorization header built from the access token given at construction.
+    """
+
+    def __init__(self, access_token: str, timeout: float = 60.0) -> None:
+        """Store the settings used for later requests.
+
+        Args:
+            access_token: Token sent as the bearer credential on every call.
+            timeout: Seconds to wait for the connection and for each read.
+                Without a timeout `requests` waits indefinitely, which can
+                stall an indexing run on an unresponsive server.
+        """
+        self.access_token = access_token
+        self.timeout = timeout
+
+    def get(self, endpoint: str, params: dict[str, Any] | None = None) -> Any:
+        """Issue a GET request and return the decoded JSON body.
+
+        Args:
+            endpoint: API path relative to GITBOOK_API_BASE; a leading slash
+                is tolerated.
+            params: Optional query-string parameters.
+
+        Raises:
+            requests.HTTPError: If the response status is 4xx/5xx.
+            requests.Timeout: If the server does not answer within `timeout`.
+        """
+        headers = {
+            "Authorization": f"Bearer {self.access_token}",
+            "Content-Type": "application/json",
+        }
+
+        # Strip the leading slash so urljoin appends to the base URL's path
+        # instead of replacing it.
+        url = urljoin(GITBOOK_API_BASE, endpoint.lstrip("/"))
+        response = requests.get(
+            url, headers=headers, params=params, timeout=self.timeout
+        )
+        response.raise_for_status()
+        return response.json()
+
+    def get_page_content(self, space_id: str, page_id: str) -> dict[str, Any]:
+        """Fetch the content payload of one page within a space."""
+        return self.get(f"/spaces/{space_id}/content/page/{page_id}")
+
+
+def _extract_text_from_document(document: dict[str, Any]) -> str:
+    """Render a GitBook page-content payload as markdown-flavoured text.
+
+    Only the block types handled below produce output (headings 1-6,
+    paragraphs, unordered lists, task lists, code, blockquotes and tables);
+    any other block type contributes nothing.
+
+    Args:
+        document: Page-content response. Its "document" entry is expected to
+            carry a "nodes" list of block nodes.
+
+    Returns:
+        The concatenated text of every supported block, or "" when `document`
+        is empty or has no "document" key.
+    """
+
+    # "heading-1" .. "heading-6" -> "#" .. "######"
+    heading_prefixes = {f"heading-{level}": "#" * level for level in range(1, 7)}
+
+    def parse_text_node(node: dict[str, Any]) -> str:
+        # Only the "text" of each leaf is kept; a leaf's "marks" are not used.
+        return "".join(leaf.get("text", "") for leaf in node.get("leaves", []))
+
+    def parse_inline(node: dict[str, Any]) -> str:
+        # Text of all direct children of a block node.
+        return "".join(parse_text_node(child) for child in node.get("nodes", []))
+
+    def parse_first_child(item: dict[str, Any]) -> str:
+        # List items keep their text in their first child (presumably a
+        # paragraph). An item with no children renders as empty text instead
+        # of raising IndexError.
+        children = item.get("nodes", [])
+        return parse_inline(children[0]) if children else ""
+
+    def parse_table(node: dict[str, Any]) -> str:
+        data = node.get("data", {})
+        records = data.get("records", {})
+        definition = data.get("definition", {})
+        columns = data.get("view", {}).get("columns", [])
+        fragments = node.get("fragments", [])
+
+        def cell_text(fragment_id: Any) -> str:
+            # A cell value is the id of a fragment; the cell's text is the
+            # first paragraph of the matching fragment (the last match wins
+            # if several fragments share an id).
+            text = ""
+            for fragment in fragments:
+                if fragment.get("fragment") != fragment_id:
+                    continue
+                for frag_node in fragment.get("nodes", []):
+                    if frag_node.get("type") == "paragraph":
+                        text = parse_inline(frag_node)
+                        break
+            return text
+
+        header_cells = [
+            definition.get(col_id, {}).get("title", "") for col_id in columns
+        ]
+        lines = [
+            "| " + " | ".join(header_cells) + " |\n",
+            "|" + "---|" * len(header_cells) + "\n",
+        ]
+
+        # Rows are emitted in "orderIndex" order.
+        ordered_records = sorted(
+            records.items(), key=lambda item: item[1].get("orderIndex", "")
+        )
+        for _, record_data in ordered_records:
+            values = record_data.get("values", {})
+            row_cells = [cell_text(values.get(col_id, "")) for col_id in columns]
+            lines.append("| " + " | ".join(row_cells) + " |\n")
+
+        lines.append("\n")
+        return "".join(lines)
+
+    def parse_block_node(node: dict[str, Any]) -> str:
+        block_type = node.get("type", "")
+        children = node.get("nodes", [])
+
+        if block_type in heading_prefixes:
+            return f"{heading_prefixes[block_type]} {parse_inline(node)}\n\n"
+
+        if block_type == "paragraph":
+            return f"{parse_inline(node)}\n\n"
+
+        if block_type == "list-unordered":
+            items = [f"* {parse_first_child(item)}\n" for item in children]
+            return "".join(items) + "\n"
+
+        if block_type == "list-tasks":
+            items = []
+            for task_item in children:
+                checked = task_item.get("data", {}).get("checked", False)
+                checkbox = "[x]" if checked else "[ ]"
+                items.append(f"- {checkbox} {parse_first_child(task_item)}\n")
+            return "".join(items) + "\n"
+
+        if block_type == "code":
+            code_lines = [
+                f"{parse_inline(code_line)}\n"
+                for code_line in children
+                if code_line.get("type") == "code-line"
+            ]
+            return "".join(code_lines) + "\n"
+
+        if block_type == "blockquote":
+            quote_lines = [
+                f"> {parse_inline(quote_node)}\n"
+                for quote_node in children
+                if quote_node.get("type") == "paragraph"
+            ]
+            return "".join(quote_lines) + "\n"
+
+        if block_type == "table":
+            return parse_table(node)
+
+        # Unsupported block types are skipped.
+        return ""
+
+    if not document or "document" not in document:
+        return ""
+
+    nodes = document["document"].get("nodes", [])
+    return "".join(parse_block_node(node) for node in nodes)
+
+
+def _convert_page_to_document(
+    client: GitbookApiClient, space_id: str, page: dict[str, Any]
+) -> Document:
+    """Build an indexable Document from one GitBook page listing entry.
+
+    The page body is fetched with `client.get_page_content` and flattened to
+    text; the title, link and metadata come from the listing entry itself.
+
+    Args:
+        client: API client used to fetch the page content.
+        space_id: Id of the space the page belongs to.
+        page: Page entry from the space content listing. Must contain "id"
+            and "updatedAt"; the other keys are optional.
+
+    Raises:
+        KeyError: If "id" or "updatedAt" is missing from `page`.
+    """
+    page_id = page["id"]
+    page_content = client.get_page_content(space_id, page_id)
+
+    updated_at = datetime.fromisoformat(page["updatedAt"])
+    if updated_at.tzinfo is None:
+        # No offset in the timestamp: label it as UTC
+        # (presumably what the API means — TODO confirm).
+        updated_at = updated_at.replace(tzinfo=timezone.utc)
+    else:
+        # Convert rather than relabel, so a non-UTC offset keeps its instant.
+        updated_at = updated_at.astimezone(timezone.utc)
+
+    return Document(
+        id=f"gitbook-{space_id}-{page_id}",
+        sections=[
+            Section(
+                link=page.get("urls", {}).get("app", ""),
+                text=_extract_text_from_document(page_content),
+            )
+        ],
+        source=DocumentSource.GITBOOK,
+        semantic_identifier=page.get("title", ""),
+        doc_updated_at=updated_at,
+        metadata={
+            "path": page.get("path", ""),
+            "type": page.get("type", ""),
+            "kind": page.get("kind", ""),
+        },
+    )
+
+
+class GitbookConnector(LoadConnector, PollConnector):
+    """Connector that indexes the pages of a single GitBook space.
+
+    `load_credentials` must be called before any documents are requested.
+    """
+
+    def __init__(
+        self,
+        space_id: str,
+        batch_size: int = INDEX_BATCH_SIZE,
+    ) -> None:
+        """Record the target space and batch size; no network access happens here.
+
+        Args:
+            space_id: Id of the GitBook space to index.
+            batch_size: Maximum number of documents per yielded batch.
+        """
+        self.space_id = space_id
+        self.batch_size = batch_size
+        self.access_token: str | None = None
+        self.client: GitbookApiClient | None = None
+
+    def load_credentials(self, credentials: dict[str, Any]) -> None:
+        """Read the API key from `credentials` and create the API client.
+
+        Raises:
+            ConnectorMissingCredentialError: If "gitbook_api_key" is absent
+                or empty.
+        """
+        access_token = credentials.get("gitbook_api_key")
+        if not access_token:
+            raise ConnectorMissingCredentialError("GitBook access token")
+        self.access_token = access_token
+        self.client = GitbookApiClient(access_token)
+
+    @staticmethod
+    def _parse_updated_at(raw: str) -> datetime:
+        """Parse an ISO-8601 timestamp into a timezone-aware UTC datetime."""
+        parsed = datetime.fromisoformat(raw)
+        if parsed.tzinfo is None:
+            # No offset given: label as UTC (presumably the API's meaning —
+            # TODO confirm). An aware value is needed because the polling
+            # bounds are aware, and naive/aware comparison raises TypeError.
+            return parsed.replace(tzinfo=timezone.utc)
+        return parsed.astimezone(timezone.utc)
+
+    def _fetch_all_pages(
+        self,
+        start: datetime | None = None,
+        end: datetime | None = None,
+    ) -> GenerateDocumentsOutput:
+        """Yield batches of documents for pages updated within [start, end].
+
+        Args:
+            start: Skip pages last updated before this instant; None disables
+                the lower bound.
+            end: Skip pages last updated after this instant; None disables
+                the upper bound.
+
+        Raises:
+            ConnectorMissingCredentialError: If credentials were never loaded.
+            requests.RequestException: Logged, then re-raised, when an API
+                call fails.
+        """
+        if not self.client:
+            raise ConnectorMissingCredentialError("GitBook")
+
+        try:
+            content = self.client.get(f"/spaces/{self.space_id}/content")
+            pages = content.get("pages", [])
+
+            current_batch: list[Document] = []
+            for page in pages:
+                updated_at = self._parse_updated_at(page["updatedAt"])
+
+                # The listing is not assumed to be ordered by update time, so
+                # an out-of-window page is skipped rather than ending the scan.
+                if start and updated_at < start:
+                    continue
+                if end and updated_at > end:
+                    continue
+
+                current_batch.append(
+                    _convert_page_to_document(self.client, self.space_id, page)
+                )
+
+                if len(current_batch) >= self.batch_size:
+                    yield current_batch
+                    current_batch = []
+
+            # Flush the final, partially filled batch.
+            if current_batch:
+                yield current_batch
+
+        except requests.RequestException as e:
+            logger.error(f"Error fetching GitBook content: {str(e)}")
+            raise
+
+    def load_from_state(self) -> GenerateDocumentsOutput:
+        """Yield every page of the space, with no time filtering."""
+        return self._fetch_all_pages()
+
+    def poll_source(
+        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
+    ) -> GenerateDocumentsOutput:
+        """Yield pages updated between `start` and `end` (epoch seconds, read as UTC)."""
+        start_datetime = datetime.fromtimestamp(start, tz=timezone.utc)
+        end_datetime = datetime.fromtimestamp(end, tz=timezone.utc)
+        return self._fetch_all_pages(start_datetime, end_datetime)
+
+
+if __name__ == "__main__":
+    # Manual smoke test: index the space named by GITBOOK_SPACE_ID using the
+    # key in GITBOOK_API_KEY and print the first batch of documents.
+    import os
+
+    space = os.environ["GITBOOK_SPACE_ID"]
+    api_key = os.environ["GITBOOK_API_KEY"]
+
+    gitbook = GitbookConnector(space_id=space)
+    gitbook.load_credentials({"gitbook_api_key": api_key})
+
+    first_batch = next(gitbook.load_from_state())
+    print(first_batch)
--- a/backend/onyx/connectors/google_drive/connector.py
+++ b/backend/onyx/connectors/google_drive/connector.py
@@ -302,7 +302,7 @@ class GoogleDriveConnector(LoadConnector, PollConnector, SlimConnector):
            if e.status_code == 401:
                # fail gracefully, let the other impersonations continue
                # one user without access shouldn't block the entire connector
-                logger.exception(
+                logger.warning(
                    f"User '{user_email}' does not have access to the drive APIs."
                )
                return
--- a/backend/onyx/connectors/onyx_jira/connector.py
+++ b/backend/onyx/connectors/onyx_jira/connector.py
@@ -145,7 +145,8 @@ def fetch_jira_issues_batch(
            id=page_url,
            sections=[Section(link=page_url, text=ticket_content)],
            source=DocumentSource.JIRA,
-            semantic_identifier=issue.fields.summary,
+            semantic_identifier=f"{issue.key}: {issue.fields.summary}",
+            title=f"{issue.key} {issue.fields.summary}",
            doc_updated_at=time_str_to_utc(issue.fields.updated),
            primary_owners=list(people) or None,
            # TODO add secondary_owners (commenters) if needed
--- a/backend/onyx/context/search/pipeline.py
+++ b/backend/onyx/context/search/pipeline.py
@@ -51,6 +51,7 @@ class SearchPipeline:
        user: User | None,
        llm: LLM,
        fast_llm: LLM,
+        skip_query_analysis: bool,
        db_session: Session,
        bypass_acl: bool = False,  # NOTE: VERY DANGEROUS, USE WITH CAUTION
        retrieval_metrics_callback: (
@@ -61,10 +62,13 @@ class SearchPipeline:
        rerank_metrics_callback: Callable[[RerankMetricsContainer], None] | None = None,
        prompt_config: PromptConfig | None = None,
    ):
+        # NOTE: The Search Request contains a lot of fields that are overrides, many of them can be None
+        # and typically are None. The preprocessing will fetch default values to replace these empty overrides.
        self.search_request = search_request
        self.user = user
        self.llm = llm
        self.fast_llm = fast_llm
+        self.skip_query_analysis = skip_query_analysis
        self.db_session = db_session
        self.bypass_acl = bypass_acl
        self.retrieval_metrics_callback = retrieval_metrics_callback
@@ -106,6 +110,7 @@ class SearchPipeline:
            search_request=self.search_request,
            user=self.user,
            llm=self.llm,
+            skip_query_analysis=self.skip_query_analysis,
            db_session=self.db_session,
            bypass_acl=self.bypass_acl,
        )
@@ -160,6 +165,12 @@ class SearchPipeline:
        that have a corresponding chunk.

        This step should be fast for any document index implementation.
+
+        Approximate timing breakdown of the current implementation:
+        - 200 ms to get the embedding of the query
+        - 15 ms to get chunks from the document index
+        - possibly more to get additional surrounding chunks
+        - possibly more for query expansion (multilingual)
        """
        if self._retrieved_sections is not None:
            return self._retrieved_sections
--- a/backend/onyx/context/search/postprocessing/postprocessing.py
+++ b/backend/onyx/context/search/postprocessing/postprocessing.py
@@ -15,6 +15,7 @@ from onyx.context.search.models import InferenceChunk
 from onyx.context.search.models import InferenceChunkUncleaned
 from onyx.context.search.models import InferenceSection
 from onyx.context.search.models import MAX_METRICS_CONTENT
+from onyx.context.search.models import RerankingDetails
 from onyx.context.search.models import RerankMetricsContainer
 from onyx.context.search.models import SearchQuery
 from onyx.document_index.document_index_utils import (
@@ -77,7 +78,8 @@ def cleanup_chunks(chunks: list[InferenceChunkUncleaned]) -> list[InferenceChunk

@log_function_time(print_only=True)
 def semantic_reranking(
-    query: SearchQuery,
+    query_str: str,
+    rerank_settings: RerankingDetails,
    chunks: list[InferenceChunk],
    model_min: int = CROSS_ENCODER_RANGE_MIN,
    model_max: int = CROSS_ENCODER_RANGE_MAX,
@@ -88,11 +90,9 @@ def semantic_reranking(

    Note: this updates the chunks in place, it updates the chunk scores which came from retrieval
    """
-    rerank_settings = query.rerank_settings
-
-    if not rerank_settings or not rerank_settings.rerank_model_name:
-        # Should never reach this part of the flow without reranking settings
-        raise RuntimeError("Reranking flow should not be running")
+    assert (
+        rerank_settings.rerank_model_name
+    ), "Reranking flow cannot run without a specific model"

    chunks_to_rerank = chunks[: rerank_settings.num_rerank]

@@ -107,7 +107,7 @@ def semantic_reranking(
        f"{chunk.semantic_identifier or chunk.title or ''}\n{chunk.content}"
        for chunk in chunks_to_rerank
    ]
-    sim_scores_floats = cross_encoder.predict(query=query.query, passages=passages)
+    sim_scores_floats = cross_encoder.predict(query=query_str, passages=passages)

    # Old logic to handle multiple cross-encoders preserved but not used
    sim_scores = [numpy.array(sim_scores_floats)]
@@ -165,8 +165,20 @@ def semantic_reranking(
    return list(ranked_chunks), list(ranked_indices)


+def should_rerank(rerank_settings: RerankingDetails | None) -> bool:
+    """Decide whether the reranking step should run.
+
+    Reranking runs only when settings are present, a rerank model name is
+    configured, and `num_rerank` is greater than 0.
+    """
+    if rerank_settings:
+        model_configured = bool(rerank_settings.rerank_model_name)
+        # num_rerank is only consulted once a model name is known to be set.
+        return model_configured and rerank_settings.num_rerank > 0
+
+    return False
+
+
 def rerank_sections(
-    query: SearchQuery,
+    query_str: str,
+    rerank_settings: RerankingDetails,
    sections_to_rerank: list[InferenceSection],
    rerank_metrics_callback: Callable[[RerankMetricsContainer], None] | None = None,
 ) -> list[InferenceSection]:
@@ -181,16 +193,13 @@ def rerank_sections(
    """
    chunks_to_rerank = [section.center_chunk for section in sections_to_rerank]

-    if not query.rerank_settings:
-        # Should never reach this part of the flow without reranking settings
-        raise RuntimeError("Reranking settings not found")
-
    ranked_chunks, _ = semantic_reranking(
-        query=query,
+        query_str=query_str,
+        rerank_settings=rerank_settings,
        chunks=chunks_to_rerank,
        rerank_metrics_callback=rerank_metrics_callback,
    )
-    lower_chunks = chunks_to_rerank[query.rerank_settings.num_rerank :]
+    lower_chunks = chunks_to_rerank[rerank_settings.num_rerank :]

    # Scores from rerank cannot be meaningfully combined with scores without rerank
    # However the ordering is still important
@@ -260,16 +269,13 @@ def search_postprocessing(

    rerank_task_id = None
    sections_yielded = False
-    if (
-        search_query.rerank_settings
-        and search_query.rerank_settings.rerank_model_name
-        and search_query.rerank_settings.num_rerank > 0
-    ):
+    if should_rerank(search_query.rerank_settings):
        post_processing_tasks.append(
            FunctionCall(
                rerank_sections,
                (
-                    search_query,
+                    search_query.query,
+                    search_query.rerank_settings,  # Cannot be None here
                    retrieved_sections,
                    rerank_metrics_callback,
                ),
--- a/backend/onyx/context/search/preprocessing/preprocessing.py
+++ b/backend/onyx/context/search/preprocessing/preprocessing.py
@@ -50,11 +50,11 @@ def retrieval_preprocessing(
    search_request: SearchRequest,
    user: User | None,
    llm: LLM,
+    skip_query_analysis: bool,
    db_session: Session,
-    bypass_acl: bool = False,
-    skip_query_analysis: bool = False,
-    base_recency_decay: float = BASE_RECENCY_DECAY,
    favor_recent_decay_multiplier: float = FAVOR_RECENT_DECAY_MULTIPLIER,
+    base_recency_decay: float = BASE_RECENCY_DECAY,
+    bypass_acl: bool = False,
 ) -> SearchQuery:
    """Logic is as follows:
    Any global disables apply first
@@ -146,7 +146,7 @@ def retrieval_preprocessing(
    is_keyword, extracted_keywords = (
        parallel_results[run_query_analysis.result_id]
        if run_query_analysis
-        else (None, None)
+        else (False, None)
    )

    all_query_terms = query.split()
--- a/backend/onyx/db/background_error.py
+++ b/backend/onyx/db/background_error.py
@@ -0,0 +1,10 @@
+from sqlalchemy.orm import Session
+
+from onyx.db.models import BackgroundError
+
+
+def create_background_error(
+    db_session: Session, message: str, cc_pair_id: int | None
+) -> None:
+    """Persist a new BackgroundError row and commit it immediately.
+
+    Args:
+        db_session: Session the row is added to and committed on.
+        message: Text of the error to record.
+        cc_pair_id: Connector/credential pair the error relates to, or None
+            when it is not tied to one.
+    """
+    error_row = BackgroundError(message=message, cc_pair_id=cc_pair_id)
+    db_session.add(error_row)
+    db_session.commit()
--- a/backend/onyx/db/chat.py
+++ b/backend/onyx/db/chat.py
@@ -350,13 +350,14 @@ def delete_chat_session(
    user_id: UUID | None,
    chat_session_id: UUID,
    db_session: Session,
+    include_deleted: bool = False,
    hard_delete: bool = HARD_DELETE_CHATS,
 ) -> None:
    chat_session = get_chat_session_by_id(
        chat_session_id=chat_session_id, user_id=user_id, db_session=db_session
    )

-    if chat_session.deleted:
+    if chat_session.deleted and not include_deleted:
        raise ValueError("Cannot delete an already deleted chat session")

    if hard_delete:
@@ -380,7 +381,15 @@ def delete_chat_sessions_older_than(days_old: int, db_session: Session) -> None:
    ).fetchall()

    for user_id, session_id in old_sessions:
-        delete_chat_session(user_id, session_id, db_session, hard_delete=True)
+        try:
+            delete_chat_session(
+                user_id, session_id, db_session, include_deleted=True, hard_delete=True
+            )
+        except Exception:
+            logger.exception(
+                "delete_chat_session exceptioned. "
+                f"user_id={user_id} session_id={session_id}"
+            )


 def get_chat_message(
@@ -893,14 +902,18 @@ def translate_db_sub_questions_to_server_objects(
                question=sub_question.sub_question,
                answer=sub_question.sub_answer,
                sub_queries=sub_queries,
-                context_docs=get_retrieval_docs_from_search_docs(verified_docs),
+                context_docs=get_retrieval_docs_from_search_docs(
+                    verified_docs, sort_by_score=False
+                ),
            )
        )
    return sub_questions


 def get_retrieval_docs_from_search_docs(
-    search_docs: list[SearchDoc], remove_doc_content: bool = False
+    search_docs: list[SearchDoc],
+    remove_doc_content: bool = False,
+    sort_by_score: bool = True,
 ) -> RetrievalDocs:
    top_documents = [
        translate_db_search_doc_to_server_search_doc(
@@ -908,7 +921,8 @@ def get_retrieval_docs_from_search_docs(
        )
        for db_doc in search_docs
    ]
-    top_documents = sorted(top_documents, key=lambda doc: doc.score, reverse=True)  # type: ignore
+    if sort_by_score:
+        top_documents = sorted(top_documents, key=lambda doc: doc.score, reverse=True)  # type: ignore
    return RetrievalDocs(top_documents=top_documents)


@@ -1018,7 +1032,7 @@ def log_agent_sub_question_results(
        sub_question = sub_question_answer_result.question
        sub_answer = sub_question_answer_result.answer
        sub_document_results = _create_citation_format_list(
-            sub_question_answer_result.verified_reranked_documents
+            sub_question_answer_result.context_documents
        )

        sub_question_object = AgentSubQuestion(
--- a/backend/onyx/db/models.py
+++ b/backend/onyx/db/models.py
@@ -483,6 +483,10 @@ class ConnectorCredentialPair(Base):
        primaryjoin="foreign(ConnectorCredentialPair.creator_id) == remote(User.id)",
    )

+    background_errors: Mapped[list["BackgroundError"]] = relationship(
+        "BackgroundError", back_populates="cc_pair", cascade="all, delete-orphan"
+    )
+

 class Document(Base):
    __tablename__ = "document"
@@ -2115,6 +2119,31 @@ class StandardAnswer(Base):
    )


+class BackgroundError(Base):
+    """Important background errors. Serves to:
+    1. Ensure that important logs are kept around and not lost on rotation/container restarts
+    2. A trail for high-signal events so that the debugger doesn't need to remember/know every
+       possible relevant log line.
+    """
+
+    __tablename__ = "background_error"
+
+    id: Mapped[int] = mapped_column(primary_key=True)
+    # Text of the recorded error.
+    message: Mapped[str] = mapped_column(String)
+    # Timezone-aware timestamp; the default is filled in by the database
+    # (server_default), not by Python code.
+    time_created: Mapped[datetime.datetime] = mapped_column(
+        DateTime(timezone=True), server_default=func.now()
+    )
+
+    # option to link the error to a specific CC Pair
+    # The foreign key is declared with ON DELETE CASCADE and is nullable, so an
+    # error does not have to belong to any pair.
+    cc_pair_id: Mapped[int | None] = mapped_column(
+        ForeignKey("connector_credential_pair.id", ondelete="CASCADE"), nullable=True
+    )
+
+    # ORM side of the link; mirrors ConnectorCredentialPair.background_errors.
+    cc_pair: Mapped["ConnectorCredentialPair | None"] = relationship(
+        "ConnectorCredentialPair", back_populates="background_errors"
+    )
+
+
 """Tables related to Permission Sync"""


--- a/backend/onyx/llm/chat_llm.py
+++ b/backend/onyx/llm/chat_llm.py
@@ -52,6 +52,18 @@ litellm.telemetry = False
 _LLM_PROMPT_LONG_TERM_LOG_CATEGORY = "llm_prompt"


+class LLMTimeoutError(Exception):
+    """
+    Exception raised when an LLM call times out.
+
+    DefaultMultiLLM raises this in place of `litellm.Timeout`, passing the
+    original exception as the sole argument.
+    """
+
+
+class LLMRateLimitError(Exception):
+    """
+    Exception raised when an LLM call is rate limited.
+
+    DefaultMultiLLM raises this in place of `litellm.RateLimitError`, passing
+    the original exception as the sole argument.
+    """
+
+
 def _base_msg_to_role(msg: BaseMessage) -> str:
    if isinstance(msg, HumanMessage) or isinstance(msg, HumanMessageChunk):
        return "user"
@@ -389,6 +401,7 @@ class DefaultMultiLLM(LLM):
        tool_choice: ToolChoiceOptions | None,
        stream: bool,
        structured_response_format: dict | None = None,
+        timeout_override: int | None = None,
    ) -> litellm.ModelResponse | litellm.CustomStreamWrapper:
        # litellm doesn't accept LangChain BaseMessage objects, so we need to convert them
        # to a dict representation
@@ -396,9 +409,14 @@ class DefaultMultiLLM(LLM):
        self._record_call(processed_prompt)

        try:
+            print(
+                "model is",
+                f"{self.config.model_provider}/{self.config.deployment_name or self.config.model_name}",
+            )
            return litellm.completion(
                mock_response=MOCK_LLM_RESPONSE,
                # model choice
+                # model="openai/gpt-4",
                model=f"{self.config.model_provider}/{self.config.deployment_name or self.config.model_name}",
                # NOTE: have to pass in None instead of empty string for these
                # otherwise litellm can have some issues with bedrock
@@ -414,7 +432,7 @@ class DefaultMultiLLM(LLM):
                stream=stream,
                # model params
                temperature=0,
-                timeout=self._timeout,
+                timeout=timeout_override or self._timeout,
                # For now, we don't support parallel tool calls
                # NOTE: we can't pass this in if tools are not specified
                # or else OpenAI throws an error
@@ -433,6 +451,12 @@ class DefaultMultiLLM(LLM):
        except Exception as e:
            self._record_error(processed_prompt, e)
            # for break pointing
+            if isinstance(e, litellm.Timeout):
+                raise LLMTimeoutError(e)
+
+            elif isinstance(e, litellm.RateLimitError):
+                raise LLMRateLimitError(e)
+
            raise e

    @property
@@ -453,6 +477,7 @@ class DefaultMultiLLM(LLM):
        tools: list[dict] | None = None,
        tool_choice: ToolChoiceOptions | None = None,
        structured_response_format: dict | None = None,
+        timeout_override: int | None = None,
    ) -> BaseMessage:
        if LOG_DANSWER_MODEL_INTERACTIONS:
            self.log_model_configs()
@@ -460,7 +485,12 @@ class DefaultMultiLLM(LLM):
        response = cast(
            litellm.ModelResponse,
            self._completion(
-                prompt, tools, tool_choice, False, structured_response_format
+                prompt=prompt,
+                tools=tools,
+                tool_choice=tool_choice,
+                stream=False,
+                structured_response_format=structured_response_format,
+                timeout_override=timeout_override,
            ),
        )
        choice = response.choices[0]
@@ -478,19 +508,31 @@ class DefaultMultiLLM(LLM):
        tools: list[dict] | None = None,
        tool_choice: ToolChoiceOptions | None = None,
        structured_response_format: dict | None = None,
+        timeout_override: int | None = None,
    ) -> Iterator[BaseMessage]:
        if LOG_DANSWER_MODEL_INTERACTIONS:
            self.log_model_configs()

        if DISABLE_LITELLM_STREAMING:
-            yield self.invoke(prompt, tools, tool_choice, structured_response_format)
+            yield self.invoke(
+                prompt,
+                tools,
+                tool_choice,
+                structured_response_format,
+                timeout_override,
+            )
            return

        output = None
        response = cast(
            litellm.CustomStreamWrapper,
            self._completion(
-                prompt, tools, tool_choice, True, structured_response_format
+                prompt=prompt,
+                tools=tools,
+                tool_choice=tool_choice,
+                stream=True,
+                structured_response_format=structured_response_format,
+                timeout_override=timeout_override,
            ),
        )
        try:
--- a/backend/onyx/llm/custom_llm.py
+++ b/backend/onyx/llm/custom_llm.py
@@ -81,6 +81,7 @@ class CustomModelServer(LLM):
        tools: list[dict] | None = None,
        tool_choice: ToolChoiceOptions | None = None,
        structured_response_format: dict | None = None,
+        timeout_override: int | None = None,
    ) -> BaseMessage:
        return self._execute(prompt)

@@ -90,5 +91,6 @@ class CustomModelServer(LLM):
        tools: list[dict] | None = None,
        tool_choice: ToolChoiceOptions | None = None,
        structured_response_format: dict | None = None,
+        timeout_override: int | None = None,
    ) -> Iterator[BaseMessage]:
        yield self._execute(prompt)
--- a/backend/onyx/llm/interfaces.py
+++ b/backend/onyx/llm/interfaces.py
@@ -90,12 +90,13 @@ class LLM(abc.ABC):
        tools: list[dict] | None = None,
        tool_choice: ToolChoiceOptions | None = None,
        structured_response_format: dict | None = None,
+        timeout_override: int | None = None,
    ) -> BaseMessage:
        self._precall(prompt)
        # TODO add a postcall to log model outputs independent of concrete class
        # implementation
        return self._invoke_implementation(
-            prompt, tools, tool_choice, structured_response_format
+            prompt, tools, tool_choice, structured_response_format, timeout_override
        )

    @abc.abstractmethod
@@ -105,6 +106,7 @@ class LLM(abc.ABC):
        tools: list[dict] | None = None,
        tool_choice: ToolChoiceOptions | None = None,
        structured_response_format: dict | None = None,
+        timeout_override: int | None = None,
    ) -> BaseMessage:
        raise NotImplementedError

@@ -114,12 +116,13 @@ class LLM(abc.ABC):
        tools: list[dict] | None = None,
        tool_choice: ToolChoiceOptions | None = None,
        structured_response_format: dict | None = None,
+        timeout_override: int | None = None,
    ) -> Iterator[BaseMessage]:
        self._precall(prompt)
        # TODO add a postcall to log model outputs independent of concrete class
        # implementation
        messages = self._stream_implementation(
-            prompt, tools, tool_choice, structured_response_format
+            prompt, tools, tool_choice, structured_response_format, timeout_override
        )

        tokens = []
@@ -138,5 +141,6 @@ class LLM(abc.ABC):
        tools: list[dict] | None = None,
        tool_choice: ToolChoiceOptions | None = None,
        structured_response_format: dict | None = None,
+        timeout_override: int | None = None,
    ) -> Iterator[BaseMessage]:
        raise NotImplementedError
--- a/backend/onyx/main.py
+++ b/backend/onyx/main.py
@@ -238,12 +238,17 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
        await close_auth_limiter()


-def log_http_error(_: Request, exc: Exception) -> JSONResponse:
+def log_http_error(request: Request, exc: Exception) -> JSONResponse:
    status_code = getattr(exc, "status_code", 500)

    if isinstance(exc, BasicAuthenticationError):
-        # For BasicAuthenticationError, just log a brief message without stack trace (almost always spam)
-        logger.warning(f"Authentication failed: {str(exc)}")
+        # For BasicAuthenticationError, just log a brief message without stack trace
+        # (almost always spammy)
+        logger.debug(f"Authentication failed: {str(exc)}")
+
+    elif status_code == 404 and request.url.path == "/metrics":
+        # Log 404 errors for the /metrics endpoint with debug level
+        logger.debug(f"404 error for /metrics endpoint: {str(exc)}")

    elif status_code >= 400:
        error_msg = f"{str(exc)}\n"
--- a/backend/onyx/natural_language_processing/utils.py
+++ b/backend/onyx/natural_language_processing/utils.py
@@ -99,7 +99,7 @@ def _check_tokenizer_cache(

        if not tokenizer:
            logger.info(
-                f"Falling back to default embedding model: {DOCUMENT_ENCODER_MODEL}"
+                f"Falling back to default embedding model tokenizer: {DOCUMENT_ENCODER_MODEL}"
            )
            tokenizer = HuggingFaceTokenizer(DOCUMENT_ENCODER_MODEL)

--- a/backend/onyx/prompts/agent_search.py
+++ b/backend/onyx/prompts/agent_search.py
@@ -1,3 +1,7 @@
+from onyx.agents.agent_search.shared_graph_utils.constants import (
+    AGENT_ANSWER_SEPARATOR,
+)
+
 # Standards
 SEPARATOR_LINE = "-------"
 SEPARATOR_LINE_LONG = "---------------"
@@ -5,8 +9,6 @@ UNKNOWN_ANSWER = "I do not have enough information to answer this question."
 NO_RECOVERED_DOCS = "No relevant information recovered"
 YES = "yes"
 NO = "no"
-
-
 # Framing/Support/Template Prompts
 HISTORY_FRAMING_PROMPT = f"""
 For more context, here is the history of the conversation so far that preceded this question:
@@ -16,6 +18,43 @@ For more context, here is the history of the conversation so far that preceded t
 """.strip()


+COMMON_RAG_RULES = f"""
+IMPORTANT RULES:
+ - If you cannot reliably answer the question solely using the provided information, say that you cannot reliably answer. \
+You may give some additional facts you learned, but do not try to invent an answer.
+
+ - If the information is empty or irrelevant, just say "{UNKNOWN_ANSWER}".
+
+ - If the information is relevant but not fully conclusive, provide an answer to the extent you can but also specify that \
+the information is not conclusive and why.
+
+- When constructing/considering categories, focus less on the question and more on the context actually provided! \
+Example: if the question is about the products of company A, and the content provided lists a number of products, \
+do NOT automatically ASSUME that those belong to company A!  So you cannot list those as products of company A, despite \
+the fact that the question is about company A's products. What you should say instead is maybe something like \
+"Here are a number of products, but I cannot say whether some or all of them belong to company A: \
+<proceed with listing the products>". It is ABSOLUTELY ESSENTIAL that the answer constructed reflects \
+actual knowledge. For that matter, also consider the title of the document and other information that may be \
+provided. If that does not make it clear that - in the example above - the products belong to company A, \
+then do not list them as products of company A, just maybe as "A list of products that may not necessarily \
+belong to company A". THIS IS IMPORTANT!
+
+- Related, if the context provides a list of items with associated data or other information that seems \
+to align with the categories in the question, but does not specify whether the items or the information is \
+specific to the exact requested category, then present the information with a disclaimer. Use a title such as \
+"I am not sure whether these items (or the information provided) is specific to [relevant category] or whether \
+these are all [specific group], but I found this information may be helpful:" \
+followed by the list of items and associated data/or information discovered.
+
+ - Do not group together items amongst one headline where not all items belong to the category of the headline! \
+(Example: "Products used by Company A" where some products listed are not built by Company A, but other companies,
+or it is not clear that the products are built by Company A). Only state what you know for sure!
+
+ - Do NOT perform any calculations in the answer! Just report on facts.
+
+ - If appropriate, organizing your answer in bullet points is often useful.
+""".strip()
+
 ASSISTANT_SYSTEM_PROMPT_DEFAULT = "You are an assistant for question-answering tasks."

 ASSISTANT_SYSTEM_PROMPT_PERSONA = f"""
@@ -129,20 +168,44 @@ History summary:
 # Sub-question
 # Intentionally left a copy in case we want to modify this one differently
 INITIAL_QUESTION_DECOMPOSITION_PROMPT = f"""
-Decompose the initial user question into no more than 3 appropriate sub-questions that help to answer the \
-original question. The purpose for this decomposition may be to:
-  1) isolate individual entities (i.e., 'compare sales of company A and company B' -> \
+Please create a list of no more than 3 sub-questions whose answers would help to inform the answer \
+to the initial question.
+
+The purpose for these sub-questions could be:
+  1) decomposition to isolate individual entities (i.e., 'compare sales of company A and company B' -> \
 ['what are sales for company A', 'what are sales for company B'])
-  2) clarify or disambiguate ambiguous terms (i.e., 'what is our success with company A' -> \
+
+  2) clarification and/or disambiguation of ambiguous terms (i.e., 'what is our success with company A' -> \
 ['what are our sales with company A','what is our market share with company A', \
 'is company A a reference customer for us', etc.])
-  3) if a term or a metric is essentially clear, but it could relate to various components of an entity and you \
-are generally familiar with the entity, then you can decompose the question into sub-questions that are more \
-specific to components (i.e., 'what do we do to improve scalability of product X', 'what do we to to improve \
-scalability of product X', 'what do we do to improve stability of product X', ...])
-  4) research an area that could really help to answer the question.

-Here is the initial question to decompose:
+  3) if a term or a metric is essentially clear, but it could relate to various aspects of an entity and you \
+are generally familiar with the entity, then you can create sub-questions that are more \
+specific (i.e.,  'what do we do to improve product X' -> 'what do we do to improve scalability of product X', \
+'what do we do to improve performance of product X', 'what do we do to improve stability of product X', ...)
+
+  4) research individual questions and areas that should really help to ultimately answer the question.
+
+Important:
+
+ - Each sub-question should lend itself to be answered by a RAG system. Correspondingly, phrase the question \
+in a way that is amenable to that. An example set of sub-questions based on an initial question could look like this:
+'what can I do to improve the performance of workflow X' -> \
+'what are the settings affecting performance for workflow X', 'are there complaints and bugs related to \
+workflow X performance', 'what are performance benchmarks for workflow X', ...
+
+ - Consequently, again, don't just decompose, but make sure that the sub-questions have the proper form. I.e., no \
+ 'I', etc.
+
+ - Do not(!) create sub-questions that are clarifying question to the person who asked the question, \
+like making suggestions or asking the user for more information! This is not useful for the actual \
+question-answering process! You need to take the information from the user as it is given to you! \
+For example, should the question be of the type 'why does product X perform poorly for customer A', DO NOT create a \
+sub-question of the type 'what are the settings that customer A uses for product X?'! A valid sub-question \
+could rather be 'which settings for product X have been shown to lead to poor performance for customers?'
+
+
+And here is the initial question to create sub-questions for, so that you have the full context:
 {SEPARATOR_LINE}
 {{question}}
 {SEPARATOR_LINE}
@@ -150,7 +213,79 @@ Here is the initial question to decompose:
 {{history}}

 Do NOT include any text in your answer outside of the list of sub-questions!
-Please formulate your answer as a newline-separated list of questions like so:
+Please formulate your answer as a newline-separated list of questions like so (and please ONLY ANSWER WITH THIS LIST! Do not \
+add any explanations or other text!):
+
+ <sub-question>
+ <sub-question>
+ <sub-question>
+ ...
+
+Answer:
+""".strip()
+
+# INITIAL PHASE - AWARE OF REFINEMENT
+# Sub-question
+# Additionally suggests generating sub-questions whose answers a future refinement phase
+# could use to generate new questions
+# Intentionally left a copy in case we want to modify this one differently
+INITIAL_QUESTION_DECOMPOSITION_PROMPT_ASSUMING_REFINEMENT = f"""
+Please create a list of no more than 3 sub-questions whose answers would help to inform the answer \
+to the initial question.
+
+The purpose for these sub-questions could be:
+  1) decomposition to isolate individual entities (i.e., 'compare sales of company A and company B' -> \
+['what are sales for company A', 'what are sales for company B'])
+
+  2) clarification and/or disambiguation of ambiguous terms (i.e., 'what is our success with company A' -> \
+['what are our sales with company A','what is our market share with company A', \
+'is company A a reference customer for us', etc.])
+
+  3) if a term or a metric is essentially clear, but it could relate to various aspects of an entity and you \
+are generally familiar with the entity, then you can create sub-questions that are more \
+specific (i.e.,  'what do we do to improve product X' -> 'what do we do to improve scalability of product X', \
+'what do we do to improve performance of product X', 'what do we do to improve stability of product X', ...)
+
+  4) research individual questions and areas that should really help to ultimately answer the question.
+
+  5) if meaningful, find relevant facts that may inform another set of sub-questions generated after the set you \
+create now are answered. Example: 'which products have we implemented at company A, and is this different to \
+its competitors?'  could potentially create sub-questions 'what products have we implemented at company A', \
+and 'who are the competitors of company A'. The additional round of sub-question generation which sees the \
+answers for this initial round of sub-question creation could then use the answer to the second sub-question \
+(which could be 'company B and C are competitors of company A') to then ask 'which products have we implemented \
+at company B', 'which products have we implemented at company C'...
+
+Important:
+
+ - Each sub-question should lend itself to be answered by a RAG system. Correspondingly, phrase the question \
+in a way that is amenable to that. An example set of sub-questions based on an initial question could look like this:
+'what can I do to improve the performance of workflow X' -> \
+'what are the settings affecting performance for workflow X', 'are there complaints and bugs related to \
+workflow X performance', 'what are performance benchmarks for workflow X', ...
+
+ - Consequently, again, don't just decompose, but make sure that the sub-questions have the proper form. I.e., no \
+ 'I', etc.
+
+ - Do not(!) create sub-questions that are clarifying question to the person who asked the question, \
+like making suggestions or asking the user for more information! This is not useful for the actual \
+question-answering process! You need to take the information from the user as it is given to you! \
+For example, should the question be of the type 'why does product X perform poorly for customer A', DO NOT create a \
+sub-question of the type 'what are the settings that customer A uses for product X?'! A valid sub-question \
+could rather be 'which settings for product X have been shown to lead to poor performance for customers?'
+
+
+And here is the initial question to create sub-questions for:
+{SEPARATOR_LINE}
+{{question}}
+{SEPARATOR_LINE}
+
+{{history}}
+
+Do NOT include any text in your answer outside of the list of sub-questions!
+Please formulate your answer as a newline-separated list of questions like so (and please ONLY ANSWER WITH THIS LIST! Do not \
+add any explanations or other text!):
+
 <sub-question>
 <sub-question>
 <sub-question>
@@ -162,23 +297,47 @@ Answer:

 # TODO: combine shared pieces with INITIAL_QUESTION_DECOMPOSITION_PROMPT
 INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH = f"""
-Decompose the initial user question into no more than 3 appropriate sub-questions that help to answer the \
-original question. The purpose for this decomposition may be to:
-  1) isolate individual entities (i.e., 'compare sales of company A and company B' -> \
+Please create a list of no more than 3 sub-questions whose answers would help to inform the answer \
+to the initial question.
+
+The purpose for these sub-questions could be:
+  1) decomposition to isolate individual entities (i.e., 'compare sales of company A and company B' -> \
 ['what are sales for company A', 'what are sales for company B'])
-  2) clarify or disambiguate ambiguous terms (i.e., 'what is our success with company A' -> \
+
+  2) clarification and/or disambiguation of ambiguous terms (i.e., 'what is our success with company A' -> \
 ['what are our sales with company A','what is our market share with company A', \
 'is company A a reference customer for us', etc.])
-  3) if a term or a metric is essentially clear, but it could relate to various components of an entity and you \
-are generally familiar with the entity, then you can decompose the question into sub-questions that are more \
-specific to components (i.e., 'what do we do to improve scalability of product X', 'what do we to to improve \
-scalability of product X', 'what do we do to improve stability of product X', ...])
-  4) research an area that could really help to answer the question.
+
+  3) if a term or a metric is essentially clear, but it could relate to various aspects of an entity and you \
+are generally familiar with the entity, then you can create sub-questions that are more \
+specific (i.e.,  'what do we do to improve product X' -> 'what do we do to improve scalability of product X', \
+'what do we do to improve performance of product X', 'what do we do to improve stability of product X', ...)
+
+  4) research individual questions and areas that should really help to ultimately answer the question.
+
+Important:
+
+ - Each sub-question should lend itself to be answered by a RAG system. Correspondingly, phrase the question \
+in a way that is amenable to that. An example set of sub-questions based on an initial question could look like this:
+'what can I do to improve the performance of workflow X' -> \
+'what are the settings affecting performance for workflow X', 'are there complaints and bugs related to \
+workflow X performance', 'what are performance benchmarks for workflow X', ...
+
+ - Consequently, again, don't just decompose, but make sure that the sub-questions have the proper form. I.e., no \
+ 'I', etc.
+
+ - Do not(!) create sub-questions that are clarifying question to the person who asked the question, \
+like making suggestions or asking the user for more information! This is not useful for the actual \
+question-answering process! You need to take the information from the user as it is given to you! \
+For example, should the question be of the type 'why does product X perform poorly for customer A', DO NOT create a \
+sub-question of the type 'what are the settings that customer A uses for product X?'! A valid sub-question \
+could rather be 'which settings for product X have been shown to lead to poor performance for customers?'
+

 To give you some context, you will see below also some documents that may relate to the question. Please only \
 use this information to learn what the question is approximately asking about, but do not focus on the details \
 to construct the sub-questions! Also, some of the entities, relationships and terms that are in the dataset may \
-not be in these few documents, so DO NOT focussed too much on the documents when constructing the sub-questions! \
+not be in these few documents, so DO NOT focus too much on the documents when constructing the sub-questions! \
 Decomposition and disambiguations are most important!

 Here are the sample docs to give you some context:
@@ -186,7 +345,7 @@ Here are the sample docs to give you some context:
 {{sample_doc_str}}
 {SEPARATOR_LINE}

-And here is the initial question to decompose:
+And here is the initial question to create sub-questions for, so that you have the full context:
 {SEPARATOR_LINE}
 {{question}}
 {SEPARATOR_LINE}
@@ -194,7 +353,9 @@ And here is the initial question to decompose:
 {{history}}

 Do NOT include any text in your answer outside of the list of sub-questions!\
-Please formulate your answer as a newline-separated list of questions like so:
+Please formulate your answer as a newline-separated list of questions like so (and please ONLY ANSWER WITH THIS LIST! Do not \
+add any explanations or other text!):
+
 <sub-question>
 <sub-question>
 <sub-question>
@@ -203,6 +364,84 @@ Please formulate your answer as a newline-separated list of questions like so:
 Answer:
 """.strip()

+INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH_ASSUMING_REFINEMENT = f"""
+Please create a list of no more than 3 sub-questions whose answers would help to inform the answer \
+to the initial question.
+
+The purpose for these sub-questions could be:
+  1) decomposition to isolate individual entities (i.e., 'compare sales of company A and company B' -> \
+['what are sales for company A', 'what are sales for company B'])
+
+  2) clarification and/or disambiguation of ambiguous terms (i.e., 'what is our success with company A' -> \
+['what are our sales with company A','what is our market share with company A', \
+'is company A a reference customer for us', etc.])
+
+  3) if a term or a metric is essentially clear, but it could relate to various aspects of an entity and you \
+are generally familiar with the entity, then you can create sub-questions that are more \
+specific (i.e.,  'what do we do to improve product X' -> 'what do we do to improve scalability of product X', \
+'what do we do to improve performance of product X', 'what do we do to improve stability of product X', ...)
+
+  4) research individual questions and areas that should really help to ultimately answer the question.
+
+  5) if applicable and useful, consider using sub-questions to gather relevant information that can inform a \
+subsequent set of sub-questions. The answers to your initial sub-questions will be available when generating \
+the next set.
+For example, if you start with the question, "Which products have we implemented at Company A, and how does \
+this compare to its competitors?" you might first create sub-questions like "What products have we implemented \
+at Company A?" and "Who are the competitors of Company A?"
+The answer to the second sub-question, such as "Company B and C are competitors of Company A," can then be used \
+to generate more specific sub-questions in the next round, like "Which products have we implemented at Company B?" \
+and "Which products have we implemented at Company C?"
+
+You'll be the judge!
+
+Important:
+
+ - Each sub-question should lend itself to be answered by a RAG system. Correspondingly, phrase the question \
+in a way that is amenable to that. An example set of sub-questions based on an initial question could look like this:
+'what can I do to improve the performance of workflow X' -> \
+'what are the settings affecting performance for workflow X', 'are there complaints and bugs related to \
+workflow X performance', 'what are performance benchmarks for workflow X', ...
+
+ - Consequently, again, don't just decompose, but make sure that the sub-questions have the proper form. I.e., no \
+ 'I', etc.
+
+ - Do not(!) create sub-questions that are clarifying question to the person who asked the question, \
+like making suggestions or asking the user for more information! This is not useful for the actual \
+question-answering process! You need to take the information from the user as it is given to you! \
+For example, should the question be of the type 'why does product X perform poorly for customer A', DO NOT create a \
+sub-question of the type 'what are the settings that customer A uses for product X?'! A valid sub-question \
+could rather be 'which settings for product X have been shown to lead to poor performance for customers?'
+
+To give you some context, you will see below also some documents that may relate to the question. Please only \
+use this information to learn what the question is approximately asking about, but do not focus on the details \
+to construct the sub-questions! Also, some of the entities, relationships and terms that are in the dataset may \
+not be in these few documents, so DO NOT focus too much on the documents when constructing the sub-questions! \
+Decomposition and disambiguations are most important!
+
+Here are the sample docs to give you some context:
+{SEPARATOR_LINE}
+{{sample_doc_str}}
+{SEPARATOR_LINE}
+
+And here is the initial question to create sub-questions for, so that you have the full context:
+{SEPARATOR_LINE}
+{{question}}
+{SEPARATOR_LINE}
+
+{{history}}
+
+Do NOT include any text in your answer outside of the list of sub-questions! \
+Please formulate your answer as a newline-separated list of questions like so (and please ONLY ANSWER WITH THIS LIST! Do not \
+add any explanations or other text!):
+
+ <sub-question>
+ <sub-question>
+ <sub-question>
+ ...
+
+Answer:
+""".strip()

 # Retrieval
 QUERY_REWRITING_PROMPT = f"""
@@ -257,23 +496,35 @@ Answer:
 """.strip()


-# Sub-Question Anser Generation
+# Sub-Question Answer Generation
 SUB_QUESTION_RAG_PROMPT = f"""
 Use the context provided below - and only the provided context - to answer the given question. \
 (Note that the answer is in service of answering a broader question, given below as 'motivation').

-Again, only use the provided context and do not use your internal knowledge! If you cannot answer the \
-question based on the context, say "{UNKNOWN_ANSWER}". It is a matter of life and death that you do NOT \
-use your internal knowledge, just the provided information!
-
-Make sure that you keep all relevant information, specifically as it concerns to the ultimate goal. \
+Make sure that you keep all relevant information, specifically as it concerns the ultimate goal. \
 (But keep other details as well.)

-It is critical that you provide inline citations in the format [D1], [D2], [D3], etc! \
+{COMMON_RAG_RULES}
+
+ - Make sure that you only state what you actually can positively learn from the provided context! Particularly \
+don't make assumptions!  Example: if i) a question you should answer is asking for products of companies that \
+are competitors of company A, and ii) the context mentions products of companies A, B, C, D, E, etc., do NOT assume \
+that B, C, D, E, etc. are competitors of A! All you know is that these are products of a number of companies, and you \
+would have to rely on another question - that you do not have access to - to learn which companies are competitors of A.
+Correspondingly, you should not say that these are the products of competitors of A, but rather something like \
+"Here are some products of various companies".
+
+It is critical that you provide inline citations in the format [D1], [D2], [D3], etc! Please use format [D1][D2] and NOT \
+[D1, D2] format if you cite two or more documents together! \
 It is important that the citation is close to the information it supports. \
 Proper citations are very important to the user!

-For your general information, here is the ultimate motivation:
+Here is the document context for you to consider:
+{SEPARATOR_LINE}
+{{context}}
+{SEPARATOR_LINE}
+
+For your general information, here is the ultimate motivation for the question you need to answer:
 {SEPARATOR_LINE}
 {{original_question}}
 {SEPARATOR_LINE}
@@ -283,12 +534,8 @@ And here is the actual question I want you to answer based on the context above
 {{question}}
 {SEPARATOR_LINE}

-Here is the context:
-{SEPARATOR_LINE}
-{{context}}
-{SEPARATOR_LINE}
-
-Please keep your answer brief and concise, and focus on facts and data.
+Please keep your answer brief and concise, and focus on facts and data. (Again, only state what you see in the documents \
+for sure and communicate if/in what way this may or may not relate to the question you need to answer!)

 Answer:
 """.strip()
@@ -321,22 +568,18 @@ Use the information provided below - and only the provided information - to answ

 The information provided below consists of:
  1) a number of answered sub-questions - these are very important to help you organize your thoughts and your answer
-  2) a number of documents that deemed relevant for the question.
+  2) a number of documents that are deemed relevant for the question.

 {{history}}

-It is critical that you provide prover inline citations to documents in the format [D1], [D2], [D3], etc.! \
+It is critical that you provide proper inline citations to documents in the format [D1], [D2], [D3], etc.! \
 It is important that the citation is close to the information it supports. If you have multiple citations that support \
 a fact, please cite for example as [D1][D3], or [D2][D4], etc. \
 Feel free to also cite sub-questions in addition to documents, but make sure that you have documents cited with the \
 sub-question citation. If you want to cite both a document and a sub-question, please use [D1][Q3], or [D2][D7][Q4], etc. \
 Again, please NEVER cite sub-questions without a document citation! Proper citations are very important for the user!

-IMPORTANT RULES:
- - If you cannot reliably answer the question solely using the provided information, say that you cannot reliably answer. \
-You may give some additional facts you learned, but do not try to invent an answer.
- - If the information is empty or irrelevant, just say "{UNKNOWN_ANSWER}".
- - If the information is relevant but not fully conclusive, specify that the information is not conclusive and say why.
+{COMMON_RAG_RULES}

 Again, you should be sure that the answer is supported by the information provided!

@@ -361,7 +604,9 @@ And here is the question I want you to answer based on the information above:
 {{question}}
 {SEPARATOR_LINE}

-Please keep your answer brief and concise, and focus on facts and data.
+Please keep your answer brief and concise, and focus on facts and data. (Again, only state what you see in the documents for \
+sure and communicate if/in what way this may or may not relate to the question you need to answer! Use the answered \
+sub-questions as well, but be cautious and reconsider the documents again for validation.)

 Answer:
 """.strip()
@@ -376,11 +621,7 @@ The information provided below consists of a number of documents that were deeme

 {{history}}

-IMPORTANT RULES:
- - If you cannot reliably answer the question solely using the provided information, say that you cannot reliably answer. \
-You may give some additional facts you learned, but do not try to invent an answer.
- - If the information is irrelevant, just say "{UNKNOWN_ANSWER}".
- - If the information is relevant but not fully conclusive, specify that the information is not conclusive and say why.
+{COMMON_RAG_RULES}

 Again, you should be sure that the answer is supported by the information provided!

@@ -399,7 +640,8 @@ And here is the question I want you to answer based on the context above:
 {{question}}
 {SEPARATOR_LINE}

-Please keep your answer brief and concise, and focus on facts and data.
+Please keep your answer brief and concise, and focus on facts and data. (Again, only state what you see in the documents \
+for sure and communicate if/in what way this may or may not relate to the question you need to answer!)

 Answer:
 """.strip()
@@ -439,6 +681,12 @@ independently without the original question available
 - For each sub-question, please also provide a search term that can be used to retrieve relevant documents from a document store.
 - Consider specifically the sub-questions that were suggested but not answered. This is a sign that they are not answerable \
 with the available context, and you should not ask similar questions.
+ - Do not(!) create sub-questions that are clarifying question to the person who asked the question, \
+like making suggestions or asking the user for more information! This is not useful for the actual \
+question-answering process! You need to take the information from the user as it is given to you! \
+For example, should the question be of the type 'why does product X perform poorly for customer A', DO NOT create a \
+sub-question of the type 'what are the settings that customer A uses for product X?'! A valid sub-question \
+could rather be 'which settings for product X have been shown to lead to poor performance for customers?'

 Here is the initial question:
 {SEPARATOR_LINE}
@@ -474,7 +722,111 @@ objects/relationships/terms you can ask about! Do not ask about entities, terms
 Again, please find questions that are NOT overlapping too much with the already answered sub-questions or those that \
 already were suggested and failed. In other words - what can we try in addition to what has been tried so far?

-Generate the list of questions separated by one new line like this:
+Generate the list of questions separated by one new line like this (and please ONLY ANSWER WITH THIS LIST! Do not \
+add any explanations or other text!):
+
+<sub-question 1>
+<sub-question 2>
+<sub-question 3>
+...""".strip()
+
+REFINEMENT_QUESTION_DECOMPOSITION_PROMPT_W_INITIAL_SUBQUESTION_ANSWERS = f"""
+An initial user question needs to be answered. An initial answer has been provided but it wasn't quite good enough. \
+Also, some sub-questions had been answered and this information has been used to provide the initial answer. \
+Some other subquestions may have been suggested based on little knowledge, but they were not directly answerable. \
+Also, some entities, relationships and terms are given to you so that you have an idea of what the available data looks like.
+
+Your role is to generate 2-4 new sub-questions that would help to answer the initial question, considering:
+
+1) The initial question
+2) The initial answer that was found to be unsatisfactory
+3) The sub-questions that were answered AND their answers
+4) The sub-questions that were suggested but not answered (and that you should not repeat!)
+5) The entities, relationships and terms that were extracted from the context
+
+The individual questions should be answerable by a good RAG system. So a good idea would be to use the sub-questions to \
+resolve ambiguities and/or to separate the question for different entities that may be involved in the original question, \
+but in a way that does not duplicate questions that were already tried.
+
+Additional Guidelines:
+
+- The new sub-questions should be specific to the question and provide richer context for the question, resolve ambiguities, \
+or address shortcomings of the initial answer
+
+- Each new sub-question - when answered - should be relevant for the answer to the original question
+
+- The new sub-questions should be free from comparisons, ambiguities, judgements, aggregations, or any other complications that \
+may require extra context
+
+- The new sub-questions MUST have the full context of the original question so that it can be executed by a RAG system \
+independently without the original question available
+    Example:
+    - initial question: "What is the capital of France?"
+    - bad sub-question: "What is the name of the river there?"
+    - good sub-question: "What is the name of the river that flows through Paris?"
+
+    - For each new sub-question, please also provide a search term that can be used to retrieve relevant documents \
+from a document store.
+
+- Consider specifically the sub-questions that were suggested but not answered. This is a sign that they are not answerable \
+with the available context, and you should not ask similar questions.
+
+- Pay attention to the answers of previous sub-questions to make your sub-questions more specific! \
+Often the initial sub-questions were set up to give you critical information that you should use to generate new sub-questions. \
+For example, if the answer to an earlier sub-question is \
+'Company B and C are competitors of Company A', you should not ask now a new sub-question involving the term 'competitors', \
+as you already have the information to create a more precise question - you should instead explicitly reference \
+'Company B' and 'Company C' in your new sub-questions, as these are the competitors based on the previously answered question.
+
+- Be precise(!) and don't make inferences you cannot be sure about! For example, in the previous example \
+where Company B and Company C were identified as competitors of Company A, and then you also get information on \
+companies D and E, do not make the inference that these are also competitors of Company A! Stick to the information you have!
+(Also, don't assume that companies B and C are the only competitors of A, unless stated!)
+
+- Do not(!) create sub-questions that are clarifying questions *to the person who asked the question*, \
+like making suggestions or asking the user for more information! This is not useful for the actual \
+question-answering process! You need to take the information from the user as it is given to you! \
+For example, should the question be of the type 'why does product X perform poorly for customer A', DO NOT create a \
+sub-question of the type 'what are the settings that customer A uses for product X?'! A valid sub-question \
+could rather be 'which settings for product X have been shown to lead to poor performance for customers?'
+
+Here is the initial question:
+{SEPARATOR_LINE}
+{{question}}
+{SEPARATOR_LINE}
+{{history}}
+
+Here is the initial sub-optimal answer:
+{SEPARATOR_LINE}
+{{base_answer}}
+{SEPARATOR_LINE}
+
+Here are the sub-questions that were answered:
+{SEPARATOR_LINE}
+{{answered_subquestions_with_answers}}
+{SEPARATOR_LINE}
+
+Here are the sub-questions that were suggested but not answered:
+{SEPARATOR_LINE}
+{{failed_sub_questions}}
+{SEPARATOR_LINE}
+
+And here are the entities, relationships and terms extracted from the context:
+{SEPARATOR_LINE}
+{{entity_term_extraction_str}}
+{SEPARATOR_LINE}
+
+Please generate the list of good, fully contextualized sub-questions that would help to address the main question. \
+Specifically pay attention also to the entities, relationships and terms extracted, as these indicate what type of \
+objects/relationships/terms you can ask about! Do not ask about entities, terms or relationships that are not mentioned \
+in the 'entities, relationships and terms' section.
+
+Again, please find questions that are NOT overlapping too much with the already answered sub-questions or those that \
+already were suggested and failed. In other words - what can we try in addition to what has been tried so far?
+
+Generate the list of questions separated by one new line like this (and please ONLY ANSWER WITH THIS LIST! Do not \
+add any explanations or other text!):
+
 <sub-question 1>
 <sub-question 2>
 <sub-question 3>
@@ -489,7 +841,7 @@ Your task is to improve on a given answer to a question, as the initial answer w
 Use the information provided below - and only the provided information - to write your new and improved answer.

 The information provided below consists of:
-  1) an initial answer that was given but found to be lacking in some way.
+  1) an initial answer that was given but likely found to be lacking in some way.
  2) a number of answered sub-questions - these are very important(!) and definitely should help you to answer the main \
 question. Note that the sub-questions have a type, 'initial' and 'refined'. The 'initial' ones were available for the \
 creation of the initial answer, but the 'refined' were not, they are new. So please use the 'refined' sub-questions in \
@@ -499,6 +851,7 @@ particular to update/extend/correct/enrich the initial answer and to add more de
 the relevant document for a fact!

 It is critical that you provide proper inline citations to documents in the format [D1], [D2], [D3], etc! \
+Please use format [D1][D2] and NOT [D1, D2] format if you cite two or more documents together! \
 It is important that the citation is close to the information it supports. \
 DO NOT just list all of the citations at the very end. \
 Feel free to also cite sub-questions in addition to documents, \
@@ -509,14 +862,7 @@ Proper citations are very important for the user!

 {{history}}

-IMPORTANT RULES:
- - If you cannot reliably answer the question solely using the provided information, say that you cannot reliably answer. \
-You may give some additional facts you learned, but do not try to invent an answer.
- - If the information is empty or irrelevant, just say "{UNKNOWN_ANSWER}".
- - If the information is relevant but not fully conclusive, provide an answer to the extent you can but also specify that \
-the information is not conclusive and why.
- - Ignore any existing citations within the answered sub-questions, like [D1]... and [Q2]! The citations you will need to \
-use will need to refer to the documents (and sub-questions) that you are explicitly presented with below!
+{COMMON_RAG_RULES}

 Again, you should be sure that the answer is supported by the information provided!

@@ -545,7 +891,9 @@ Lastly, here is the main question I want you to answer based on the information
 {{question}}
 {SEPARATOR_LINE}

-Please keep your answer brief and concise, and focus on facts and data.
+Please keep your answer brief and concise, and focus on facts and data. (Again, only state what you see in the documents for \
+sure and communicate if/in what way this may or may not relate to the question you need to answer! Use the answered \
+sub-questions as well, but be cautious and reconsider the documents again for validation.)

 Answer:
 """.strip()
@@ -561,18 +909,13 @@ The information provided below consists of:
  2) a number of documents that were also deemed relevant for the question.

 It is critical that you provide proper inline citations to documents in the format [D1], [D2], [D3], etc! \
+Please use format [D1][D2] and NOT [D1, D2] format if you cite two or more documents together! \
 It is important that the citation is close to the information it supports. \
 DO NOT just list all of the citations at the very end of your response. Citations are very important for the user!

 {{history}}

-IMPORTANT RULES:
- - If you cannot reliably answer the question solely using the provided information, say that you cannot reliably answer. \
-You may give some additional facts you learned, but do not try to invent an answer.
- - If the information is empty or irrelevant, just say "{UNKNOWN_ANSWER}".
- - If the information is relevant but not fully conclusive, provide an answer to the extent you can but also specify that \
-the information is not conclusive and why.
-
+{COMMON_RAG_RULES}
 Again, you should be sure that the answer is supported by the information provided!

 Try to keep your answer concise. But also highlight uncertainties you may have should there be substantial ones, \
@@ -597,11 +940,103 @@ Lastly, here is the question I want you to answer based on the information above
 {{question}}
 {SEPARATOR_LINE}

-Please keep your answer brief and concise, and focus on facts and data.
+Please keep your answer brief and concise, and focus on facts and data. (Again, only state what you see in the documents for \
+sure and communicate if/in what way this may or may not relate to the question you need to answer!)

 Answer:
 """.strip()

+REFINED_ANSWER_VALIDATION_PROMPT = f"""
+{{persona_specification}}
+
+Your task is to verify whether a given answer is truthful and accurate, and supported by the facts that you \
+will be provided with.
+
+The information provided below consists of:
+
+  1) a question that needed to be answered
+
+  2) a proposed answer to the question, whose accuracy you should assess
+
+  3) potentially, a brief summary of the history of the conversation thus far, as it may give more context \
+to the question. Note that the statements in the history are NOT considered as facts, but ONLY serve to \
+give context to the question.
+
+  4) a number of answered sub-questions - you can take the answers as facts for these purposes.
+
+  5) a number of relevant documents that should support the answer and that you should use as fact, \
+i.e., if a statement in the document backs up a statement in the answer, then that statement in the answer \
+should be considered as true.
+
+
+IMPORTANT RULES AND CONSIDERATIONS:
+
+ - Please consider the statements made in the proposed answer and assess whether they are truthful and accurate, based \
+on the provided sub-answers and the documents. (Again, the history is NOT considered as facts!)
+
+ - Look in particular for:
+    * material statements that are not supported by the sub-answers or the documents
+    * assignments and groupings that are not supported, like company A is competitor of company B, but this is not \
+explicitly supported by documents or sub-answers, guesses or interpretations unless explicitly asked for
+
+ - look also at the citations in the proposed answer and assess whether they are appropriate given the statements \
+made in the proposed answer that cites the document.
+
+ - Are items grouped together under one headline where not all items belong to the category of the headline? \
+(Example: "Products used by Company A" where some products listed are not used by Company A)
+
+ - Does the proposed answer address the question in full?
+
+ - Is the answer specific to the question? Example: if the question asks for the prices for products by Company A, \
+but the answer lists the prices for products by Company A and Company B, or products it cannot be sure are by \
+Company A, then this is not quite specific enough to the question and the answer should be rejected.
+
+- Similarly, if the question asks for properties of a certain class but the proposed answer lists or includes entities \
+that are not of that class without very explicitly saying so, then the answer should be considered inaccurate.
+
+ - If there are any calculations in the proposed answer that are not supported by the documents, they need to be tested. \
+If any calculation is wrong, the proposed answer should be considered as not trustworthy.
+
+
+Here is the information:
+{SEPARATOR_LINE_LONG}
+
+QUESTION:
+{SEPARATOR_LINE}
+{{question}}
+{SEPARATOR_LINE}
+
+PROPOSED ANSWER:
+{SEPARATOR_LINE}
+{{proposed_answer}}
+{SEPARATOR_LINE}
+
+Here is the additional contextual information:
+{SEPARATOR_LINE_LONG}
+
+{{history}}
+
+Sub-questions and their answers (to be considered as facts):
+{SEPARATOR_LINE}
+{{answered_sub_questions}}
+{SEPARATOR_LINE}
+
+And here are the relevant documents that support the sub-question answers, and that are relevant for the actual question:
+{SEPARATOR_LINE}
+{{relevant_docs}}
+{SEPARATOR_LINE}
+
+
+Please think through this step by step. Format your response just as a string in the following format:
+
+Analysis: <think through your reasoning as outlined in the 'IMPORTANT RULES AND CONSIDERATIONS' section above, \
+but keep it short. Come to a conclusion whether the proposed answer can be trusted>
+Comments: <state your condensed comments you would give to a user reading the proposed answer, regarding the accuracy and \
+specificity.>
+{AGENT_ANSWER_SEPARATOR} <answer here only with yes or no, whether the proposed answer can be trusted. Base this on your \
+analysis, but only say 'yes' (trustworthy) or 'no' (not trustworthy)>
+""".strip()
+

 INITIAL_REFINED_ANSWER_COMPARISON_PROMPT = f"""
 For the given question, please compare the initial answer and the refined answer and determine if the refined answer is \
--- a/backend/onyx/redis/redis_connector_credential_pair.py
+++ b/backend/onyx/redis/redis_connector_credential_pair.py
@@ -120,6 +120,7 @@ class RedisConnectorCredentialPair(RedisObjectHelper):
                queue=OnyxCeleryQueues.VESPA_METADATA_SYNC,
                task_id=custom_task_id,
                priority=OnyxCeleryPriority.MEDIUM,
+                ignore_result=True,
            )

            num_tasks_sent += 1
--- a/backend/onyx/redis/redis_connector_delete.py
+++ b/backend/onyx/redis/redis_connector_delete.py
@@ -132,6 +132,7 @@ class RedisConnectorDelete:
                queue=OnyxCeleryQueues.CONNECTOR_DELETION,
                task_id=custom_task_id,
                priority=OnyxCeleryPriority.MEDIUM,
+                ignore_result=True,
            )

            async_results.append(result)
--- a/backend/onyx/redis/redis_connector_doc_perm_sync.py
+++ b/backend/onyx/redis/redis_connector_doc_perm_sync.py
@@ -11,6 +11,7 @@ from redis.lock import Lock as RedisLock

 from onyx.access.models import DocExternalAccess
 from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
+from onyx.configs.constants import CELERY_PERMISSIONS_SYNC_LOCK_TIMEOUT
 from onyx.configs.constants import OnyxCeleryPriority
 from onyx.configs.constants import OnyxCeleryQueues
 from onyx.configs.constants import OnyxCeleryTask
@@ -49,7 +50,7 @@ class RedisConnectorPermissionSync:
    # it's impossible to get the exact state of the system at a single point in time
    # so we need a signal with a TTL to bridge gaps in our checks
    ACTIVE_PREFIX = PREFIX + "_active"
-    ACTIVE_TTL = 3600
+    ACTIVE_TTL = CELERY_PERMISSIONS_SYNC_LOCK_TIMEOUT * 2

    def __init__(self, tenant_id: str | None, id: int, redis: redis.Redis) -> None:
        self.tenant_id: str | None = tenant_id
@@ -195,6 +196,7 @@ class RedisConnectorPermissionSync:
                queue=OnyxCeleryQueues.DOC_PERMISSIONS_UPSERT,
                task_id=custom_task_id,
                priority=OnyxCeleryPriority.HIGH,
+                ignore_result=True,
            )
            async_results.append(result)

--- a/backend/onyx/redis/redis_connector_prune.py
+++ b/backend/onyx/redis/redis_connector_prune.py
@@ -10,6 +10,7 @@ from redis.lock import Lock as RedisLock
 from sqlalchemy.orm import Session

 from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
+from onyx.configs.constants import CELERY_PRUNING_LOCK_TIMEOUT
 from onyx.configs.constants import OnyxCeleryPriority
 from onyx.configs.constants import OnyxCeleryQueues
 from onyx.configs.constants import OnyxCeleryTask
@@ -49,7 +50,7 @@ class RedisConnectorPrune:
    # it's impossible to get the exact state of the system at a single point in time
    # so we need a signal with a TTL to bridge gaps in our checks
    ACTIVE_PREFIX = PREFIX + "_active"
-    ACTIVE_TTL = 3600
+    ACTIVE_TTL = CELERY_PRUNING_LOCK_TIMEOUT * 2

    def __init__(self, tenant_id: str | None, id: int, redis: redis.Redis) -> None:
        self.tenant_id: str | None = tenant_id
@@ -201,6 +202,7 @@ class RedisConnectorPrune:
                queue=OnyxCeleryQueues.CONNECTOR_DELETION,
                task_id=custom_task_id,
                priority=OnyxCeleryPriority.MEDIUM,
+                ignore_result=True,
            )

            async_results.append(result)
--- a/backend/onyx/redis/redis_utils.py
+++ b/backend/onyx/redis/redis_utils.py
@@ -0,0 +1,29 @@
+from onyx.redis.redis_connector_credential_pair import (
+    RedisGlobalConnectorCredentialPair,
+)
+from onyx.redis.redis_connector_delete import RedisConnectorDelete
+from onyx.redis.redis_connector_doc_perm_sync import RedisConnectorPermissionSync
+from onyx.redis.redis_connector_index import RedisConnectorIndex
+from onyx.redis.redis_connector_prune import RedisConnectorPrune
+from onyx.redis.redis_document_set import RedisDocumentSet
+from onyx.redis.redis_usergroup import RedisUserGroup
+
+
+def is_fence(key_bytes: bytes) -> bool:
+    key_str = key_bytes.decode("utf-8")
+    if key_str == RedisGlobalConnectorCredentialPair.FENCE_KEY:
+        return True
+    if key_str.startswith(RedisDocumentSet.FENCE_PREFIX):
+        return True
+    if key_str.startswith(RedisUserGroup.FENCE_PREFIX):
+        return True
+    if key_str.startswith(RedisConnectorDelete.FENCE_PREFIX):
+        return True
+    if key_str.startswith(RedisConnectorPrune.FENCE_PREFIX):
+        return True
+    if key_str.startswith(RedisConnectorIndex.FENCE_PREFIX):
+        return True
+    if key_str.startswith(RedisConnectorPermissionSync.FENCE_PREFIX):
+        return True
+
+    return False
--- a/backend/onyx/seeding/load_yamls.py
+++ b/backend/onyx/seeding/load_yamls.py
@@ -162,6 +162,11 @@ def load_personas_from_yaml(
                else persona.get("is_visible")
            ),
            db_session=db_session,
+            is_default_persona=(
+                existing_persona.is_default_persona
+                if existing_persona is not None
+                else persona.get("is_default_persona", False)
+            ),
        )


--- a/backend/onyx/seeding/personas.yaml
+++ b/backend/onyx/seeding/personas.yaml
@@ -41,6 +41,7 @@ personas:
    icon_color: "#6FB1FF"
    display_priority: 0
    is_visible: true
+    is_default_persona: true
    starter_messages:
      - name: "Give me an overview of what's here"
        message: "Sample some documents and tell me what you find."
@@ -66,6 +67,7 @@ personas:
    icon_color: "#FF6F6F"
    display_priority: 1
    is_visible: true
+    is_default_persona: true
    starter_messages:
      - name: "Summarize a document"
        message: "If I have provided a document please summarize it for me. If not, please ask me to upload a document either by dragging it into the input bar or clicking the +file icon."
@@ -91,6 +93,7 @@ personas:
    icon_color: "#6FFF8D"
    display_priority: 2
    is_visible: false
+    is_default_persona: true
    starter_messages:
      - name: "Document Search"
        message: "Hi! Could you help me find information about our team structure and reporting lines from our internal documents?"
@@ -117,6 +120,7 @@ personas:
    image_generation: true
    display_priority: 3
    is_visible: true
+    is_default_persona: true
    starter_messages:
      - name: "Create visuals for a presentation"
        message: "Generate someone presenting a graph which clearly demonstrates an upwards trajectory."
--- a/backend/onyx/server/gpts/api.py
+++ b/backend/onyx/server/gpts/api.py
@@ -76,6 +76,7 @@ def gpt_search(
        user=None,
        llm=llm,
        fast_llm=fast_llm,
+        skip_query_analysis=True,
        db_session=db_session,
    ).reranked_sections

--- a/backend/onyx/server/settings/models.py
+++ b/backend/onyx/server/settings/models.py
@@ -12,10 +12,10 @@ class PageType(str, Enum):
    SEARCH = "search"


-class GatingType(str, Enum):
-    FULL = "full"  # Complete restriction of access to the product or service
-    PARTIAL = "partial"  # Full access but warning (no credit card on file)
-    NONE = "none"  # No restrictions, full access to all features
+class ApplicationStatus(str, Enum):
+    PAYMENT_REMINDER = "payment_reminder"
+    GATED_ACCESS = "gated_access"
+    ACTIVE = "active"


 class Notification(BaseModel):
@@ -43,7 +43,7 @@ class Settings(BaseModel):

    maximum_chat_retention_days: int | None = None
    gpu_enabled: bool | None = None
-    product_gating: GatingType = GatingType.NONE
+    application_status: ApplicationStatus = ApplicationStatus.ACTIVE
    anonymous_user_enabled: bool | None = None
    pro_search_disabled: bool | None = None
    auto_scroll: bool | None = None
--- a/backend/onyx/server/settings/store.py
+++ b/backend/onyx/server/settings/store.py
@@ -1,6 +1,7 @@
 from onyx.configs.constants import KV_SETTINGS_KEY
 from onyx.configs.constants import OnyxRedisLocks
 from onyx.key_value_store.factory import get_kv_store
+from onyx.key_value_store.interface import KvKeyNotFoundError
 from onyx.redis.redis_pool import get_redis_client
 from onyx.server.settings.models import Settings
 from onyx.utils.logger import setup_logger
@@ -17,6 +18,9 @@ def load_settings() -> Settings:
        settings = (
            Settings.model_validate(stored_settings) if stored_settings else Settings()
        )
+    except KvKeyNotFoundError:
+        logger.error(f"No settings found in KV store for key: {KV_SETTINGS_KEY}")
+        settings = Settings()
    except Exception as e:
        logger.error(f"Error loading settings from KV store: {str(e)}")
        settings = Settings()
--- a/backend/onyx/tools/base_tool.py
+++ b/backend/onyx/tools/base_tool.py
@@ -34,7 +34,7 @@ Now respond to the following:
 """.strip()


-class BaseTool(Tool):
+class BaseTool(Tool[None]):
    def build_next_prompt(
        self,
        prompt_builder: "AnswerPromptBuilder",
--- a/backend/onyx/tools/models.py
+++ b/backend/onyx/tools/models.py
@@ -1,11 +1,14 @@
+from collections.abc import Callable
 from typing import Any
 from uuid import UUID

 from pydantic import BaseModel
 from pydantic import model_validator
+from sqlalchemy.orm import Session

 from onyx.context.search.enums import SearchType
 from onyx.context.search.models import IndexFilters
+from onyx.context.search.models import InferenceSection


 class ToolResponse(BaseModel):
@@ -57,5 +60,15 @@ class SearchQueryInfo(BaseModel):
    recency_bias_multiplier: float


+class SearchToolOverrideKwargs(BaseModel):
+    force_no_rerank: bool
+    alternate_db_session: Session | None
+    retrieved_sections_callback: Callable[[list[InferenceSection]], None] | None
+    skip_query_analysis: bool
+
+    class Config:
+        arbitrary_types_allowed = True
+
+
 CHAT_SESSION_ID_PLACEHOLDER = "CHAT_SESSION_ID"
 MESSAGE_ID_PLACEHOLDER = "MESSAGE_ID"
--- a/backend/onyx/tools/tool.py
+++ b/backend/onyx/tools/tool.py
@@ -1,7 +1,9 @@
 import abc
 from collections.abc import Generator
 from typing import Any
+from typing import Generic
 from typing import TYPE_CHECKING
+from typing import TypeVar

 from onyx.llm.interfaces import LLM
 from onyx.llm.models import PreviousMessage
@@ -14,7 +16,10 @@ if TYPE_CHECKING:
    from onyx.tools.models import ToolResponse


-class Tool(abc.ABC):
+OVERRIDE_T = TypeVar("OVERRIDE_T")
+
+
+class Tool(abc.ABC, Generic[OVERRIDE_T]):
    @property
    @abc.abstractmethod
    def name(self) -> str:
@@ -57,7 +62,9 @@ class Tool(abc.ABC):
    """Actual execution of the tool"""

    @abc.abstractmethod
-    def run(self, **kwargs: Any) -> Generator["ToolResponse", None, None]:
+    def run(
+        self, override_kwargs: OVERRIDE_T | None = None, **llm_kwargs: Any
+    ) -> Generator["ToolResponse", None, None]:
        raise NotImplementedError

    @abc.abstractmethod
--- a/backend/onyx/tools/tool_implementations/custom/custom_tool.py
+++ b/backend/onyx/tools/tool_implementations/custom/custom_tool.py
@@ -74,6 +74,7 @@ class CustomToolCallSummary(BaseModel):
    tool_result: Any  # The response data


+# override_kwargs is not supported for custom tools
 class CustomTool(BaseTool):
    def __init__(
        self,
@@ -235,7 +236,9 @@ class CustomTool(BaseTool):

    """Actual execution of the tool"""

-    def run(self, **kwargs: Any) -> Generator[ToolResponse, None, None]:
+    def run(
+        self, override_kwargs: dict[str, Any] | None = None, **kwargs: Any
+    ) -> Generator[ToolResponse, None, None]:
        request_body = kwargs.get(REQUEST_BODY)

        path_params = {}
--- a/backend/onyx/tools/tool_implementations/images/image_generation_tool.py
+++ b/backend/onyx/tools/tool_implementations/images/image_generation_tool.py
@@ -79,7 +79,8 @@ class ImageShape(str, Enum):
    LANDSCAPE = "landscape"


-class ImageGenerationTool(Tool):
+# override_kwargs is not supported for image generation tools
+class ImageGenerationTool(Tool[None]):
    _NAME = "run_image_generation"
    _DESCRIPTION = "Generate an image from a prompt."
    _DISPLAY_NAME = "Image Generation"
@@ -255,7 +256,9 @@ class ImageGenerationTool(Tool):
                "An error occurred during image generation. Please try again later."
            )

-    def run(self, **kwargs: str) -> Generator[ToolResponse, None, None]:
+    def run(
+        self, override_kwargs: None = None, **kwargs: str
+    ) -> Generator[ToolResponse, None, None]:
        prompt = cast(str, kwargs["prompt"])
        shape = ImageShape(kwargs.get("shape", ImageShape.SQUARE))
        format = self.output_format
--- a/backend/onyx/tools/tool_implementations/internet_search/internet_search_tool.py
+++ b/backend/onyx/tools/tool_implementations/internet_search/internet_search_tool.py
@@ -106,7 +106,8 @@ def internet_search_response_to_search_docs(
    ]


-class InternetSearchTool(Tool):
+# override_kwargs is not supported for internet search tools
+class InternetSearchTool(Tool[None]):
    _NAME = "run_internet_search"
    _DISPLAY_NAME = "Internet Search"
    _DESCRIPTION = "Perform an internet search for up-to-date information."
@@ -242,7 +243,9 @@ class InternetSearchTool(Tool):
            ],
        )

-    def run(self, **kwargs: str) -> Generator[ToolResponse, None, None]:
+    def run(
+        self, override_kwargs: None = None, **kwargs: str
+    ) -> Generator[ToolResponse, None, None]:
        query = cast(str, kwargs["internet_search_query"])

        results = self._perform_search(query)
--- a/backend/onyx/tools/tool_implementations/search/search_tool.py
+++ b/backend/onyx/tools/tool_implementations/search/search_tool.py
@@ -39,6 +39,7 @@ from onyx.secondary_llm_flows.choose_search import check_if_need_search
 from onyx.secondary_llm_flows.query_expansion import history_based_query_rephrase
 from onyx.tools.message import ToolCallSummary
 from onyx.tools.models import SearchQueryInfo
+from onyx.tools.models import SearchToolOverrideKwargs
 from onyx.tools.models import ToolResponse
 from onyx.tools.tool import Tool
 from onyx.tools.tool_implementations.search.search_utils import llm_doc_to_dict
@@ -77,7 +78,7 @@ HINT: if you are unfamiliar with the user input OR think the user input is a typ
 """


-class SearchTool(Tool):
+class SearchTool(Tool[SearchToolOverrideKwargs]):
    _NAME = "run_search"
    _DISPLAY_NAME = "Search Tool"
    _DESCRIPTION = SEARCH_TOOL_DESCRIPTION
@@ -275,14 +276,19 @@ class SearchTool(Tool):

        yield ToolResponse(id=FINAL_CONTEXT_DOCUMENTS_ID, response=llm_docs)

-    def run(self, **kwargs: Any) -> Generator[ToolResponse, None, None]:
-        query = cast(str, kwargs["query"])
-        force_no_rerank = cast(bool, kwargs.get("force_no_rerank", False))
-        alternate_db_session = cast(Session, kwargs.get("alternate_db_session", None))
-        retrieved_sections_callback = cast(
-            Callable[[list[InferenceSection]], None],
-            kwargs.get("retrieved_sections_callback"),
-        )
+    def run(
+        self, override_kwargs: SearchToolOverrideKwargs | None = None, **llm_kwargs: Any
+    ) -> Generator[ToolResponse, None, None]:
+        query = cast(str, llm_kwargs["query"])
+        force_no_rerank = False
+        alternate_db_session = None
+        retrieved_sections_callback = None
+        skip_query_analysis = False
+        if override_kwargs:
+            force_no_rerank = override_kwargs.force_no_rerank
+            alternate_db_session = override_kwargs.alternate_db_session
+            retrieved_sections_callback = override_kwargs.retrieved_sections_callback
+            skip_query_analysis = override_kwargs.skip_query_analysis

        if self.selected_sections:
            yield from self._build_response_for_specified_sections(query)
@@ -324,6 +330,7 @@ class SearchTool(Tool):
            user=self.user,
            llm=self.llm,
            fast_llm=self.fast_llm,
+            skip_query_analysis=skip_query_analysis,
            bypass_acl=self.bypass_acl,
            db_session=alternate_db_session or self.db_session,
            prompt_config=self.prompt_config,
--- a/backend/requirements/default.txt
+++ b/backend/requirements/default.txt
@@ -34,8 +34,8 @@ langchain-core==0.3.24
 langchain-openai==0.2.9
 langchain-text-splitters==0.3.2
 langchainhub==0.1.21
-langgraph==0.2.59
-langgraph-checkpoint==2.0.5
+langgraph==0.2.72
+langgraph-checkpoint==2.0.13
 langgraph-sdk==0.1.44
 litellm==1.60.2
 lxml==5.3.0
--- a/backend/scripts/debugging/onyx_vespa.py
+++ b/backend/scripts/debugging/onyx_vespa.py
@@ -256,16 +256,28 @@ def get_documents_for_tenant_connector(


 def search_for_document(
-    index_name: str, document_id: str, max_hits: int | None = 10
+    index_name: str,
+    document_id: str | None = None,
+    tenant_id: str | None = None,
+    max_hits: int | None = 10,
 ) -> List[Dict[str, Any]]:
-    yql_query = (
-        f'select * from sources {index_name} where document_id contains "{document_id}"'
-    )
+    yql_query = f"select * from sources {index_name}"
+
+    conditions = []
+    if document_id is not None:
+        conditions.append(f'document_id contains "{document_id}"')
+
+    if tenant_id is not None:
+        conditions.append(f'tenant_id contains "{tenant_id}"')
+
+    if conditions:
+        yql_query += " where " + " and ".join(conditions)
+
    params: dict[str, Any] = {"yql": yql_query}
    if max_hits is not None:
        params["hits"] = max_hits
    with get_vespa_http_client() as client:
-        response = client.get(f"{SEARCH_ENDPOINT}/search/", params=params)
+        response = client.get(f"{SEARCH_ENDPOINT}search/", params=params)
        response.raise_for_status()
        result = response.json()
        documents = result.get("root", {}).get("children", [])
@@ -582,8 +594,15 @@ class VespaDebugging:
    ) -> None:
        update_document(self.tenant_id, connector_id, doc_id, fields)

-    def search_for_document(self, document_id: str) -> List[Dict[str, Any]]:
-        return search_for_document(self.index_name, document_id)
+    def delete_documents_for_tenant(self, count: int | None = None) -> None:
+        if not self.tenant_id:
+            raise Exception("Tenant ID is not set")
+        delete_documents_for_tenant(self.index_name, self.tenant_id, count=count)
+
+    def search_for_document(
+        self, document_id: str | None = None, tenant_id: str | None = None
+    ) -> List[Dict[str, Any]]:
+        return search_for_document(self.index_name, document_id, tenant_id)

    def delete_document(self, connector_id: int, doc_id: str) -> None:
        # Delete a document.
@@ -600,6 +619,147 @@ class VespaDebugging:
        get_document_acls(self.tenant_id, cc_pair_id, n)


+def delete_where(
+    index_name: str,
+    selection: str,
+    cluster: str = "default",
+    bucket_space: str | None = None,
+    continuation: str | None = None,
+    time_chunk: str | None = None,
+    timeout: str | None = None,
+    tracelevel: int | None = None,
+) -> None:
+    """
+    Removes visited documents in `cluster` where the given selection
+    is true, using Vespa's 'delete where' endpoint.
+
+    :param index_name: Typically <namespace>/<document-type> from your schema
+    :param selection:  The selection string, e.g., "true" or "foo contains 'bar'"
+    :param cluster:    The name of the cluster where documents reside
+    :param bucket_space:  e.g. 'global' or 'default'
+    :param continuation:  For chunked visits
+    :param time_chunk:    If you want to chunk the visit by time
+    :param timeout:       e.g. '10s'
+    :param tracelevel:    Increase for verbose logs
+    """
+    # Using index_name of form <namespace>/<document-type>, e.g. "nomic_ai_nomic_embed_text_v1"
+    # This route ends with "/docid/" since the actual ID is not specified — we rely on "selection".
+    path = f"/document/v1/{index_name}/docid/"
+
+    params: dict[str, str] = {
+        "cluster": cluster,
+        "selection": selection,
+    }
+
+    # Optional parameters
+    if bucket_space is not None:
+        params["bucketSpace"] = bucket_space
+    if continuation is not None:
+        params["continuation"] = continuation
+    if time_chunk is not None:
+        params["timeChunk"] = time_chunk
+    if timeout is not None:
+        params["timeout"] = timeout
+    if tracelevel is not None:
+        params["tracelevel"] = str(tracelevel)  # stringified, as in delete_documents_for_tenant
+
+    with get_vespa_http_client() as client:
+        url = f"{VESPA_APPLICATION_ENDPOINT}{path}"
+        logger.info(f"Performing 'delete where' on {url} with selection={selection}...")
+        response = client.delete(url, params=params)
+        # (Optionally, you can keep fetching `continuation` from the JSON response
+        #  if you have more documents to delete in chunks.)
+        response.raise_for_status()  # will raise HTTPError if not 2xx
+        logger.info(f"Delete where completed with status: {response.status_code}")
+        print(f"Delete where completed with status: {response.status_code}")
+
+
+def delete_documents_for_tenant(
+    index_name: str,
+    tenant_id: str,
+    route: str | None = None,
+    condition: str | None = None,
+    timeout: str | None = None,
+    tracelevel: int | None = None,
+    count: int | None = None,
+) -> None:
+    """
+    For the given tenant_id and index_name (often in the form <namespace>/<document-type>),
+    find documents via search_for_document, then delete them one at a time using Vespa's
+    /document/v1/<namespace>/<document-type>/docid/<document-id> endpoint.
+
+    :param index_name: Typically <namespace>/<document-type> from your schema
+    :param tenant_id:  The tenant to match in your Vespa search
+    :param route:      Optional route parameter for delete
+    :param condition:  Optional conditional remove
+    :param timeout:    e.g. '10s'
+    :param tracelevel: Increase for verbose logs
+    :param count:      Optional cap on how many documents to delete; None means no cap
+    """
+    deleted_count = 0
+    while True:
+        # Search for documents with the given tenant_id
+        docs = search_for_document(
+            index_name=index_name,
+            document_id=None,
+            tenant_id=tenant_id,
+            max_hits=100,  # Fetch in batches of 100
+        )
+        if not docs:
+            logger.info("No more documents found to delete.")
+            break
+
+        with get_vespa_http_client() as client:
+            for doc in docs:
+                if count is not None and deleted_count >= count:
+                    logger.info(f"Reached maximum delete limit of {count} documents.")
+                    return
+
+                fields = doc.get("fields", {})
+                doc_id_value = fields.get("document_id") or fields.get("documentid")
+                # Separate name: rebinding tenant_id would make this check a no-op
+                doc_tenant_id = fields.get("tenant_id")
+                if doc_tenant_id != tenant_id:
+                    raise Exception("Tenant ID mismatch")
+
+                if not doc_id_value:
+                    logger.warning(
+                        "Skipping a document that has no document_id in 'fields'."
+                    )
+                    continue
+
+                url = f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{doc_id_value}"
+                params = {}
+                if condition:
+                    params["condition"] = condition
+                if route:
+                    params["route"] = route
+                if timeout:
+                    params["timeout"] = timeout
+                if tracelevel is not None:
+                    params["tracelevel"] = str(tracelevel)
+
+                response = client.delete(url, params=params)
+                if response.status_code == 200:
+                    logger.info(f"Successfully deleted doc_id={doc_id_value}")
+                    deleted_count += 1
+                else:
+                    logger.error(
+                        f"Failed to delete doc_id={doc_id_value}, "
+                        f"status={response.status_code}, response={response.text}"
+                    )
+                    print(
+                        f"Could not delete doc_id={doc_id_value}. "
+                        f"Status={response.status_code}, response={response.text}"
+                    )
+                    raise Exception(
+                        f"Could not delete doc_id={doc_id_value}. "
+                        f"Status={response.status_code}, response={response.text}"
+                    )
+
+    logger.info(f"Deleted {deleted_count} documents in total.")
+
+
 def main() -> None:
    parser = argparse.ArgumentParser(description="Vespa debugging tool")
    parser.add_argument(
@@ -612,6 +772,7 @@ def main() -> None:
            "update",
            "delete",
            "get_acls",
+            "delete-all-documents",
        ],
        required=True,
        help="Action to perform",
@@ -626,11 +787,20 @@ def main() -> None:
    parser.add_argument(
        "--fields", help="Fields to update, in JSON format (for update)"
    )
+    parser.add_argument(
+        "--count",
+        type=int,
+        help="Maximum number of documents to delete (for delete-all-documents)",
+    )

    args = parser.parse_args()
    vespa_debug = VespaDebugging(args.tenant_id)

-    if args.action == "config":
+    if args.action == "delete-all-documents":
+        if not args.tenant_id:
+            parser.error("--tenant-id is required for delete-all-documents action")
+        vespa_debug.delete_documents_for_tenant(count=args.count)
+    elif args.action == "config":
        vespa_debug.print_config()
    elif args.action == "connect":
        vespa_debug.check_connectivity()
--- a/backend/tests/daily/connectors/gitbook/test_gitbook_connector.py
+++ b/backend/tests/daily/connectors/gitbook/test_gitbook_connector.py
@@ -0,0 +1,81 @@
+import os
+import time
+
+import pytest
+
+from onyx.configs.constants import DocumentSource
+from onyx.connectors.gitbook.connector import GitbookConnector
+
+
+@pytest.fixture
+def gitbook_connector() -> GitbookConnector:
+    connector = GitbookConnector(
+        space_id=os.environ["GITBOOK_SPACE_ID"],
+    )
+    connector.load_credentials(
+        {
+            "gitbook_api_key": os.environ["GITBOOK_API_KEY"],
+        }
+    )
+    return connector
+
+
+def test_gitbook_connector_basic(gitbook_connector: GitbookConnector) -> None:
+    doc_batch_generator = gitbook_connector.load_from_state()
+
+    # Get first batch of documents
+    doc_batch = next(doc_batch_generator)
+    assert len(doc_batch) > 0
+
+    # Verify first document structure
+    doc = doc_batch[0]
+
+    # Basic document properties
+    assert doc.id.startswith("gitbook-")
+    assert doc.semantic_identifier == "Acme Corp Internal Handbook"
+    assert doc.source == DocumentSource.GITBOOK
+
+    # Metadata checks
+    assert "path" in doc.metadata
+    assert "type" in doc.metadata
+    assert "kind" in doc.metadata
+
+    # Section checks
+    assert len(doc.sections) == 1
+    section = doc.sections[0]
+
+    # Content specific checks
+    content = section.text
+
+    # Check for specific content elements
+    assert "* Fruit Shopping List:" in content
+    assert "> test quote it doesn't mean anything" in content
+
+    # Check headings
+    assert "# Heading 1" in content
+    assert "## Heading 2" in content
+    assert "### Heading 3" in content
+
+    # Check task list
+    assert "- [ ] Uncompleted Task" in content
+    assert "- [x] Completed Task" in content
+
+    # Check table content
+    assert "| ethereum | 10 | 3000 |" in content
+    assert "| bitcoin | 2 | 98000 |" in content
+
+    # Check paragraph content
+    assert "New York City comprises 5 boroughs" in content
+    assert "Empire State Building" in content
+
+    # Check code block (just verify presence of some unique code elements)
+    assert "function fizzBuzz(n)" in content
+    assert 'res.push("FizzBuzz")' in content
+
+    assert section.link  # Should have a URL
+
+    # Time-based polling test
+    current_time = time.time()
+    poll_docs = gitbook_connector.poll_source(0, current_time)
+    poll_batch = next(poll_docs)
+    assert len(poll_batch) > 0
--- a/backend/tests/daily/connectors/jira/test_jira_basic.py
+++ b/backend/tests/daily/connectors/jira/test_jira_basic.py
@@ -34,11 +34,11 @@ def test_jira_connector_basic(jira_connector: JiraConnector) -> None:
    doc = doc_batch[0]

    assert doc.id == "https://danswerai.atlassian.net/browse/AS-2"
-    assert doc.semantic_identifier == "test123small"
+    assert doc.semantic_identifier == "AS-2: test123small"
    assert doc.source == DocumentSource.JIRA
    assert doc.metadata == {"priority": "Medium", "status": "Backlog"}
    assert doc.secondary_owners is None
-    assert doc.title is None
+    assert doc.title == "AS-2 test123small"
    assert doc.from_ingestion_api is False
    assert doc.additional_info is None

--- a/backend/tests/daily/llm/test_bedrock.py
+++ b/backend/tests/daily/llm/test_bedrock.py
@@ -23,6 +23,9 @@ def bedrock_provider() -> WellKnownLLMProviderDescriptor:
    return provider


+@pytest.mark.xfail(
+    reason="Credentials not yet available due to compliance work needed",
+)
 def test_bedrock_llm_configuration(
    client: TestClient, bedrock_provider: WellKnownLLMProviderDescriptor
 ) -> None:
--- a/backend/tests/regression/answer_quality/agent_test.py
+++ b/backend/tests/regression/answer_quality/agent_test.py
@@ -1,18 +1,39 @@
 import csv
-import datetime
 import json
 import os
+from collections import defaultdict
+from datetime import datetime
+from datetime import timedelta
+from typing import Any

 import yaml

 from onyx.agents.agent_search.deep_search.main.graph_builder import (
    main_graph_builder,
 )
-from onyx.agents.agent_search.deep_search.main.states import MainInput
+from onyx.agents.agent_search.deep_search.main.graph_builder import (
+    main_graph_builder as main_graph_builder_a,
+)
+from onyx.agents.agent_search.deep_search.main.states import (
+    MainInput as MainInput_a,
+)
+from onyx.agents.agent_search.run_graph import run_basic_graph
+from onyx.agents.agent_search.run_graph import run_main_graph
 from onyx.agents.agent_search.shared_graph_utils.utils import get_test_config
+from onyx.chat.models import AgentAnswerPiece
+from onyx.chat.models import OnyxAnswerPiece
+from onyx.chat.models import RefinedAnswerImprovement
+from onyx.chat.models import StreamStopInfo
+from onyx.chat.models import StreamType
+from onyx.chat.models import SubQuestionPiece
 from onyx.context.search.models import SearchRequest
 from onyx.db.engine import get_session_context_manager
 from onyx.llm.factory import get_default_llms
+from onyx.tools.force import ForceUseTool
+from onyx.tools.tool_implementations.search.search_tool import SearchTool
+from onyx.utils.logger import setup_logger
+
+logger = setup_logger()


 cwd = os.getcwd()
@@ -35,95 +56,183 @@ input_file_object = open(
 )
 output_file = f"{OUTPUT_DIR}/agent_test_output.csv"

+csv_output_data: list[list[str]] = []

 test_data = json.load(input_file_object)
 example_data = test_data["examples"]
 example_ids = test_data["example_ids"]

+failed_example_ids: list[int] = []
+
 with get_session_context_manager() as db_session:
-    output_data = []
+    output_data: dict[str, Any] = {}
+
+    primary_llm, fast_llm = get_default_llms()

    for example in example_data:
-        example_id = example["id"]
+        query_start_time: datetime = datetime.now()
+        example_id: int = int(example.get("id"))
+        example_question: str = example.get("question")
+        if not example_question or not example_id:
+            continue
        if len(example_ids) > 0 and example_id not in example_ids:
            continue

-        example_question = example["question"]
-        target_sub_questions = example.get("target_sub_questions", [])
-        num_target_sub_questions = len(target_sub_questions)
-        search_request = SearchRequest(query=example_question)
+        logger.info(f"{query_start_time} -- Processing example {example_id}")

-        config, search_tool = get_test_config(
-            db_session=db_session,
-            primary_llm=primary_llm,
-            fast_llm=fast_llm,
-            search_request=search_request,
-        )
+        try:
+            example_question = example["question"]
+            target_sub_questions = example.get("target_sub_questions", [])
+            num_target_sub_questions = len(target_sub_questions)
+            search_request = SearchRequest(query=example_question)

-        inputs = MainInput()
+            initial_answer_duration: timedelta | None = None
+            refined_answer_duration: timedelta | None = None
+            base_answer_duration: timedelta | None = None

-        start_time = datetime.datetime.now()
+            logger.debug("\n\nTEST QUERY START\n\n")

-        question_result = compiled_graph.invoke(
-            input=inputs, config={"metadata": {"config": config}}
-        )
-        end_time = datetime.datetime.now()
+            graph = main_graph_builder_a()
+            compiled_graph = graph.compile()
+            query_end_time = datetime.now()

-        duration = end_time - start_time
-        if num_target_sub_questions > 0:
-            chunk_expansion_ratio = (
-                question_result["initial_agent_stats"]
-                .get("agent_effectiveness", {})
-                .get("utilized_chunk_ratio", None)
+            search_request = SearchRequest(
+                # query="what can you do with gitlab?",
+                # query="What are the guiding principles behind the development of cockroachDB",
+                # query="What are the temperatures in Munich, Hawaii, and New York?",
+                # query="When was Washington born?",
+                # query="What is Onyx?",
+                # query="What is the difference between astronomy and astrology?",
+                query=example_question,
            )
-            support_effectiveness_ratio = (
-                question_result["initial_agent_stats"]
-                .get("agent_effectiveness", {})
-                .get("support_ratio", None)
-            )
-        else:
-            chunk_expansion_ratio = None
-            support_effectiveness_ratio = None

-        generated_sub_questions = question_result.get("generated_sub_questions", [])
-        num_generated_sub_questions = len(generated_sub_questions)
-        base_answer = question_result["initial_base_answer"].split("==")[-1]
-        agent_answer = question_result["initial_answer"].split("==")[-1]
+            answer_tokens: dict[str, list[str]] = defaultdict(list)

-        output_point = {
-            "example_id": example_id,
-            "question": example_question,
-            "duration": duration,
-            "target_sub_questions": target_sub_questions,
-            "generated_sub_questions": generated_sub_questions,
-            "num_target_sub_questions": num_target_sub_questions,
-            "num_generated_sub_questions": num_generated_sub_questions,
-            "chunk_expansion_ratio": chunk_expansion_ratio,
-            "support_effectiveness_ratio": support_effectiveness_ratio,
-            "base_answer": base_answer,
-            "agent_answer": agent_answer,
-        }
+            with get_session_context_manager() as db_session:
+                config = get_test_config(
+                    db_session, primary_llm, fast_llm, search_request
+                )
+                assert (
+                    config.persistence is not None
+                ), "set a chat session id to run this test"

-        output_data.append(output_point)
+                # search_request.persona = get_persona_by_id(1, None, db_session)
+                # config.perform_initial_search_path_decision = False
+                config.behavior.perform_initial_search_decomposition = True
+                input = MainInput_a()
+
+                # Base Flow
+                base_flow_start_time: datetime = datetime.now()
+                for output in run_basic_graph(config):
+                    if isinstance(output, OnyxAnswerPiece):
+                        answer_tokens["base_answer"].append(output.answer_piece or "")
+
+                output_data["base_answer"] = "".join(answer_tokens["base_answer"])
+                output_data["base_answer_duration"] = (
+                    datetime.now() - base_flow_start_time
+                )
+
+                # Agent Flow
+                agent_flow_start_time: datetime = datetime.now()
+                config = get_test_config(
+                    db_session,
+                    primary_llm,
+                    fast_llm,
+                    search_request,
+                    use_agentic_search=True,
+                )
+
+                config.tooling.force_use_tool = ForceUseTool(
+                    force_use=True, tool_name=SearchTool._NAME
+                )
+
+                tool_responses: list = []
+
+                sub_question_dict_tokens: dict[int, dict[int, str]] = defaultdict(
+                    lambda: defaultdict(str)
+                )
+
+                for output in run_main_graph(config):
+                    if isinstance(output, AgentAnswerPiece):
+                        if output.level == 0 and output.level_question_num == 0:
+                            answer_tokens["initial"].append(output.answer_piece)
+                        elif output.level == 1 and output.level_question_num == 0:
+                            answer_tokens["refined"].append(output.answer_piece)
+                    elif isinstance(output, SubQuestionPiece):
+                        if (
+                            output.level is not None
+                            and output.level_question_num is not None
+                        ):
+                            sub_question_dict_tokens[output.level][
+                                output.level_question_num
+                            ] += output.sub_question
+                    elif isinstance(output, StreamStopInfo):
+                        if (
+                            output.stream_type == StreamType.MAIN_ANSWER
+                            and output.level == 0
+                        ):
+                            initial_answer_duration = (
+                                datetime.now() - agent_flow_start_time
+                            )
+                    elif isinstance(output, RefinedAnswerImprovement):
+                        output_data["refined_answer_improves_on_initial_answer"] = str(
+                            output.refined_answer_improvement
+                        )
+
+                refined_answer_duration = datetime.now() - agent_flow_start_time
+
+                output_data["example_id"] = example_id
+                output_data["question"] = example_question
+                output_data["initial_answer"] = "".join(answer_tokens["initial"])
+                output_data["refined_answer"] = "".join(answer_tokens["refined"])
+                output_data["initial_answer_duration"] = initial_answer_duration or ""
+                output_data["refined_answer_duration"] = refined_answer_duration
+
+                output_data["initial_sub_questions"] = "\n---\n".join(
+                    [x for x in sub_question_dict_tokens[0].values()]
+                )
+                output_data["refined_sub_questions"] = "\n---\n".join(
+                    [x for x in sub_question_dict_tokens[1].values()]
+                )
+
+                csv_output_data.append(
+                    [
+                        str(example_id),
+                        example_question,
+                        output_data["base_answer"],
+                        output_data["base_answer_duration"],
+                        output_data["initial_sub_questions"],
+                        output_data["initial_answer"],
+                        output_data["initial_answer_duration"],
+                        output_data["refined_sub_questions"],
+                        output_data["refined_answer"],
+                        output_data["refined_answer_duration"],
+                        output_data["refined_answer_improves_on_initial_answer"],
+                    ]
+                )
+        except Exception as e:
+            logger.error(f"Error processing example {example_id}: {e}")
+            failed_example_ids.append(example_id)
+            continue


 with open(output_file, "w", newline="") as csvfile:
-    fieldnames = [
-        "example_id",
-        "question",
-        "duration",
-        "target_sub_questions",
-        "generated_sub_questions",
-        "num_target_sub_questions",
-        "num_generated_sub_questions",
-        "chunk_expansion_ratio",
-        "support_effectiveness_ratio",
-        "base_answer",
-        "agent_answer",
-    ]
-
-    writer = csv.DictWriter(csvfile, fieldnames=fieldnames, delimiter="\t")
-    writer.writeheader()
-    writer.writerows(output_data)
+    writer = csv.writer(csvfile, delimiter="\t")
+    writer.writerow(
+        [
+            "example_id",
+            "question",
+            "base_answer",
+            "base_answer_duration",
+            "initial_sub_questions",
+            "initial_answer",
+            "initial_answer_duration",
+            "refined_sub_questions",
+            "refined_answer",
+            "refined_answer_duration",
+            "refined_answer_improves_on_initial_answer",
+        ]
+    )
+    writer.writerows(csv_output_data)

 print("DONE")
--- a/web/Dockerfile
+++ b/web/Dockerfile
@@ -84,6 +84,9 @@ ENV NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=${NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED}
 ARG NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK
 ENV NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=${NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK}

+ARG NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY
+ENV NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY=${NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY}
+
 # Use NODE_OPTIONS in the build command
 RUN NODE_OPTIONS="${NODE_OPTIONS}" npx next build

@@ -145,7 +148,6 @@ ENV NEXT_PUBLIC_DISABLE_LOGOUT=${NEXT_PUBLIC_DISABLE_LOGOUT}
 ARG NEXT_PUBLIC_CUSTOM_REFRESH_URL
 ENV NEXT_PUBLIC_CUSTOM_REFRESH_URL=${NEXT_PUBLIC_CUSTOM_REFRESH_URL}

-
 ARG NEXT_PUBLIC_POSTHOG_KEY
 ARG NEXT_PUBLIC_POSTHOG_HOST
 ENV NEXT_PUBLIC_POSTHOG_KEY=${NEXT_PUBLIC_POSTHOG_KEY}
@@ -166,6 +168,9 @@ ENV NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=${NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED}
 ARG NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK
 ENV NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=${NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK}

+ARG NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY
+ENV NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY=${NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY}
+
 # Note: Don't expose ports here, Compose will handle that for us if necessary. 
 # If you want to run this without compose, specify the ports to 
 # expose via cli
--- a/web/public/GitBookDark.png
+++ b/web/public/GitBookDark.png
--- a/web/public/GitBookLight.png
+++ b/web/public/GitBookLight.png
--- a/web/src/app/admin/connectors/[connector]/AddConnectorPage.tsx
+++ b/web/src/app/admin/connectors/[connector]/AddConnectorPage.tsx
@@ -502,9 +502,10 @@ export default function AddConnector({
                        {oauthSupportedSources.includes(connector) &&
                          (NEXT_PUBLIC_CLOUD_ENABLED ||
                            NEXT_PUBLIC_TEST_ENV) && (
-                            <button
+                            <Button
+                              variant="navigate"
                              onClick={handleAuthorize}
-                              className="mt-6 text-sm bg-blue-500 px-2 py-1.5 flex text-text-200 flex-none rounded"
+                              className="mt-6 "
                              disabled={isAuthorizing}
                              hidden={!isAuthorizeVisible}
                            >
@@ -513,7 +514,7 @@ export default function AddConnector({
                                : `Authorize with ${getSourceDisplayName(
                                    connector
                                  )}`}
-                            </button>
+                            </Button>
                          )}
                      </div>
                    )}
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
pablodanswer	48998a5204	k	2025-02-15 11:22:37 -08:00
pablonyx	697f8bc1c6	Reduce background errors (#4004 )	2025-02-14 17:35:26 -08:00
evan-danswer	3ba65214b8	bump version and fix related issues (#3996 )	2025-02-14 19:57:12 +00:00
joachim-danswer	6687d5d499	major Agent Search Updates (#3994 )	2025-02-14 19:40:21 +00:00
pablonyx	ec78f78f3c	k (#3999 )	2025-02-14 02:33:42 +00:00
rkuo-danswer	ed253e469a	add nano and vim to base image (#3995 ) Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>	2025-02-14 02:27:24 +00:00
pablodanswer	e3aafd95af	k	2025-02-13 18:34:05 -08:00
Weves	3a704f1950	Add new vars to github action	2025-02-13 18:33:17 -08:00
Weves	2bf8a7aee5	Misc improvements	2025-02-13 18:33:17 -08:00
Weves	c2f3302aa0	Fix mypy	2025-02-13 18:33:17 -08:00
neo773	7f4d1f27a0	Gitbook connector (#3991 ) * add parser * add tests	2025-02-13 17:58:05 -08:00
pablonyx	b70db15622	Bugfix Vespa Deletion Script (#3998 )	2025-02-13 17:26:04 -08:00
pablonyx	e9492ce9ec	minor read replica fix (#3997 )	2025-02-13 17:11:45 -08:00
pablodanswer	35574369ed	update cloud build to use public stripe key	2025-02-13 16:55:56 -08:00
pablonyx	eff433bdc5	Reduce errors in workers (#3962 )	2025-02-13 15:59:44 -08:00
pablonyx	3260d793d1	Billing fixes (#3976 )	2025-02-13 15:59:10 -08:00
Yuhong Sun	1a7aca06b9	Fix Agent Slowness (#3979 )	2025-02-13 15:54:34 -08:00
pablonyx	c6434db7eb	Add delete all for tenants in Vespa (#3970 )	2025-02-13 14:33:49 -08:00
joachim-danswer	667b9e04c5	updated rerank function arguments (#3988 )	2025-02-13 14:13:14 -08:00
rkuo-danswer	29c84d7707	xfail this test (#3992 ) Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>	2025-02-13 14:09:15 -08:00
pablonyx	17c915b11b	Improved email formatting (#3985 ) * prettier emails * k * remove mislieading comment * minor typing	2025-02-13 21:11:57 +00:00
rkuo-danswer	95ca592d6d	fix title check (#3993 ) Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>	2025-02-13 13:14:55 -08:00
Yuhong Sun	e39a27fd6b	Hope this actually skips the model server builds now (#3987 )	2025-02-13 11:48:25 -08:00
rkuo-danswer	26d3c952c6	Bugfix/jira connector test 2 (#3986 ) * fix jira connector test * typo fix --------- Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>	2025-02-13 10:21:54 -08:00
rkuo-danswer	53683e2f3c	fix jira connector test (#3983 ) Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>	2025-02-13 09:41:45 -08:00
rkuo-danswer	0c0113a481	ignore result when using send_task on lightweight tasks (#3978 ) * ignore result when using send_task on lightweight tasks * fix ignore_result --------- Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app> Co-authored-by: Richard Kuo <rkuo@rkuo.com>	2025-02-13 03:22:13 -08:00
Chris Weaver	c0f381e471	Add background errors ability (#3982 )	2025-02-13 00:44:55 -08:00
rkuo-danswer	5ed83f1148	no thread local locks in callbacks and raise permission sync timeout … (#3977 ) * no thread local locks in callbacks and raise permission sync timeout by a lot based on empirical log observations * more fixes --------- Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>	2025-02-12 22:31:01 -08:00
pablonyx	9db7b67a6c	Minor misc ux improvements (#3966 ) * minor misc ux * nit * k * quick nit * k	2025-02-13 04:43:11 +00:00
Yuhong Sun	2850048c6b	Jira add key to semantic id (#3981 )	2025-02-12 20:04:47 -08:00
rkuo-danswer	61058e5fcd	merge monitoring with kickoff tasks (#3953 ) * move indexing * all monitor work moved * reacquire lock more * remove monitor task completely * fix import * fix pruning finalization * no multiplier on system/cloud tasks * monitor queues every 30 seconds in the cloud --------- Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>	2025-02-13 02:35:41 +00:00