Compare commits

..

29 Commits

Author SHA1 Message Date
joachim-danswer
1c23cf574c Nits 2025-02-10 17:13:16 -08:00
joachim-danswer
0ccf78ac52 reused error strings & BaseMessage_Content 2025-02-10 16:57:25 -08:00
joachim-danswer
02b4b4bf0d remove execs 2025-02-10 16:21:59 -08:00
joachim-danswer
dade11a2e6 EL - OVERRIDE 2025-02-10 14:41:55 -08:00
joachim-danswer
188a5f0d62 EL comments
- overwrite -> override
- enums for error types
- some nits
2025-02-10 14:33:58 -08:00
joachim-danswer
89c0b1ad37 YS comments 2025-02-10 13:29:12 -08:00
pablodanswer
8b20fd31b6 quick update 2025-02-08 17:14:01 -08:00
pablodanswer
6a73245986 quick ux update 2025-02-07 23:40:26 -08:00
joachim-danswer
dd73fdcd08 timeout prep backend 2025-02-07 18:21:35 -08:00
joachim-danswer
768456609a Removal of defaults from various input states + removal of bas 2025-02-07 18:19:18 -08:00
rkuo-danswer
ae37f01f62 event driven indexing/docset/usergroup triggers (#3918)
* WIP

* trigger indexing immediately when the ccpair is created

* add some logging and indexing trigger to the mock-credential endpoint

* better comments

* fix integration test

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-07 22:53:51 +00:00
pablodanswer
ef31e14518 remove debug logs for integration tests 2025-02-07 10:46:24 -08:00
evan-danswer
9b0cba367e small linear connector improvements (#3929)
* small linear connector improvements

* add todo for url handling
2025-02-07 01:31:49 +00:00
pablonyx
48ac690a70 Multi tenant tests (#3919)
* ensure fail on multi tenant successfully

* attempted fix

* update integration tests

* minor update

* improve

* improve workflow

* fix migrations

* many more logs

* quick fix

* improve

* fix typo

* quick nit

* attempted fix

* very minor clean up
2025-02-07 01:24:00 +00:00
pablodanswer
bfa4fbd691 minor delay 2025-02-06 16:28:38 -08:00
rkuo-danswer
58fdc86d41 fix chromatic save/upload (#3927)
* try adding back some params

* raise timeout

* update chromatic version

* fix typo

* use chromatic imports

* update gitignore

* slim down the config file

* update readme

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-06 22:02:14 +00:00
pablonyx
6ff452a2e1 Update popup + misc standardization (#3906)
* pop

* various minor improvements

* improvement

* finalize

* update
2025-02-06 21:22:06 +00:00
pablonyx
e9b892301b Improvements to Redis + Vespa debugging
Improvements to Redis + Vespa debugging
2025-02-06 13:30:32 -08:00
pablodanswer
a202e2bf9d Improvements to Redis + Vespa debugging 2025-02-06 13:30:06 -08:00
pablonyx
3bc4e0d12f Very minor robustification (#3926)
* very minor robustification

* robust
2025-02-06 19:55:38 +00:00
trial-danswer
2fc41cd5df Helm Chart Fixes (#3900)
* initial commit for helm chart refactoring

* Continue refactoring helm. I was able to use helm to deploy all of the apps to a cluster in aws. The bottleneck was setting up PVC dynamic provisioning.

* use default storage class

* Fix linter errors

* Fix broken helm test

* update

* Helm chart fixes

* remove reference to ebsstorage

* Fix linter errors

---------

Co-authored-by: jpb80 <jordan.buttkevitz@gmail.com>
2025-02-06 10:41:09 -08:00
pablodanswer
8c42ff2ff8 slackbot configuration fix 2025-02-06 09:36:58 -08:00
rkuo-danswer
6ccb3f085a select only doc_id (#3920)
* select only doc_id

* select more doc ids

* fix user group

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-02-06 07:00:40 +00:00
pablonyx
a0a1b431be Various UX improvements
Various improvements
2025-02-05 21:13:22 -08:00
pablodanswer
f137fc78a6 various UX improvements 2025-02-05 21:12:55 -08:00
pablonyx
396f096dda Allows for Slackbots that do not have search enabled
Allow no search
2025-02-05 19:20:20 -08:00
pablodanswer
e04b2d6ff3 Allows for Slackbots that do not have search enabled 2025-02-05 19:19:50 -08:00
pablonyx
cbd8b094bd Minor misc docset updates
Minor misc docset updates
2025-02-05 19:14:32 -08:00
pablodanswer
5c7487e91f ensure tests pass 2025-02-05 17:02:49 -08:00
134 changed files with 3251 additions and 1074 deletions

View File

@@ -67,6 +67,7 @@ jobs:
NEXT_PUBLIC_SENTRY_DSN=${{ secrets.SENTRY_DSN }}
NEXT_PUBLIC_GTM_ENABLED=true
NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=true
NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=true
NODE_OPTIONS=--max-old-space-size=8192
# needed due to weird interactions with the builds for different platforms
no-cache: true

View File

@@ -94,16 +94,19 @@ jobs:
cd deployment/docker_compose
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \
MULTI_TENANT=true \
AUTH_TYPE=basic \
AUTH_TYPE=cloud \
REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
IMAGE_TAG=test \
docker compose -f docker-compose.dev.yml -p danswer-stack up -d
DEV_MODE=true \
docker compose -f docker-compose.multitenant-dev.yml -p danswer-stack up -d
id: start_docker_multi_tenant
# In practice, `cloud` Auth type would require OAUTH credentials to be set.
- name: Run Multi-Tenant Integration Tests
run: |
echo "Waiting for 3 minutes to ensure API server is ready..."
sleep 180
echo "Running integration tests..."
docker run --rm --network danswer-stack_default \
--name test-runner \
@@ -119,6 +122,10 @@ jobs:
-e TEST_WEB_HOSTNAME=test-runner \
-e AUTH_TYPE=cloud \
-e MULTI_TENANT=true \
-e REQUIRE_EMAIL_VERIFICATION=false \
-e DISABLE_TELEMETRY=true \
-e IMAGE_TAG=test \
-e DEV_MODE=true \
onyxdotapp/onyx-integration:test \
/app/tests/integration/multitenant_tests
continue-on-error: true
@@ -126,17 +133,17 @@ jobs:
- name: Check multi-tenant test results
run: |
if [ ${{ steps.run_tests.outcome }} == 'failure' ]; then
echo "Integration tests failed. Exiting with error."
if [ ${{ steps.run_multitenant_tests.outcome }} == 'failure' ]; then
echo "Multi-tenant integration tests failed. Exiting with error."
exit 1
else
echo "All integration tests passed successfully."
echo "All multi-tenant integration tests passed successfully."
fi
- name: Stop multi-tenant Docker containers
run: |
cd deployment/docker_compose
docker compose -f docker-compose.dev.yml -p danswer-stack down -v
docker compose -f docker-compose.multitenant-dev.yml -p danswer-stack down -v
- name: Start Docker containers
run: |
@@ -216,27 +223,30 @@ jobs:
echo "All integration tests passed successfully."
fi
# save before stopping the containers so the logs can be captured
- name: Save Docker logs
if: success() || failure()
# ------------------------------------------------------------
# Always gather logs BEFORE "down":
- name: Dump API server logs
if: always()
run: |
cd deployment/docker_compose
docker compose -f docker-compose.dev.yml -p danswer-stack logs > docker-compose.log
mv docker-compose.log ${{ github.workspace }}/docker-compose.log
docker compose -f docker-compose.dev.yml -p danswer-stack logs --no-color api_server > $GITHUB_WORKSPACE/api_server.log || true
- name: Stop Docker containers
- name: Dump all-container logs (optional)
if: always()
run: |
cd deployment/docker_compose
docker compose -f docker-compose.dev.yml -p danswer-stack down -v
docker compose -f docker-compose.dev.yml -p danswer-stack logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
- name: Upload logs
if: success() || failure()
if: always()
uses: actions/upload-artifact@v4
with:
name: docker-logs
name: docker-all-logs
path: ${{ github.workspace }}/docker-compose.log
# ------------------------------------------------------------
- name: Stop Docker containers
if: always()
run: |
cd deployment/docker_compose
docker compose -f docker-compose.dev.yml -p danswer-stack down -v

View File

@@ -101,7 +101,8 @@ COPY ./alembic_tenants /app/alembic_tenants
COPY ./alembic.ini /app/alembic.ini
COPY supervisord.conf /usr/etc/supervisord.conf
# Escape hatch
# Escape hatch scripts
COPY ./scripts/debugging /app/scripts/debugging
COPY ./scripts/force_delete_connector_by_id.py /app/scripts/force_delete_connector_by_id.py
# Put logo in assets

View File

@@ -5,7 +5,6 @@ Revises: 47e5bef3a1d7
Create Date: 2024-11-06 13:15:53.302644
"""
import logging
from typing import cast
from alembic import op
import sqlalchemy as sa
@@ -20,13 +19,8 @@ down_revision = "47e5bef3a1d7"
branch_labels: None = None
depends_on: None = None
# Configure logging
logger = logging.getLogger("alembic.runtime.migration")
logger.setLevel(logging.INFO)
def upgrade() -> None:
logger.info(f"{revision}: create_table: slack_bot")
# Create new slack_bot table
op.create_table(
"slack_bot",
@@ -63,7 +57,6 @@ def upgrade() -> None:
)
# Handle existing Slack bot tokens first
logger.info(f"{revision}: Checking for existing Slack bot.")
bot_token = None
app_token = None
first_row_id = None
@@ -71,15 +64,12 @@ def upgrade() -> None:
try:
tokens = cast(dict, get_kv_store().load("slack_bot_tokens_config_key"))
except Exception:
logger.warning("No existing Slack bot tokens found.")
tokens = {}
bot_token = tokens.get("bot_token")
app_token = tokens.get("app_token")
if bot_token and app_token:
logger.info(f"{revision}: Found bot and app tokens.")
session = Session(bind=op.get_bind())
new_slack_bot = SlackBot(
name="Slack Bot (Migrated)",
@@ -170,10 +160,9 @@ def upgrade() -> None:
# Clean up old tokens if they existed
try:
if bot_token and app_token:
logger.info(f"{revision}: Removing old bot and app tokens.")
get_kv_store().delete("slack_bot_tokens_config_key")
except Exception:
logger.warning("tried to delete tokens in dynamic config but failed")
pass
# Rename the table
op.rename_table(
"slack_bot_config__standard_answer_category",
@@ -190,8 +179,6 @@ def upgrade() -> None:
# Drop the table with CASCADE to handle dependent objects
op.execute("DROP TABLE slack_bot_config CASCADE")
logger.info(f"{revision}: Migration complete.")
def downgrade() -> None:
# Recreate the old slack_bot_config table
@@ -273,7 +260,7 @@ def downgrade() -> None:
}
get_kv_store().store("slack_bot_tokens_config_key", tokens)
except Exception:
logger.warning("Failed to save tokens back to KV store")
pass
# Drop the new tables in reverse order
op.drop_table("slack_channel_config")

View File

@@ -52,7 +52,11 @@ def upgrade() -> None:
slack_bot_id, persona_id, channel_config, enable_auto_filters, is_default
) VALUES (
:bot_id, NULL,
'{"channel_name": null, "respond_member_group_list": [], "answer_filters": [], "follow_up_tags": []}',
'{"channel_name": null, '
'"respond_member_group_list": [], '
'"answer_filters": [], '
'"follow_up_tags": [], '
'"respond_tag_only": true}',
FALSE, TRUE
)
"""

View File

@@ -0,0 +1,53 @@
"""delete non-search assistants
Revision ID: f5437cc136c5
Revises: eaa3b5593925
Create Date: 2025-02-04 16:17:15.677256
"""
from alembic import op
# revision identifiers, used by Alembic.
revision = "f5437cc136c5"
down_revision = "eaa3b5593925"
branch_labels = None
depends_on = None
def upgrade() -> None:
pass
def downgrade() -> None:
# Fix: split the statements into multiple op.execute() calls
op.execute(
"""
WITH personas_without_search AS (
SELECT p.id
FROM persona p
LEFT JOIN persona__tool pt ON p.id = pt.persona_id
LEFT JOIN tool t ON pt.tool_id = t.id
GROUP BY p.id
HAVING COUNT(CASE WHEN t.in_code_tool_id = 'run_search' THEN 1 END) = 0
)
UPDATE slack_channel_config
SET persona_id = NULL
WHERE is_default = TRUE AND persona_id IN (SELECT id FROM personas_without_search)
"""
)
op.execute(
"""
WITH personas_without_search AS (
SELECT p.id
FROM persona p
LEFT JOIN persona__tool pt ON p.id = pt.persona_id
LEFT JOIN tool t ON pt.tool_id = t.id
GROUP BY p.id
HAVING COUNT(CASE WHEN t.in_code_tool_id = 'run_search' THEN 1 END) = 0
)
DELETE FROM slack_channel_config
WHERE is_default = FALSE AND persona_id IN (SELECT id FROM personas_without_search)
"""
)
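
A minimal demo of why the downgrade needs two separate op.execute() calls: a WITH clause scopes only the single statement that follows it, so the UPDATE and the DELETE must each carry their own CTE. The schema below is a simplified SQLite stand-in for the real persona/tool join, not the actual tables:

import sqlite3

conn = sqlite3.connect(":memory:")
conn.executescript(
    """
    CREATE TABLE persona (id INTEGER PRIMARY KEY, has_search INTEGER);
    CREATE TABLE slack_channel_config (
        id INTEGER PRIMARY KEY, persona_id INTEGER, is_default INTEGER);
    INSERT INTO persona VALUES (1, 0), (2, 1);  -- persona 1 lacks the search tool
    INSERT INTO slack_channel_config VALUES (10, 1, 1), (11, 1, 0), (12, 2, 0);
    """
)
cte = "WITH personas_without_search AS (SELECT id FROM persona WHERE has_search = 0) "
# Statement 1: detach default configs pointing at non-search personas.
conn.execute(
    cte + "UPDATE slack_channel_config SET persona_id = NULL "
    "WHERE is_default = 1 AND persona_id IN (SELECT id FROM personas_without_search)"
)
# Statement 2: delete non-default configs pointing at non-search personas.
conn.execute(
    cte + "DELETE FROM slack_channel_config "
    "WHERE is_default = 0 AND persona_id IN (SELECT id FROM personas_without_search)"
)
assert conn.execute(
    "SELECT persona_id FROM slack_channel_config WHERE id = 10"
).fetchone() == (None,)
assert [r[0] for r in conn.execute("SELECT id FROM slack_channel_config ORDER BY id")] == [10, 12]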

View File

@@ -2,8 +2,11 @@ from uuid import UUID
from sqlalchemy.orm import Session
from onyx.configs.constants import NotificationType
from onyx.db.models import Persona__User
from onyx.db.models import Persona__UserGroup
from onyx.db.notification import create_notification
from onyx.server.features.persona.models import PersonaSharedNotificationData
def make_persona_private(
@@ -23,6 +26,14 @@ def make_persona_private(
for user_uuid in user_ids:
db_session.add(Persona__User(persona_id=persona_id, user_id=user_uuid))
create_notification(
user_id=user_uuid,
notif_type=NotificationType.PERSONA_SHARED,
db_session=db_session,
additional_data=PersonaSharedNotificationData(
persona_id=persona_id,
).model_dump(),
)
if group_ids:
for group_id in group_ids:
db_session.add(
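
The additional_data payload built above is a pydantic model serialized with model_dump(); a standalone sketch of that call (the real PersonaSharedNotificationData may define more fields than assumed here):

from pydantic import BaseModel

class PersonaSharedNotificationData(BaseModel):
    persona_id: int

payload = PersonaSharedNotificationData(persona_id=7).model_dump()
assert payload == {"persona_id": 7}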

View File

@@ -218,14 +218,14 @@ def fetch_user_groups_for_user(
return db_session.scalars(stmt).all()
def construct_document_select_by_usergroup(
def construct_document_id_select_by_usergroup(
user_group_id: int,
) -> Select:
"""This returns a statement that should be executed using
.yield_per() to minimize overhead. The primary consumers of this function
are background processing task generators."""
stmt = (
select(Document)
select(Document.id)
.join(
DocumentByConnectorCredentialPair,
Document.id == DocumentByConnectorCredentialPair.id,
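
A standalone illustration of what the rename buys (hypothetical two-column schema): select(Document.id) streams bare ids instead of hydrating full ORM objects, which keeps .yield_per() batches cheap for the background task generators named in the docstring:

from sqlalchemy import Column, String, create_engine, select
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()

class Document(Base):
    __tablename__ = "document"
    id = Column(String, primary_key=True)
    blob = Column(String)  # stand-in for heavy columns the task never reads

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add_all(Document(id=f"doc-{i}", blob="x" * 1024) for i in range(100))
    session.commit()
    # select(Document) would build 100 ORM objects; select(Document.id) yields
    # plain strings in lightweight batches of 50.
    stmt = select(Document.id).execution_options(yield_per=50)
    assert len(list(session.scalars(stmt))) == 100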

View File

@@ -64,6 +64,7 @@ async def _get_tenant_id_from_request(
try:
# Look up token data in Redis
token_data = await retrieve_auth_token_data_from_redis(request)
if not token_data:
@@ -87,13 +88,14 @@ async def _get_tenant_id_from_request(
if not is_valid_schema_name(tenant_id):
raise HTTPException(status_code=400, detail="Invalid tenant ID format")
return tenant_id
except Exception as e:
logger.error(f"Unexpected error in _get_tenant_id_from_request: {str(e)}")
raise HTTPException(status_code=500, detail="Internal server error")
finally:
if tenant_id:
return tenant_id
# As a final step, check for explicit tenant_id cookie
tenant_id_cookie = request.cookies.get(TENANT_ID_COOKIE_NAME)
if tenant_id_cookie and is_valid_schema_name(tenant_id_cookie):
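
A simplified, synchronous sketch of the resolution order this hunk moves to: the Redis-backed token wins when present, and the explicit tenant_id cookie is only a final fallback. Everything below except that ordering and the is_valid_schema_name gate is a stand-in for the real async plumbing:

import re

def is_valid_schema_name(name: str) -> bool:  # stand-in for the real validator
    return re.fullmatch(r"[A-Za-z0-9_]+", name) is not None

def resolve_tenant_id(token_tenant: str | None, cookie_tenant: str | None) -> str | None:
    if token_tenant and is_valid_schema_name(token_tenant):
        return token_tenant  # returned inside the try block in the real code
    if cookie_tenant and is_valid_schema_name(cookie_tenant):
        return cookie_tenant
    return None

assert resolve_tenant_id(None, "tenant_42") == "tenant_42"
assert resolve_tenant_id("tenant_7", "tenant_42") == "tenant_7"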

View File

@@ -24,6 +24,7 @@ from ee.onyx.server.tenants.user_mapping import get_tenant_id_for_email
from ee.onyx.server.tenants.user_mapping import user_owns_a_tenant
from onyx.auth.users import exceptions
from onyx.configs.app_configs import CONTROL_PLANE_API_BASE_URL
from onyx.configs.app_configs import DEV_MODE
from onyx.configs.constants import MilestoneRecordType
from onyx.db.engine import get_session_with_tenant
from onyx.db.engine import get_sqlalchemy_engine
@@ -85,7 +86,8 @@ async def create_tenant(email: str, referral_source: str | None = None) -> str:
# Provision tenant on data plane
await provision_tenant(tenant_id, email)
# Notify control plane
await notify_control_plane(tenant_id, email, referral_source)
if not DEV_MODE:
await notify_control_plane(tenant_id, email, referral_source)
except Exception as e:
logger.error(f"Tenant provisioning failed: {e}")
await rollback_tenant_provisioning(tenant_id)

View File

@@ -9,7 +9,6 @@ class CoreState(BaseModel):
This is the core state that is shared across all subgraphs.
"""
base_question: str = ""
log_messages: Annotated[list[str], add] = []
@@ -18,4 +17,4 @@ class SubgraphCoreState(BaseModel):
This is the core state that is shared across all subgraphs.
"""
log_messages: Annotated[list[str], add]
log_messages: Annotated[list[str], add] = []

View File

@@ -1,8 +1,8 @@
from datetime import datetime
from typing import cast
from langchain_core.messages import BaseMessage
from langchain_core.messages import HumanMessage
from langchain_core.messages import merge_message_runs
from langchain_core.runnables.config import RunnableConfig
from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer.states import (
@@ -12,12 +12,39 @@ from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer
SubQuestionAnswerCheckUpdate,
)
from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
binary_string_test,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_RATELIMIT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_TIMEOUT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_POSITIVE_VALUE_STR,
)
from onyx.agents.agent_search.shared_graph_utils.constants import AgentLLMErrorType
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLoggingFormat
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id
from onyx.configs.agent_configs import AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import SUB_ANSWER_CHECK_PROMPT
from onyx.prompts.agent_search import UNKNOWN_ANSWER
from onyx.utils.logger import setup_logger
logger = setup_logger()
_llm_node_error_strings = LLMNodeErrorStrings(
timeout="LLM Timeout Error. The sub-answer will be treated as 'relevant'",
rate_limit="LLM Rate Limit Error. The sub-answer will be treated as 'relevant'",
general_error="General LLM Error. The sub-answer will be treated as 'relevant'",
)
def check_sub_answer(
@@ -53,14 +80,46 @@ def check_sub_answer(
graph_config = cast(GraphConfig, config["metadata"]["config"])
fast_llm = graph_config.tooling.fast_llm
response = list(
fast_llm.stream(
agent_error: AgentErrorLoggingFormat | None = None
response: BaseMessage | None = None
try:
response = fast_llm.invoke(
prompt=msg,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK,
)
)
quality_str: str = merge_message_runs(response, chunk_separator="")[0].content
answer_quality = "yes" in quality_str.lower()
except LLMTimeoutError:
agent_error = AgentErrorLoggingFormat(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,
error_result=_llm_node_error_strings.timeout,
)
logger.error("LLM Timeout Error - check sub answer")
except LLMRateLimitError:
agent_error = AgentErrorLoggingFormat(
error_type=AgentLLMErrorType.RATE_LIMIT,
error_message=AGENT_LLM_RATELIMIT_MESSAGE,
error_result=_llm_node_error_strings.rate_limit,
)
logger.error("LLM Rate Limit Error - check sub answer")
if agent_error:
answer_quality = True
log_result = agent_error.error_result
else:
if response:
quality_str: str = cast(str, response.content)
answer_quality = binary_string_test(
text=quality_str, positive_value=AGENT_POSITIVE_VALUE_STR
)
else:
answer_quality = True
quality_str = "yes - because LLM error"
log_result = f"Answer quality: {quality_str}"
return SubQuestionAnswerCheckUpdate(
answer_quality=answer_quality,
@@ -69,7 +128,7 @@ def check_sub_answer(
graph_component="initial - generate individual sub answer",
node_name="check sub answer",
node_start_time=node_start_time,
result=f"Answer quality: {quality_str}",
result=log_result,
)
],
)
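
The success path above delegates to binary_string_test. The real helper lives in shared_graph_utils.agent_prompt_ops; judging by its call site, its behavior is roughly a case-insensitive containment check, sketched here as an assumption:

def binary_string_test(text: str, positive_value: str = "yes") -> bool:
    # stand-in, not the real implementation
    return positive_value.lower() in text.lower()

assert binary_string_test("Yes - the sub-answer addresses the question.")
assert not binary_string_test("No, it does not.")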

View File

@@ -16,6 +16,20 @@ from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
build_sub_question_answer_prompt,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_RATELIMIT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_TIMEOUT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AgentLLMErrorType,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
LLM_ANSWER_ERROR_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLoggingFormat
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.utils import get_answer_citation_ids
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
@@ -30,11 +44,20 @@ from onyx.chat.models import StreamStopInfo
from onyx.chat.models import StreamStopReason
from onyx.chat.models import StreamType
from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
from onyx.configs.agent_configs import AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import NO_RECOVERED_DOCS
from onyx.utils.logger import setup_logger
logger = setup_logger()
_llm_node_error_strings = LLMNodeErrorStrings(
timeout="LLM Timeout Error. A sub-answer could not be constructed and the sub-question will be ignored.",
rate_limit="LLM Rate Limit Error. A sub-answer could not be constructed and the sub-question will be ignored.",
general_error="General LLM Error. A sub-answer could not be constructed and the sub-question will be ignored.",
)
def generate_sub_answer(
state: AnswerQuestionState,
@@ -57,6 +80,8 @@ def generate_sub_answer(
if len(context_docs) == 0:
answer_str = NO_RECOVERED_DOCS
cited_documents: list = []
log_results = "No documents retrieved"
write_custom_event(
"sub_answers",
AgentAnswerPiece(
@@ -79,41 +104,67 @@ def generate_sub_answer(
response: list[str | list[str | dict[str, Any]]] = []
dispatch_timings: list[float] = []
for message in fast_llm.stream(
prompt=msg,
):
# TODO: in principle, the answer here COULD contain images, but we don't support that yet
content = message.content
if not isinstance(content, str):
raise ValueError(
f"Expected content to be a string, but got {type(content)}"
agent_error: AgentErrorLoggingFormat | None = None
try:
for message in fast_llm.stream(
prompt=msg,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION,
):
# TODO: in principle, the answer here COULD contain images, but we don't support that yet
content = message.content
if not isinstance(content, str):
raise ValueError(
f"Expected content to be a string, but got {type(content)}"
)
start_stream_token = datetime.now()
write_custom_event(
"sub_answers",
AgentAnswerPiece(
answer_piece=content,
level=level,
level_question_num=question_num,
answer_type="agent_sub_answer",
),
writer,
)
start_stream_token = datetime.now()
write_custom_event(
"sub_answers",
AgentAnswerPiece(
answer_piece=content,
level=level,
level_question_num=question_num,
answer_type="agent_sub_answer",
),
writer,
)
end_stream_token = datetime.now()
dispatch_timings.append(
(end_stream_token - start_stream_token).microseconds
)
response.append(content)
end_stream_token = datetime.now()
dispatch_timings.append(
(end_stream_token - start_stream_token).microseconds
)
response.append(content)
answer_str = merge_message_runs(response, chunk_separator="")[0].content
logger.debug(
f"Average dispatch time: {sum(dispatch_timings) / len(dispatch_timings)}"
)
except LLMTimeoutError:
agent_error = AgentErrorLoggingFormat(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,
error_result=_llm_node_error_strings.timeout,
)
logger.error("LLM Timeout Error - generate sub answer")
except LLMRateLimitError:
agent_error = AgentErrorLoggingFormat(
error_type=AgentLLMErrorType.RATE_LIMIT,
error_message=AGENT_LLM_RATELIMIT_MESSAGE,
error_result=_llm_node_error_strings.rate_limit,
)
logger.error("LLM Rate Limit Error - generate sub answer")
answer_citation_ids = get_answer_citation_ids(answer_str)
cited_documents = [
context_docs[id] for id in answer_citation_ids if id < len(context_docs)
]
if agent_error:
answer_str = LLM_ANSWER_ERROR_MESSAGE
cited_documents = []
log_results = (
agent_error.error_result
or "Sub-answer generation failed due to LLM error"
)
else:
answer_str = merge_message_runs(response, chunk_separator="")[0].content
answer_citation_ids = get_answer_citation_ids(answer_str)
cited_documents = [
context_docs[id] for id in answer_citation_ids if id < len(context_docs)
]
log_results = None
stop_event = StreamStopInfo(
stop_reason=StreamStopReason.FINISHED,
@@ -131,7 +182,7 @@ def generate_sub_answer(
graph_component="initial - generate individual sub answer",
node_name="generate sub answer",
node_start_time=node_start_time,
result="",
result=log_results or "",
)
],
)
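
Several nodes in this diff converge on the same shape: catch LLMTimeoutError and LLMRateLimitError separately, record a structured AgentErrorLoggingFormat, and substitute a per-node fallback. A self-contained sketch with stand-ins for the onyx.llm.chat_llm exceptions and the shared models:

from dataclasses import dataclass
from enum import Enum

class AgentLLMErrorType(str, Enum):
    TIMEOUT = "timeout"
    RATE_LIMIT = "rate_limit"

class LLMTimeoutError(Exception): ...
class LLMRateLimitError(Exception): ...

@dataclass
class AgentErrorLoggingFormat:
    error_type: AgentLLMErrorType
    error_message: str
    error_result: str

def invoke_with_fallback(invoke, fallback: str):
    """Return (result, error); on timeout/rate limit, substitute the fallback."""
    try:
        return invoke(), None
    except LLMTimeoutError:
        err = AgentErrorLoggingFormat(
            AgentLLMErrorType.TIMEOUT, "LLM Timeout Error", fallback)
    except LLMRateLimitError:
        err = AgentErrorLoggingFormat(
            AgentLLMErrorType.RATE_LIMIT, "LLM Rate Limit Error", fallback)
    return fallback, err

def flaky_llm() -> str:
    raise LLMTimeoutError()

result, err = invoke_with_fallback(flaky_llm, "sub-question ignored")
assert result == "sub-question ignored"
assert err is not None and err.error_type is AgentLLMErrorType.TIMEOUT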

View File

@@ -42,10 +42,8 @@ class SubQuestionRetrievalIngestionUpdate(LoggerUpdate, BaseModel):
class SubQuestionAnsweringInput(SubgraphCoreState):
question: str = ""
question_id: str = (
"" # 0_0 is original question, everything else is <level>_<question_num>.
)
question: str
question_id: str
# level 0 is original question and first decomposition, level 1 is follow up, etc
# question_num is a unique number per original question per level.

View File

@@ -26,7 +26,18 @@ from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
trim_prompt_piece,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_RATELIMIT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_TIMEOUT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AgentLLMErrorType,
)
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLoggingFormat
from onyx.agents.agent_search.shared_graph_utils.models import InitialAgentResultStats
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.operators import (
dedup_inference_sections,
)
@@ -42,12 +53,16 @@ from onyx.agents.agent_search.shared_graph_utils.utils import remove_document_ci
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import AgentAnswerPiece
from onyx.chat.models import ExtendedToolResponse
from onyx.chat.models import StreamingError
from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
from onyx.configs.agent_configs import AGENT_MIN_ORIG_QUESTION_DOCS
from onyx.context.search.models import InferenceSection
from onyx.prompts.agent_search import (
INITIAL_ANSWER_PROMPT_W_SUB_QUESTIONS,
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION,
)
from onyx.context.search.models import InferenceSection
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import INITIAL_ANSWER_PROMPT_W_SUB_QUESTIONS
from onyx.prompts.agent_search import (
INITIAL_ANSWER_PROMPT_WO_SUB_QUESTIONS,
)
@@ -57,6 +72,12 @@ from onyx.prompts.agent_search import (
from onyx.prompts.agent_search import UNKNOWN_ANSWER
from onyx.tools.tool_implementations.search.search_tool import yield_search_responses
_llm_node_error_strings = LLMNodeErrorStrings(
timeout="LLM Timeout Error. The initial answer could not be generated.",
rate_limit="LLM Rate Limit Error. The initial answer could not be generated.",
general_error="General LLM Error. The initial answer could not be generated.",
)
def generate_initial_answer(
state: SubQuestionRetrievalState,
@@ -224,30 +245,82 @@ def generate_initial_answer(
streamed_tokens: list[str | list[str | dict[str, Any]]] = [""]
dispatch_timings: list[float] = []
for message in model.stream(msg):
# TODO: in principle, the answer here COULD contain images, but we don't support that yet
content = message.content
if not isinstance(content, str):
raise ValueError(
f"Expected content to be a string, but got {type(content)}"
)
start_stream_token = datetime.now()
agent_error: AgentErrorLoggingFormat | None = None
try:
for message in model.stream(
msg,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION,
):
# TODO: in principle, the answer here COULD contain images, but we don't support that yet
content = message.content
if not isinstance(content, str):
raise ValueError(
f"Expected content to be a string, but got {type(content)}"
)
start_stream_token = datetime.now()
write_custom_event(
"initial_agent_answer",
AgentAnswerPiece(
answer_piece=content,
level=0,
level_question_num=0,
answer_type="agent_level_answer",
),
writer,
)
end_stream_token = datetime.now()
dispatch_timings.append(
(end_stream_token - start_stream_token).microseconds
)
streamed_tokens.append(content)
except LLMTimeoutError:
agent_error = AgentErrorLoggingFormat(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,
error_result=_llm_node_error_strings.timeout,
)
logger.error("LLM Timeout Error - generate initial answer")
except LLMRateLimitError:
agent_error = AgentErrorLoggingFormat(
error_type=AgentLLMErrorType.RATE_LIMIT,
error_message=AGENT_LLM_RATELIMIT_MESSAGE,
error_result=_llm_node_error_strings.rate_limit,
)
logger.error("LLM Rate Limit Error - generate initial answer")
if agent_error:
write_custom_event(
"initial_agent_answer",
AgentAnswerPiece(
answer_piece=content,
level=0,
level_question_num=0,
answer_type="agent_level_answer",
StreamingError(
error=AGENT_LLM_TIMEOUT_MESSAGE,
),
writer,
)
end_stream_token = datetime.now()
dispatch_timings.append(
(end_stream_token - start_stream_token).microseconds
return InitialAnswerUpdate(
initial_answer=None,
error=AgentErrorLoggingFormat(
error_message=agent_error.error_message or "An LLM error occurred",
error_type=agent_error.error_type,
error_result=agent_error.error_result,
),
initial_agent_stats=None,
generated_sub_questions=sub_questions,
agent_base_end_time=None,
agent_base_metrics=None,
log_messages=[
get_langgraph_node_log_string(
graph_component="initial - generate initial answer",
node_name="generate initial answer",
node_start_time=node_start_time,
result=agent_error.error_result or "An LLM error occurred",
)
],
)
streamed_tokens.append(content)
logger.debug(
f"Average dispatch time for initial answer: {sum(dispatch_timings) / len(dispatch_timings)}"

View File

@@ -25,7 +25,7 @@ def validate_initial_answer(
f"--------{node_start_time}--------Checking for base answer validity - for not set True/False manually"
)
verdict = True
verdict = True # not actually required as already streamed out. Refinement will do similar
return InitialAnswerQualityUpdate(
initial_answer_quality_eval=verdict,

View File

@@ -12,8 +12,9 @@ from onyx.agents.agent_search.deep_search.initial.generate_initial_answer.states
from onyx.agents.agent_search.deep_search.main.models import (
AgentRefinedMetrics,
)
from onyx.agents.agent_search.deep_search.main.operations import dispatch_subquestion
from onyx.agents.agent_search.deep_search.main.operations import (
dispatch_subquestion,
dispatch_subquestion_sep,
)
from onyx.agents.agent_search.deep_search.main.states import (
InitialQuestionDecompositionUpdate,
@@ -22,6 +23,18 @@ from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
build_history_prompt,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_RATELIMIT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_TIMEOUT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AgentLLMErrorType,
)
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLoggingFormat
from onyx.agents.agent_search.shared_graph_utils.models import BaseMessage_Content
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.utils import dispatch_separated
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
@@ -32,6 +45,11 @@ from onyx.chat.models import StreamStopReason
from onyx.chat.models import StreamType
from onyx.chat.models import SubQuestionPiece
from onyx.configs.agent_configs import AGENT_NUM_DOCS_FOR_DECOMPOSITION
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION,
)
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import (
INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH,
)
@@ -42,6 +60,12 @@ from onyx.utils.logger import setup_logger
logger = setup_logger()
_llm_node_error_strings = LLMNodeErrorStrings(
timeout="LLM Timeout Error. Sub-questions could not be generated.",
rate_limit="LLM Rate Limit Error. Sub-questions could not be generated.",
general_error="General LLM Error. Sub-questions could not be generated.",
)
def decompose_orig_question(
state: SubQuestionRetrievalState,
@@ -109,10 +133,37 @@ def decompose_orig_question(
),
writer,
)
# dispatches custom events for subquestion tokens, adding in subquestion ids.
streamed_tokens = dispatch_separated(
model.stream(msg), dispatch_subquestion(0, writer)
)
agent_error: AgentErrorLoggingFormat | None = None
streamed_tokens: list[BaseMessage_Content] = []
try:
streamed_tokens = dispatch_separated(
model.stream(
msg,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION,
),
dispatch_subquestion(0, writer),
sep_callback=dispatch_subquestion_sep(0, writer),
)
except LLMTimeoutError as e:
agent_error = AgentErrorLoggingFormat(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,
error_result=_llm_node_error_strings.timeout,
)
logger.error("LLM Timeout Error - decompose orig question")
raise e # fail loudly on this critical step
except LLMRateLimitError as e:
agent_error = AgentErrorLoggingFormat(
error_type=AgentLLMErrorType.RATE_LIMIT,
error_message=AGENT_LLM_RATELIMIT_MESSAGE,
error_result=_llm_node_error_strings.rate_limit,
)
logger.error("LLM Rate Limit Error - decompose orig question")
raise e
stop_event = StreamStopInfo(
stop_reason=StreamStopReason.FINISHED,
@@ -121,19 +172,19 @@ def decompose_orig_question(
)
write_custom_event("stream_finished", stop_event, writer)
deomposition_response = merge_content(*streamed_tokens)
if agent_error:
initial_sub_questions: list[str] = []
log_result = agent_error.error_result
else:
deomposition_response = merge_content(*streamed_tokens)
# this call should only return strings. Commenting out for efficiency
# assert [type(tok) == str for tok in streamed_tokens]
list_of_subqs = cast(str, deomposition_response).split("\n")
# use no-op cast() instead of str() which runs code
# list_of_subquestions = clean_and_parse_list_string(cast(str, response))
list_of_subqs = cast(str, deomposition_response).split("\n")
decomp_list: list[str] = [sq.strip() for sq in list_of_subqs if sq.strip() != ""]
initial_sub_questions = [sq.strip() for sq in list_of_subqs if sq.strip() != ""]
log_result = f"decomposed original question into {len(initial_sub_questions)} subquestions"
return InitialQuestionDecompositionUpdate(
initial_sub_questions=decomp_list,
initial_sub_questions=initial_sub_questions,
agent_start_time=agent_start_time,
agent_refined_start_time=None,
agent_refined_end_time=None,
@@ -147,7 +198,7 @@ def decompose_orig_question(
graph_component="initial - generate sub answers",
node_name="decompose original question",
node_start_time=node_start_time,
result=f"decomposed original question into {len(decomp_list)} subquestions",
result=log_result,
)
],
)
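
dispatch_separated and its new sep_callback hook are onyx internals; as a rough contract implied by the call sites (an assumption, not the real implementation): content tokens flow through the per-item dispatcher, and the separator callback fires each time a newline closes out one sub-question:

from collections.abc import Callable, Iterator

def dispatch_separated(
    tokens: Iterator[str],
    dispatch: Callable[[int, str], None],
    sep_callback: Callable[[int], None] | None = None,
    sep: str = "\n",
) -> list[str]:
    streamed: list[str] = []
    item_num = 1
    for token in tokens:
        if sep in token:
            if sep_callback is not None:
                sep_callback(item_num)  # one sub-question finished streaming
            item_num += 1
        else:
            dispatch(item_num, token)
        streamed.append(token)
    return streamed

seen: list[tuple[int, str]] = []
finished: list[int] = []
dispatch_separated(
    iter(["What is X?", "\n", "How does Y work?", "\n"]),
    lambda num, tok: seen.append((num, tok)),
    sep_callback=finished.append,
)
assert seen == [(1, "What is X?"), (2, "How does Y work?")]
assert finished == [1, 2]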

View File

@@ -252,9 +252,7 @@ if __name__ == "__main__":
db_session, primary_llm, fast_llm, search_request
)
inputs = MainInput(
base_question=graph_config.inputs.search_request.query, log_messages=[]
)
inputs = MainInput(log_messages=[])
for thing in compiled_graph.stream(
input=inputs,

View File

@@ -1,6 +1,7 @@
from datetime import datetime
from typing import cast
from langchain_core.messages import BaseMessage
from langchain_core.messages import HumanMessage
from langchain_core.runnables import RunnableConfig
from langgraph.types import StreamWriter
@@ -10,14 +11,37 @@ from onyx.agents.agent_search.deep_search.main.states import (
)
from onyx.agents.agent_search.deep_search.main.states import MainState
from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_RATELIMIT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_TIMEOUT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AgentLLMErrorType,
)
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLoggingFormat
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import RefinedAnswerImprovement
from onyx.configs.agent_configs import AGENT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import (
INITIAL_REFINED_ANSWER_COMPARISON_PROMPT,
)
from onyx.utils.logger import setup_logger
logger = setup_logger()
_llm_node_error_strings = LLMNodeErrorStrings(
timeout="The LLM timed out, and the answers could not be compared.",
rate_limit="The LLM encountered a rate limit, and the answers could not be compared.",
general_error="The LLM encountered an error, and the answers could not be compared.",
)
def compare_answers(
@@ -40,15 +64,46 @@ def compare_answers(
msg = [HumanMessage(content=compare_answers_prompt)]
agent_error: AgentErrorLoggingFormat | None = None
# Get the rewritten queries in a defined format
model = graph_config.tooling.fast_llm
resp: BaseMessage | None = None
refined_answer_improvement: bool | None = None
# no need to stream this
resp = model.invoke(msg)
try:
resp = model.invoke(
msg, timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS
)
refined_answer_improvement = (
isinstance(resp.content, str) and "yes" in resp.content.lower()
)
except LLMTimeoutError:
agent_error = AgentErrorLoggingFormat(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,
error_result=_llm_node_error_strings.timeout,
)
logger.error("LLM Timeout Error - compare answers")
# continue as True in this support step
except LLMRateLimitError:
agent_error = AgentErrorLoggingFormat(
error_type=AgentLLMErrorType.RATE_LIMIT,
error_message=AGENT_LLM_RATELIMIT_MESSAGE,
error_result=_llm_node_error_strings.rate_limit,
)
logger.error("LLM Rate Limit Error - compare answers")
# continue as True in this support step
if agent_error or resp is None:
refined_answer_improvement = True
if agent_error:
log_result = agent_error.error_result
else:
log_result = "An answer could not be generated."
else:
refined_answer_improvement = (
isinstance(resp.content, str) and "yes" in resp.content.lower()
)
log_result = f"Answer comparison: {refined_answer_improvement}"
write_custom_event(
"refined_answer_improvement",
@@ -65,7 +120,7 @@ def compare_answers(
graph_component="main",
node_name="compare answers",
node_start_time=node_start_time,
result=f"Answer comparison: {refined_answer_improvement}",
result=log_result,
)
],
)

View File

@@ -9,8 +9,9 @@ from langgraph.types import StreamWriter
from onyx.agents.agent_search.deep_search.main.models import (
RefinementSubQuestion,
)
from onyx.agents.agent_search.deep_search.main.operations import dispatch_subquestion
from onyx.agents.agent_search.deep_search.main.operations import (
dispatch_subquestion,
dispatch_subquestion_sep,
)
from onyx.agents.agent_search.deep_search.main.states import MainState
from onyx.agents.agent_search.deep_search.main.states import (
@@ -20,6 +21,18 @@ from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
build_history_prompt,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_RATELIMIT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_TIMEOUT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AgentLLMErrorType,
)
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLoggingFormat
from onyx.agents.agent_search.shared_graph_utils.models import BaseMessage_Content
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.utils import dispatch_separated
from onyx.agents.agent_search.shared_graph_utils.utils import (
format_entity_term_extraction,
@@ -29,10 +42,25 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
)
from onyx.agents.agent_search.shared_graph_utils.utils import make_question_id
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import StreamingError
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION,
)
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import (
REFINEMENT_QUESTION_DECOMPOSITION_PROMPT,
)
from onyx.tools.models import ToolCallKickoff
from onyx.utils.logger import setup_logger
logger = setup_logger()
_llm_node_error_strings = LLMNodeErrorStrings(
timeout="The LLM timed out. The sub-questions could not be generated.",
rate_limit="The LLM encountered a rate limit. The sub-questions could not be generated.",
general_error="The LLM encountered an error. The sub-questions could not be generated.",
)
def create_refined_sub_questions(
@@ -95,27 +123,65 @@ def create_refined_sub_questions(
# Grader
model = graph_config.tooling.fast_llm
streamed_tokens = dispatch_separated(
model.stream(msg), dispatch_subquestion(1, writer)
)
response = merge_content(*streamed_tokens)
agent_error: AgentErrorLoggingFormat | None = None
streamed_tokens: list[BaseMessage_Content] = []
try:
streamed_tokens = dispatch_separated(
model.stream(
msg,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION,
),
dispatch_subquestion(1, writer),
sep_callback=dispatch_subquestion_sep(1, writer),
)
except LLMTimeoutError:
agent_error = AgentErrorLoggingFormat(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,
error_result=_llm_node_error_strings.timeout,
)
logger.error("LLM Timeout Error - create refined sub questions")
if isinstance(response, str):
parsed_response = [q for q in response.split("\n") if q.strip() != ""]
else:
raise ValueError("LLM response is not a string")
except LLMRateLimitError:
agent_error = AgentErrorLoggingFormat(
error_type=AgentLLMErrorType.RATE_LIMIT,
error_message=AGENT_LLM_RATELIMIT_MESSAGE,
error_result=_llm_node_error_strings.rate_limit,
)
logger.error("LLM Rate Limit Error - create refined sub questions")
refined_sub_question_dict = {}
for sub_question_num, sub_question in enumerate(parsed_response):
refined_sub_question = RefinementSubQuestion(
sub_question=sub_question,
sub_question_id=make_question_id(1, sub_question_num + 1),
verified=False,
answered=False,
answer="",
if agent_error:
refined_sub_question_dict: dict[int, RefinementSubQuestion] = {}
log_result = agent_error.error_result
write_custom_event(
"refined_sub_question_creation_error",
StreamingError(
error="Your LLM was not able to create refined sub questions in time and timed out. Please try again.",
),
writer,
)
refined_sub_question_dict[sub_question_num + 1] = refined_sub_question
else:
response = merge_content(*streamed_tokens)
if isinstance(response, str):
parsed_response = [q for q in response.split("\n") if q.strip() != ""]
else:
raise ValueError("LLM response is not a string")
refined_sub_question_dict = {}
for sub_question_num, sub_question in enumerate(parsed_response):
refined_sub_question = RefinementSubQuestion(
sub_question=sub_question,
sub_question_id=make_question_id(1, sub_question_num + 1),
verified=False,
answered=False,
answer="",
)
refined_sub_question_dict[sub_question_num + 1] = refined_sub_question
log_result = f"Created {len(refined_sub_question_dict)} refined sub questions"
return RefinedQuestionDecompositionUpdate(
refined_sub_questions=refined_sub_question_dict,
@@ -125,7 +191,7 @@ def create_refined_sub_questions(
graph_component="main",
node_name="create refined sub questions",
node_start_time=node_start_time,
result=f"Created {len(refined_sub_question_dict)} refined sub questions",
result=log_result,
)
],
)

View File

@@ -26,6 +26,19 @@ def decide_refinement_need(
decision = True # TODO: just for current testing purposes
if state.error:
return RequireRefinemenEvalUpdate(
require_refined_answer_eval=False,
log_messages=[
get_langgraph_node_log_string(
graph_component="main",
node_name="decide refinement need",
node_start_time=node_start_time,
result="Timeout Error",
)
],
)
log_messages = [
get_langgraph_node_log_string(
graph_component="main",

View File

@@ -21,6 +21,9 @@ from onyx.agents.agent_search.shared_graph_utils.utils import format_docs
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION,
)
from onyx.configs.constants import NUM_EXPLORATORY_DOCS
from onyx.prompts.agent_search import ENTITY_TERM_EXTRACTION_PROMPT
from onyx.prompts.agent_search import ENTITY_TERM_EXTRACTION_PROMPT_JSON_EXAMPLE
@@ -81,6 +84,7 @@ def extract_entities_terms(
# Grader
llm_response = fast_llm.invoke(
prompt=msg,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION,
)
cleaned_response = (

View File

@@ -11,7 +11,6 @@ from onyx.agents.agent_search.deep_search.main.models import (
AgentRefinedMetrics,
)
from onyx.agents.agent_search.deep_search.main.operations import get_query_info
from onyx.agents.agent_search.deep_search.main.operations import logger
from onyx.agents.agent_search.deep_search.main.states import MainState
from onyx.agents.agent_search.deep_search.main.states import (
RefinedAnswerUpdate,
@@ -23,7 +22,18 @@ from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
trim_prompt_piece,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_RATELIMIT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_TIMEOUT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AgentLLMErrorType,
)
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLoggingFormat
from onyx.agents.agent_search.shared_graph_utils.models import InferenceSection
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.models import RefinedAgentStats
from onyx.agents.agent_search.shared_graph_utils.operators import (
dedup_inference_sections,
@@ -43,8 +53,14 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import AgentAnswerPiece
from onyx.chat.models import ExtendedToolResponse
from onyx.chat.models import StreamingError
from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
from onyx.configs.agent_configs import AGENT_MIN_ORIG_QUESTION_DOCS
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION,
)
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import (
REFINED_ANSWER_PROMPT_W_SUB_QUESTIONS,
)
@@ -56,6 +72,15 @@ from onyx.prompts.agent_search import (
)
from onyx.prompts.agent_search import UNKNOWN_ANSWER
from onyx.tools.tool_implementations.search.search_tool import yield_search_responses
from onyx.utils.logger import setup_logger
logger = setup_logger()
_llm_node_error_strings = LLMNodeErrorStrings(
timeout="The LLM timed out. The refined answer could not be generated.",
rate_limit="The LLM encountered a rate limit. The refined answer could not be generated.",
general_error="The LLM encountered an error. The refined answer could not be generated.",
)
def generate_refined_answer(
@@ -231,28 +256,80 @@ def generate_refined_answer(
streamed_tokens: list[str | list[str | dict[str, Any]]] = [""]
dispatch_timings: list[float] = []
for message in model.stream(msg):
# TODO: in principle, the answer here COULD contain images, but we don't support that yet
content = message.content
if not isinstance(content, str):
raise ValueError(
f"Expected content to be a string, but got {type(content)}"
)
agent_error: AgentErrorLoggingFormat | None = None
start_stream_token = datetime.now()
try:
for message in model.stream(
msg, timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION
):
# TODO: in principle, the answer here COULD contain images, but we don't support that yet
content = message.content
if not isinstance(content, str):
raise ValueError(
f"Expected content to be a string, but got {type(content)}"
)
start_stream_token = datetime.now()
write_custom_event(
"refined_agent_answer",
AgentAnswerPiece(
answer_piece=content,
level=1,
level_question_num=0,
answer_type="agent_level_answer",
),
writer,
)
end_stream_token = datetime.now()
dispatch_timings.append(
(end_stream_token - start_stream_token).microseconds
)
streamed_tokens.append(content)
except LLMTimeoutError:
agent_error = AgentErrorLoggingFormat(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,
error_result=_llm_node_error_strings.timeout,
)
logger.error("LLM Timeout Error - generate refined answer")
except LLMRateLimitError:
agent_error = AgentErrorLoggingFormat(
error_type=AgentLLMErrorType.RATE_LIMIT,
error_message=AGENT_LLM_RATELIMIT_MESSAGE,
error_result=_llm_node_error_strings.rate_limit,
)
logger.error("LLM Rate Limit Error - generate refined answer")
if agent_error:
write_custom_event(
"refined_agent_answer",
AgentAnswerPiece(
answer_piece=content,
level=1,
level_question_num=0,
answer_type="agent_level_answer",
"initial_agent_answer",
StreamingError(
error=AGENT_LLM_TIMEOUT_MESSAGE,
),
writer,
)
end_stream_token = datetime.now()
dispatch_timings.append((end_stream_token - start_stream_token).microseconds)
streamed_tokens.append(content)
return RefinedAnswerUpdate(
refined_answer=None,
refined_answer_quality=False, # TODO: replace this with the actual check value
refined_agent_stats=None,
agent_refined_end_time=None,
agent_refined_metrics=AgentRefinedMetrics(
refined_doc_boost_factor=0.0,
refined_question_boost_factor=0.0,
duration_s=None,
),
log_messages=[
get_langgraph_node_log_string(
graph_component="main",
node_name="generate refined answer",
node_start_time=node_start_time,
result=agent_error.error_result or "An LLM error occurred",
)
],
)
logger.debug(
f"Average dispatch time for refined answer: {sum(dispatch_timings) / len(dispatch_timings)}"
@@ -266,49 +343,6 @@ def generate_refined_answer(
revision_question_efficiency=revision_question_efficiency,
)
logger.debug(f"\n\n---INITIAL ANSWER ---\n\n Answer:\n Agent: {initial_answer}")
logger.debug("-" * 10)
logger.debug(f"\n\n---REVISED AGENT ANSWER ---\n\n Answer:\n Agent: {answer}")
logger.debug("-" * 100)
if state.initial_agent_stats:
initial_doc_boost_factor = state.initial_agent_stats.agent_effectiveness.get(
"utilized_chunk_ratio", "--"
)
initial_support_boost_factor = (
state.initial_agent_stats.agent_effectiveness.get("support_ratio", "--")
)
num_initial_verified_docs = state.initial_agent_stats.original_question.get(
"num_verified_documents", "--"
)
initial_verified_docs_avg_score = (
state.initial_agent_stats.original_question.get("verified_avg_score", "--")
)
initial_sub_questions_verified_docs = (
state.initial_agent_stats.sub_questions.get("num_verified_documents", "--")
)
logger.debug("INITIAL AGENT STATS")
logger.debug(f"Document Boost Factor: {initial_doc_boost_factor}")
logger.debug(f"Support Boost Factor: {initial_support_boost_factor}")
logger.debug(f"Originally Verified Docs: {num_initial_verified_docs}")
logger.debug(
f"Originally Verified Docs Avg Score: {initial_verified_docs_avg_score}"
)
logger.debug(
f"Sub-Questions Verified Docs: {initial_sub_questions_verified_docs}"
)
if refined_agent_stats:
logger.debug("-" * 10)
logger.debug("REFINED AGENT STATS")
logger.debug(
f"Revision Doc Factor: {refined_agent_stats.revision_doc_efficiency}"
)
logger.debug(
f"Revision Question Factor: {refined_agent_stats.revision_question_efficiency}"
)
agent_refined_end_time = datetime.now()
if state.agent_refined_start_time:
agent_refined_duration = (

View File

@@ -9,6 +9,9 @@ from onyx.agents.agent_search.shared_graph_utils.models import (
SubQuestionAnswerResults,
)
from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import StreamStopInfo
from onyx.chat.models import StreamStopReason
from onyx.chat.models import StreamType
from onyx.chat.models import SubQuestionPiece
from onyx.context.search.models import IndexFilters
from onyx.tools.models import SearchQueryInfo
@@ -34,6 +37,22 @@ def dispatch_subquestion(
return _helper
def dispatch_subquestion_sep(level: int, writer: StreamWriter) -> Callable[[int], None]:
def _helper(sep_num: int) -> None:
write_custom_event(
"stream_finished",
StreamStopInfo(
stop_reason=StreamStopReason.FINISHED,
stream_type=StreamType.SUB_QUESTIONS,
level=level,
level_question_num=sep_num,
),
writer,
)
return _helper
def calculate_initial_agent_stats(
decomp_answer_results: list[SubQuestionAnswerResults],
original_question_stats: AgentChunkRetrievalStats,
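
dispatch_subquestion_sep is a callback factory: it binds (level, writer) once and returns the per-separator function handed to dispatch_separated. A toy standalone version, with a list append standing in for the StreamWriter:

from collections.abc import Callable

def make_sep_callback(level: int, writer: Callable[[str], None]) -> Callable[[int], None]:
    def _helper(sep_num: int) -> None:
        # the real _helper writes a StreamStopInfo "stream_finished" event
        writer(f"stream_finished level={level} question={sep_num}")
    return _helper

events: list[str] = []
cb = make_sep_callback(0, events.append)
cb(1)
cb(2)
assert events == [
    "stream_finished level=0 question=1",
    "stream_finished level=0 question=2",
]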

View File

@@ -17,6 +17,7 @@ from onyx.agents.agent_search.orchestration.states import ToolCallUpdate
from onyx.agents.agent_search.orchestration.states import ToolChoiceInput
from onyx.agents.agent_search.orchestration.states import ToolChoiceUpdate
from onyx.agents.agent_search.shared_graph_utils.models import AgentChunkRetrievalStats
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLoggingFormat
from onyx.agents.agent_search.shared_graph_utils.models import (
EntityRelationshipTermExtraction,
)
@@ -76,6 +77,7 @@ class InitialAnswerUpdate(LoggerUpdate):
"""
initial_answer: str | None = None
error: AgentErrorLoggingFormat | None = None
initial_agent_stats: InitialAgentResultStats | None = None
generated_sub_questions: list[str] = []
agent_base_end_time: datetime | None = None
@@ -88,6 +90,7 @@ class RefinedAnswerUpdate(RefinedAgentEndStats, LoggerUpdate):
"""
refined_answer: str | None = None
error: AgentErrorLoggingFormat | None = None
refined_agent_stats: RefinedAgentStats | None = None
refined_answer_quality: bool = False

View File

@@ -16,14 +16,40 @@ from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states impor
QueryExpansionUpdate,
)
from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_RATELIMIT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_TIMEOUT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AgentLLMErrorType,
)
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLoggingFormat
from onyx.agents.agent_search.shared_graph_utils.models import BaseMessage_Content
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.agents.agent_search.shared_graph_utils.utils import dispatch_separated
from onyx.agents.agent_search.shared_graph_utils.utils import (
get_langgraph_node_log_string,
)
from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION,
)
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import (
QUERY_REWRITING_PROMPT,
)
from onyx.utils.logger import setup_logger
logger = setup_logger()
_llm_node_error_strings = LLMNodeErrorStrings(
timeout="Query rewriting failed due to LLM timeout - the original question will be used.",
rate_limit="Query rewriting failed due to LLM rate limit - the original question will be used.",
general_error="Query rewriting failed due to LLM error - the original question will be used.",
)
def expand_queries(
@@ -54,13 +80,43 @@ def expand_queries(
)
]
llm_response_list = dispatch_separated(
llm.stream(prompt=msg), dispatch_subquery(level, question_num, writer)
)
agent_error: AgentErrorLoggingFormat | None = None
llm_response_list: list[BaseMessage_Content] = []
llm_response = merge_message_runs(llm_response_list, chunk_separator="")[0].content
try:
llm_response_list = dispatch_separated(
llm.stream(
prompt=msg,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION,
),
dispatch_subquery(level, question_num, writer),
)
except LLMTimeoutError:
agent_error = AgentErrorLoggingFormat(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,
error_result=_llm_node_error_strings.timeout,
)
logger.error("LLM Timeout Error - expand queries")
rewritten_queries = llm_response.split("\n")
except LLMRateLimitError:
agent_error = AgentErrorLoggingFormat(
error_type=AgentLLMErrorType.RATE_LIMIT,
error_message=AGENT_LLM_RATELIMIT_MESSAGE,
error_result=_llm_node_error_strings.rate_limit,
)
logger.error("LLM Rate Limit Error - expand queries")
# use subquestion as query if query generation fails
if agent_error:
llm_response = ""
rewritten_queries = [question]
log_result = agent_error.error_result
else:
llm_response = merge_message_runs(llm_response_list, chunk_separator="")[
0
].content
rewritten_queries = llm_response.split("\n")
log_result = f"Number of expanded queries: {len(rewritten_queries)}"
return QueryExpansionUpdate(
expanded_queries=rewritten_queries,
@@ -69,7 +125,7 @@ def expand_queries(
graph_component="shared - expanded retrieval",
node_name="expand queries",
node_start_time=node_start_time,
result=f"Number of expanded queries: {len(rewritten_queries)}",
result=log_result,
)
],
)

View File

@@ -1,5 +1,6 @@
from typing import cast
from langchain_core.messages import BaseMessage
from langchain_core.messages import HumanMessage
from langchain_core.runnables.config import RunnableConfig
@@ -10,12 +11,41 @@ from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states impor
DocVerificationUpdate,
)
from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
binary_string_test,
)
from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
trim_prompt_piece,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_RATELIMIT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_LLM_TIMEOUT_MESSAGE,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AGENT_POSITIVE_VALUE_STR,
)
from onyx.agents.agent_search.shared_graph_utils.constants import (
AgentLLMErrorType,
)
from onyx.agents.agent_search.shared_graph_utils.models import AgentErrorLoggingFormat
from onyx.agents.agent_search.shared_graph_utils.models import LLMNodeErrorStrings
from onyx.configs.agent_configs import AGENT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.prompts.agent_search import (
DOCUMENT_VERIFICATION_PROMPT,
)
from onyx.utils.logger import setup_logger
logger = setup_logger()
_llm_node_error_strings = LLMNodeErrorStrings(
timeout="The LLM timed out. The document could not be verified. The document will be treated as 'relevant'",
rate_limit="The LLM encountered a rate limit. The document could not be verified. The document will be treated as 'relevant'",
general_error="The LLM encountered an error. The document could not be verified. The document will be treated as 'relevant'",
)
def verify_documents(
@@ -26,7 +56,7 @@ def verify_documents(
Args:
state (DocVerificationInput): The current state
config (RunnableConfig): Configuration containing ProSearchConfig
config (RunnableConfig): Configuration containing AgentSearchConfig
Updates:
verified_documents: list[InferenceSection]
@@ -51,11 +81,42 @@ def verify_documents(
)
]
response = fast_llm.invoke(msg)
agent_error: AgentErrorLoggingFormat | None = None
response: BaseMessage | None = None
verified_documents = []
if isinstance(response.content, str) and "yes" in response.content.lower():
verified_documents.append(retrieved_document_to_verify)
try:
response = fast_llm.invoke(
msg, timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION
)
except LLMTimeoutError:
# In this case, we decide to continue and not raise an error, as there is
# little harm in letting through some docs that are less relevant.
agent_error = AgentErrorLoggingFormat(
error_type=AgentLLMErrorType.TIMEOUT,
error_message=AGENT_LLM_TIMEOUT_MESSAGE,
error_result=_llm_node_error_strings.timeout,
)
logger.error("LLM Timeout Error - verify documents")
except LLMRateLimitError:
# In this case, we decide to continue and not raise an error, as there is
# little harm in letting through some docs that are less relevant.
agent_error = AgentErrorLoggingFormat(
error_type=AgentLLMErrorType.RATE_LIMIT,
error_message=AGENT_LLM_RATELIMIT_MESSAGE,
error_result=_llm_node_error_strings.rate_limit,
)
logger.error("LLM Rate Limit Error - verify documents")
if agent_error or response is None:
verified_documents = [retrieved_document_to_verify]
else:
verified_documents = []
if isinstance(response.content, str) and binary_string_test(
text=response.content, positive_value=AGENT_POSITIVE_VALUE_STR
):
verified_documents.append(retrieved_document_to_verify)
return DocVerificationUpdate(
verified_documents=verified_documents,

View File

@@ -21,9 +21,13 @@ from onyx.context.search.models import InferenceSection
class ExpandedRetrievalInput(SubgraphCoreState):
question: str = ""
base_search: bool = False
# exception to the 'no default value' rule for LangGraph input states.
# Here, a sub_question_id default of None implies usage for the
# original question. This is sometimes needed for nested sub-graphs.
sub_question_id: str | None = None
question: str
base_search: bool
## Update/Return States
@@ -88,4 +92,4 @@ class DocVerificationInput(ExpandedRetrievalInput):
class RetrievalInput(ExpandedRetrievalInput):
query_to_retrieve: str = ""
query_to_retrieve: str

View File

@@ -12,7 +12,7 @@ from onyx.agents.agent_search.deep_search.main.graph_builder import (
main_graph_builder as main_graph_builder_a,
)
from onyx.agents.agent_search.deep_search.main.states import (
MainInput as MainInput_a,
MainInput as MainInput,
)
from onyx.agents.agent_search.models import GraphConfig
from onyx.agents.agent_search.shared_graph_utils.utils import get_test_config
@@ -21,6 +21,7 @@ from onyx.chat.models import AnswerPacket
from onyx.chat.models import AnswerStream
from onyx.chat.models import ExtendedToolResponse
from onyx.chat.models import RefinedAnswerImprovement
from onyx.chat.models import StreamingError
from onyx.chat.models import StreamStopInfo
from onyx.chat.models import SubQueryPiece
from onyx.chat.models import SubQuestionPiece
@@ -33,6 +34,7 @@ from onyx.llm.factory import get_default_llms
from onyx.tools.tool_runner import ToolCallKickoff
from onyx.utils.logger import setup_logger
logger = setup_logger()
_COMPILED_GRAPH: CompiledStateGraph | None = None
@@ -72,13 +74,15 @@ def _parse_agent_event(
return cast(AnswerPacket, event["data"])
elif event["name"] == "refined_answer_improvement":
return cast(RefinedAnswerImprovement, event["data"])
elif event["name"] == "refined_sub_question_creation_error":
return cast(StreamingError, event["data"])
return None
def manage_sync_streaming(
compiled_graph: CompiledStateGraph,
config: GraphConfig,
graph_input: BasicInput | MainInput_a,
graph_input: BasicInput | MainInput,
) -> Iterable[StreamEvent]:
message_id = config.persistence.message_id if config.persistence else None
for event in compiled_graph.stream(
@@ -92,7 +96,7 @@ def manage_sync_streaming(
def run_graph(
compiled_graph: CompiledStateGraph,
config: GraphConfig,
input: BasicInput | MainInput_a,
input: BasicInput | MainInput,
) -> AnswerStream:
config.behavior.perform_initial_search_decomposition = (
INITIAL_SEARCH_DECOMPOSITION_ENABLED
@@ -123,9 +127,7 @@ def run_main_graph(
) -> AnswerStream:
compiled_graph = load_compiled_graph()
input = MainInput_a(
base_question=config.inputs.search_request.query, log_messages=[]
)
input = MainInput(log_messages=[])
# Agent search is not a Tool per se, but this is helpful for the frontend
yield ToolCallKickoff(
@@ -172,9 +174,7 @@ if __name__ == "__main__":
# search_request.persona = get_persona_by_id(1, None, db_session)
# config.perform_initial_search_path_decision = False
config.behavior.perform_initial_search_decomposition = True
input = MainInput_a(
base_question=config.inputs.search_request.query, log_messages=[]
)
input = MainInput(log_messages=[])
tool_responses: list = []
for output in run_graph(compiled_graph, config, input):

View File

@@ -150,3 +150,17 @@ def get_prompt_enrichment_components(
history=history,
date_str=date_str,
)
def binary_string_test(text: str, positive_value: str = "yes") -> bool:
"""
Tests if a string contains a positive value (case-insensitive).
Args:
text: The string to test
positive_value: The value to look for (defaults to "yes")
Returns:
True if the positive value is found in the text
"""
return positive_value.lower() in text.lower()
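A minimal usage sketch of this helper (inputs are hypothetical); note the containment semantics, where any substring match counts:

# Sketch: binary_string_test does a case-insensitive containment check.
assert binary_string_test("Yes, the document is relevant.")
assert not binary_string_test("No.")
# Caveat of containment semantics: "eyes" contains "yes".
assert binary_string_test("eyes", positive_value="yes")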

View File

@@ -0,0 +1,17 @@
from enum import Enum
AGENT_LLM_TIMEOUT_MESSAGE = "The agent timed out. Please try again."
AGENT_LLM_ERROR_MESSAGE = "The agent encountered an error. Please try again."
AGENT_LLM_RATELIMIT_MESSAGE = (
"The agent encountered a rate limit error. Please try again."
)
LLM_ANSWER_ERROR_MESSAGE = "The question was not answered due to an LLM error."
AGENT_POSITIVE_VALUE_STR = "yes"
AGENT_NEGATIVE_VALUE_STR = "no"
class AgentLLMErrorType(str, Enum):
TIMEOUT = "timeout"
RATE_LIMIT = "rate_limit"
GENERAL_ERROR = "general_error"
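Because AgentLLMErrorType subclasses str, its members compare equal to their raw values and can be stored in plain-string fields such as AgentErrorLoggingFormat.error_type; a quick sketch:

# str-enum behavior: members are usable anywhere a plain string is expected.
assert AgentLLMErrorType.TIMEOUT == "timeout"
assert isinstance(AgentLLMErrorType.RATE_LIMIT, str)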

View File

@@ -1,3 +1,5 @@
from typing import Any
from pydantic import BaseModel
from onyx.agents.agent_search.deep_search.main.models import (
@@ -56,6 +58,12 @@ class InitialAgentResultStats(BaseModel):
agent_effectiveness: dict[str, float | int | None]
class AgentErrorLoggingFormat(BaseModel):
error_message: str
error_type: str
error_result: str | None = None
class RefinedAgentStats(BaseModel):
revision_doc_efficiency: float | None
revision_question_efficiency: float | None
@@ -126,3 +134,12 @@ class AgentPromptEnrichmentComponents(BaseModel):
persona_prompts: PersonaPromptExpressions
history: str
date_str: str
class LLMNodeErrorStrings(BaseModel):
timeout: str = "LLM Timeout Error"
rate_limit: str = "LLM Rate Limit Error"
general_error: str = "General LLM Error"
BaseMessage_Content = str | list[str | dict[str, Any]]

View File

@@ -20,6 +20,7 @@ from onyx.agents.agent_search.models import GraphInputs
from onyx.agents.agent_search.models import GraphPersistence
from onyx.agents.agent_search.models import GraphSearchConfig
from onyx.agents.agent_search.models import GraphTooling
from onyx.agents.agent_search.shared_graph_utils.models import BaseMessage_Content
from onyx.agents.agent_search.shared_graph_utils.models import (
EntityRelationshipTermExtraction,
)
@@ -34,6 +35,9 @@ from onyx.chat.models import StreamStopInfo
from onyx.chat.models import StreamStopReason
from onyx.chat.models import StreamType
from onyx.chat.prompt_builder.answer_prompt_builder import AnswerPromptBuilder
from onyx.configs.agent_configs import (
AGENT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION,
)
from onyx.configs.chat_configs import CHAT_TARGET_CHUNK_PERCENTAGE
from onyx.configs.chat_configs import MAX_CHUNKS_FED_TO_CHAT
from onyx.configs.constants import DEFAULT_PERSONA_ID
@@ -46,6 +50,8 @@ from onyx.context.search.models import SearchRequest
from onyx.db.engine import get_session_context_manager
from onyx.db.persona import get_persona_by_id
from onyx.db.persona import Persona
from onyx.llm.chat_llm import LLMRateLimitError
from onyx.llm.chat_llm import LLMTimeoutError
from onyx.llm.interfaces import LLM
from onyx.prompts.agent_search import (
ASSISTANT_SYSTEM_PROMPT_DEFAULT,
@@ -65,8 +71,9 @@ from onyx.tools.tool_implementations.search.search_tool import (
from onyx.tools.tool_implementations.search.search_tool import SearchResponseSummary
from onyx.tools.tool_implementations.search.search_tool import SearchTool
from onyx.tools.utils import explicit_tool_calling_supported
from onyx.utils.logger import setup_logger
BaseMessage_Content = str | list[str | dict[str, Any]]
logger = setup_logger()
# Post-processing
@@ -295,6 +302,7 @@ def _dispatch_nonempty(
def dispatch_separated(
tokens: Iterator[BaseMessage],
dispatch_event: Callable[[str, int], None],
sep_callback: Callable[[int], None] | None = None,
sep: str = DISPATCH_SEP_CHAR,
) -> list[BaseMessage_Content]:
num = 1
@@ -304,6 +312,10 @@ def dispatch_separated(
if sep in content:
sub_question_parts = content.split(sep)
_dispatch_nonempty(sub_question_parts[0], dispatch_event, num)
if sep_callback:
sep_callback(num)
num += 1
_dispatch_nonempty(
"".join(sub_question_parts[1:]).strip(), dispatch_event, num
@@ -312,6 +324,9 @@ def dispatch_separated(
_dispatch_nonempty(content, dispatch_event, num)
streamed_tokens.append(content)
if sep_callback:
sep_callback(num)
return streamed_tokens
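A sketch of how the new sep_callback hook might be consumed; the callback below is hypothetical and simply records segment numbers as they close:

# Hypothetical consumer of sep_callback: it fires once per separator hit
# and once more after the stream ends, receiving the current segment number.
closed_segments: list[int] = []

def on_separator(segment_num: int) -> None:
    closed_segments.append(segment_num)

streamed = dispatch_separated(
    llm.stream(prompt=msg),
    dispatch_subquery(level, question_num, writer),
    sep_callback=on_separator,
)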
@@ -364,8 +379,24 @@ def summarize_history(
)
)
history_response = llm.invoke(history_context_prompt)
try:
history_response = llm.invoke(
history_context_prompt,
timeout_override=AGENT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION,
)
except LLMTimeoutError:
logger.error("LLM Timeout Error - summarize history")
return (
history # this is what was done at this point anyway, so we default to it
)
except LLMRateLimitError:
logger.error("LLM Rate Limit Error - summarize history")
return (
history # this is what was done at this point anyway, so we default to it
)
assert isinstance(history_response.content, str)
return history_response.content

View File

@@ -179,11 +179,14 @@ def try_generate_document_cc_pair_cleanup_tasks(
if tasks_generated is None:
raise ValueError("RedisConnectorDeletion.generate_tasks returned None")
insert_sync_record(
db_session=db_session,
entity_id=cc_pair_id,
sync_type=SyncType.CONNECTOR_DELETION,
)
try:
insert_sync_record(
db_session=db_session,
entity_id=cc_pair_id,
sync_type=SyncType.CONNECTOR_DELETION,
)
except Exception:
pass
except TaskDependencyError:
redis_connector.delete.set_fence(None)

View File

@@ -13,6 +13,21 @@ AGENT_DEFAULT_MIN_ORIG_QUESTION_DOCS = 3
AGENT_DEFAULT_MAX_ANSWER_CONTEXT_DOCS = 10
AGENT_DEFAULT_MAX_STATIC_HISTORY_WORD_LENGTH = 2000
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_GENERAL_GENERATION = 30 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION = 10 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION = 25 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION = 4 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION = 3 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION = 8 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION = 12 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK = 8 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION = 25 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION = 6 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION = 25 # in seconds
AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS = 8 # in seconds
#####
# Agent Configs
#####
@@ -77,4 +92,76 @@ AGENT_MAX_STATIC_HISTORY_WORD_LENGTH = int(
or AGENT_DEFAULT_MAX_STATIC_HISTORY_WORD_LENGTH
) # 2000
AGENT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_ENTITY_TERM_EXTRACTION
) # 25
AGENT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_DOCUMENT_VERIFICATION
) # 3
AGENT_TIMEOUT_OVERRIDE_LLM_GENERAL_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_GENERAL_GENERATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_GENERAL_GENERATION
) # 30
AGENT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBQUESTION_GENERATION
) # 8
AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_GENERATION
) # 12
AGENT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_INITIAL_ANSWER_GENERATION
) # 25
AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_ANSWER_GENERATION
) # 25
AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_SUBANSWER_CHECK
) # 8
AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_REFINED_SUBQUESTION_GENERATION
) # 6
AGENT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION
) # 4
AGENT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_HISTORY_SUMMARY_GENERATION
) # 10
AGENT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS = int(
os.environ.get("AGENT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS")
or AGENT_DEFAULT_TIMEOUT_OVERRIDE_LLM_COMPARE_ANSWERS
) # 8
GRAPH_VERSION_NAME: str = "a"
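Each timeout above follows the same env-or-default pattern; a sketch of overriding one knob (the override must be in the environment before onyx.configs.agent_configs is imported):

import os

# Hypothetical override: raise the query-rewriting timeout from 4s to 10s.
os.environ["AGENT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION"] = "10"

from onyx.configs.agent_configs import (
    AGENT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION,
)

assert AGENT_TIMEOUT_OVERRIDE_LLM_QUERY_REWRITING_GENERATION == 10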

View File

@@ -263,6 +263,11 @@ class PostgresAdvisoryLocks(Enum):
class OnyxCeleryQueues:
# "celery" is the default queue defined by celery and also the queue
# we are running in the primary worker to run system tasks
# Tasks running in this queue should be designed specifically to run quickly
PRIMARY = "celery"
# Light queue
VESPA_METADATA_SYNC = "vespa_metadata_sync"
DOC_PERMISSIONS_UPSERT = "doc_permissions_upsert"

View File

@@ -91,6 +91,7 @@ class LinearConnector(LoadConnector, PollConnector, OAuthConnector):
f"&response_type=code"
f"&scope=read"
f"&state={state}"
f"&prompt=consent" # prompts user for access; allows choosing workspace
)
@classmethod

View File

@@ -105,6 +105,32 @@ def construct_document_select_for_connector_credential_pair_by_needs_sync(
return stmt
def construct_document_id_select_for_connector_credential_pair_by_needs_sync(
connector_id: int, credential_id: int
) -> Select:
initial_doc_ids_stmt = select(DocumentByConnectorCredentialPair.id).where(
and_(
DocumentByConnectorCredentialPair.connector_id == connector_id,
DocumentByConnectorCredentialPair.credential_id == credential_id,
)
)
stmt = (
select(DbDocument.id)
.where(
DbDocument.id.in_(initial_doc_ids_stmt),
or_(
DbDocument.last_modified
> DbDocument.last_synced, # last_modified is newer than last_synced
DbDocument.last_synced.is_(None), # never synced
),
)
.distinct()
)
return stmt
def get_all_documents_needing_vespa_sync_for_cc_pair(
db_session: Session, cc_pair_id: int
) -> list[DbDocument]:

View File

@@ -545,7 +545,7 @@ def fetch_documents_for_document_set_paginated(
return documents, documents[-1].id if documents else None
def construct_document_select_by_docset(
def construct_document_id_select_by_docset(
document_set_id: int,
current_only: bool = True,
) -> Select:
@@ -554,7 +554,7 @@ def construct_document_select_by_docset(
are background processing task generators."""
stmt = (
select(Document)
select(Document.id)
.join(
DocumentByConnectorCredentialPair,
DocumentByConnectorCredentialPair.id == Document.id,

View File

@@ -11,6 +11,7 @@ from sqlalchemy import Select
from sqlalchemy import select
from sqlalchemy import update
from sqlalchemy.orm import aliased
from sqlalchemy.orm import joinedload
from sqlalchemy.orm import selectinload
from sqlalchemy.orm import Session
@@ -19,6 +20,7 @@ from onyx.configs.app_configs import DISABLE_AUTH
from onyx.configs.chat_configs import BING_API_KEY
from onyx.configs.chat_configs import CONTEXT_CHUNKS_ABOVE
from onyx.configs.chat_configs import CONTEXT_CHUNKS_BELOW
from onyx.configs.constants import NotificationType
from onyx.context.search.enums import RecencyBiasSetting
from onyx.db.constants import SLACK_BOT_PERSONA_PREFIX
from onyx.db.models import DocumentSet
@@ -32,6 +34,8 @@ from onyx.db.models import Tool
from onyx.db.models import User
from onyx.db.models import User__UserGroup
from onyx.db.models import UserGroup
from onyx.db.notification import create_notification
from onyx.server.features.persona.models import PersonaSharedNotificationData
from onyx.server.features.persona.models import PersonaSnapshot
from onyx.server.features.persona.models import PersonaUpsertRequest
from onyx.utils.logger import setup_logger
@@ -169,6 +173,15 @@ def make_persona_private(
for user_uuid in user_ids:
db_session.add(Persona__User(persona_id=persona_id, user_id=user_uuid))
create_notification(
user_id=user_uuid,
notif_type=NotificationType.PERSONA_SHARED,
db_session=db_session,
additional_data=PersonaSharedNotificationData(
persona_id=persona_id,
).model_dump(),
)
db_session.commit()
# May cause error if someone switches down to MIT from EE
@@ -708,3 +721,15 @@ def update_persona_label(
def delete_persona_label(label_id: int, db_session: Session) -> None:
db_session.query(PersonaLabel).filter(PersonaLabel.id == label_id).delete()
db_session.commit()
def persona_has_search_tool(persona_id: int, db_session: Session) -> bool:
persona = (
db_session.query(Persona)
.options(joinedload(Persona.tools))
.filter(Persona.id == persona_id)
.one_or_none()
)
if persona is None:
raise ValueError(f"Persona with ID {persona_id} does not exist")
return any(tool.in_code_tool_id == "run_search" for tool in persona.tools)

View File

@@ -256,7 +256,7 @@ def fetch_slack_channel_config_for_channel_or_default(
db_session: Session, slack_bot_id: int, channel_name: str | None
) -> SlackChannelConfig | None:
# attempt to find channel-specific config first
if channel_name:
if channel_name is not None:
sc_config = db_session.scalar(
select(SlackChannelConfig).where(
SlackChannelConfig.slack_bot_id == slack_bot_id,

View File

@@ -50,6 +50,18 @@ litellm.telemetry = False
_LLM_PROMPT_LONG_TERM_LOG_CATEGORY = "llm_prompt"
class LLMTimeoutError(Exception):
"""
Exception raised when an LLM call times out.
"""
class LLMRateLimitError(Exception):
"""
Exception raised when an LLM call is rate limited.
"""
def _base_msg_to_role(msg: BaseMessage) -> str:
if isinstance(msg, HumanMessage) or isinstance(msg, HumanMessageChunk):
return "user"
@@ -380,6 +392,7 @@ class DefaultMultiLLM(LLM):
tool_choice: ToolChoiceOptions | None,
stream: bool,
structured_response_format: dict | None = None,
timeout_override: int | None = None,
) -> litellm.ModelResponse | litellm.CustomStreamWrapper:
# litellm doesn't accept LangChain BaseMessage objects, so we need to convert them
# to a dict representation
@@ -405,7 +418,7 @@ class DefaultMultiLLM(LLM):
stream=stream,
# model params
temperature=0,
timeout=self._timeout,
timeout=timeout_override or self._timeout,
# For now, we don't support parallel tool calls
# NOTE: we can't pass this in if tools are not specified
# or else OpenAI throws an error
@@ -424,6 +437,12 @@ class DefaultMultiLLM(LLM):
except Exception as e:
self._record_error(processed_prompt, e)
# for break pointing
if isinstance(e, litellm.Timeout):
raise LLMTimeoutError(e)
elif isinstance(e, litellm.RateLimitError):
raise LLMRateLimitError(e)
raise e
@property
@@ -444,6 +463,7 @@ class DefaultMultiLLM(LLM):
tools: list[dict] | None = None,
tool_choice: ToolChoiceOptions | None = None,
structured_response_format: dict | None = None,
timeout_override: int | None = None,
) -> BaseMessage:
if LOG_DANSWER_MODEL_INTERACTIONS:
self.log_model_configs()
@@ -451,7 +471,12 @@ class DefaultMultiLLM(LLM):
response = cast(
litellm.ModelResponse,
self._completion(
prompt, tools, tool_choice, False, structured_response_format
prompt=prompt,
tools=tools,
tool_choice=tool_choice,
stream=False,
structured_response_format=structured_response_format,
timeout_override=timeout_override,
),
)
choice = response.choices[0]
@@ -469,19 +494,31 @@ class DefaultMultiLLM(LLM):
tools: list[dict] | None = None,
tool_choice: ToolChoiceOptions | None = None,
structured_response_format: dict | None = None,
timeout_override: int | None = None,
) -> Iterator[BaseMessage]:
if LOG_DANSWER_MODEL_INTERACTIONS:
self.log_model_configs()
if DISABLE_LITELLM_STREAMING:
yield self.invoke(prompt, tools, tool_choice, structured_response_format)
yield self.invoke(
prompt,
tools,
tool_choice,
structured_response_format,
timeout_override,
)
return
output = None
response = cast(
litellm.CustomStreamWrapper,
self._completion(
prompt, tools, tool_choice, True, structured_response_format
prompt=prompt,
tools=tools,
tool_choice=tool_choice,
stream=True,
structured_response_format=structured_response_format,
timeout_override=timeout_override,
),
)
try:

View File

@@ -81,6 +81,7 @@ class CustomModelServer(LLM):
tools: list[dict] | None = None,
tool_choice: ToolChoiceOptions | None = None,
structured_response_format: dict | None = None,
timeout_override: int | None = None,
) -> BaseMessage:
return self._execute(prompt)
@@ -90,5 +91,6 @@ class CustomModelServer(LLM):
tools: list[dict] | None = None,
tool_choice: ToolChoiceOptions | None = None,
structured_response_format: dict | None = None,
timeout_override: int | None = None,
) -> Iterator[BaseMessage]:
yield self._execute(prompt)

View File

@@ -90,12 +90,13 @@ class LLM(abc.ABC):
tools: list[dict] | None = None,
tool_choice: ToolChoiceOptions | None = None,
structured_response_format: dict | None = None,
timeout_override: int | None = None,
) -> BaseMessage:
self._precall(prompt)
# TODO add a postcall to log model outputs independent of concrete class
# implementation
return self._invoke_implementation(
prompt, tools, tool_choice, structured_response_format
prompt, tools, tool_choice, structured_response_format, timeout_override
)
@abc.abstractmethod
@@ -105,6 +106,7 @@ class LLM(abc.ABC):
tools: list[dict] | None = None,
tool_choice: ToolChoiceOptions | None = None,
structured_response_format: dict | None = None,
timeout_override: int | None = None,
) -> BaseMessage:
raise NotImplementedError
@@ -114,12 +116,13 @@ class LLM(abc.ABC):
tools: list[dict] | None = None,
tool_choice: ToolChoiceOptions | None = None,
structured_response_format: dict | None = None,
timeout_override: int | None = None,
) -> Iterator[BaseMessage]:
self._precall(prompt)
# TODO add a postcall to log model outputs independent of concrete class
# implementation
messages = self._stream_implementation(
prompt, tools, tool_choice, structured_response_format
prompt, tools, tool_choice, structured_response_format, timeout_override
)
tokens = []
@@ -138,5 +141,6 @@ class LLM(abc.ABC):
tools: list[dict] | None = None,
tool_choice: ToolChoiceOptions | None = None,
structured_response_format: dict | None = None,
timeout_override: int | None = None,
) -> Iterator[BaseMessage]:
raise NotImplementedError

View File

@@ -1,4 +1,5 @@
from datetime import datetime
from typing import cast
import pytz
import timeago # type: ignore
@@ -338,6 +339,23 @@ def _build_citations_blocks(
return citations_block
def _build_answer_blocks(
answer: ChatOnyxBotResponse, fallback_answer: str
) -> list[SectionBlock]:
if not answer.answer:
answer_blocks = [SectionBlock(text=fallback_answer)]
else:
# replaces markdown links with slack format links
formatted_answer = format_slack_message(answer.answer)
answer_processed = decode_escapes(
remove_slack_text_interactions(formatted_answer)
)
answer_blocks = [
SectionBlock(text=text) for text in _split_text(answer_processed)
]
return answer_blocks
def _build_qa_response_blocks(
answer: ChatOnyxBotResponse,
) -> list[Block]:
@@ -376,21 +394,10 @@ def _build_qa_response_blocks(
filter_block = SectionBlock(text=f"_{filter_text}_")
if not answer.answer:
answer_blocks = [
SectionBlock(
text="Sorry, I was unable to find an answer, but I did find some potentially relevant docs 🤓"
)
]
else:
# replaces markdown links with slack format links
formatted_answer = format_slack_message(answer.answer)
answer_processed = decode_escapes(
remove_slack_text_interactions(formatted_answer)
)
answer_blocks = [
SectionBlock(text=text) for text in _split_text(answer_processed)
]
answer_blocks = _build_answer_blocks(
answer=answer,
fallback_answer="Sorry, I was unable to find an answer, but I did find some potentially relevant docs 🤓",
)
response_blocks: list[Block] = []
@@ -481,6 +488,7 @@ def build_slack_response_blocks(
use_citations: bool,
feedback_reminder_id: str | None,
skip_ai_feedback: bool = False,
expecting_search_result: bool = False,
) -> list[Block]:
"""
This function is a top level function that builds all the blocks for the Slack response.
@@ -491,9 +499,19 @@ def build_slack_response_blocks(
message_info.thread_messages[-1].message, message_info.is_bot_msg
)
answer_blocks = _build_qa_response_blocks(
answer=answer,
)
if expecting_search_result:
answer_blocks = _build_qa_response_blocks(
answer=answer,
)
else:
answer_blocks = cast(
list[Block],
_build_answer_blocks(
answer=answer,
fallback_answer="Sorry, I was unable to generate an answer.",
),
)
web_follow_up_block = []
if channel_conf and channel_conf.get("show_continue_in_web_ui"):

View File

@@ -27,6 +27,7 @@ from onyx.db.engine import get_session_with_tenant
from onyx.db.models import SlackChannelConfig
from onyx.db.models import User
from onyx.db.persona import get_persona_by_id
from onyx.db.persona import persona_has_search_tool
from onyx.db.users import get_user_by_email
from onyx.onyxbot.slack.blocks import build_slack_response_blocks
from onyx.onyxbot.slack.handlers.utils import send_team_member_message
@@ -106,7 +107,8 @@ def handle_regular_answer(
]
prompt = persona.prompts[0] if persona.prompts else None
should_respond_even_with_no_docs = persona.num_chunks == 0 if persona else False
with get_session_with_tenant(tenant_id) as db_session:
expecting_search_result = persona_has_search_tool(persona.id, db_session)
# TODO: Add in support for Slack to truncate messages based on max LLM context
# llm, _ = get_llms_for_persona(persona)
@@ -303,12 +305,12 @@ def handle_regular_answer(
return True
retrieval_info = answer.docs
if not retrieval_info:
if not retrieval_info and expecting_search_result:
# This should not happen; even with no docs retrieved, there is still info returned
raise RuntimeError("Failed to retrieve docs, cannot answer question.")
top_docs = retrieval_info.top_documents
if not top_docs and not should_respond_even_with_no_docs:
top_docs = retrieval_info.top_documents if retrieval_info else []
if not top_docs and expecting_search_result:
logger.error(
f"Unable to answer question: '{user_message}' - no documents found"
)
@@ -337,7 +339,8 @@ def handle_regular_answer(
)
if (
only_respond_if_citations
expecting_search_result
and only_respond_if_citations
and not answer.citations
and not message_info.bypass_filters
):
@@ -363,6 +366,7 @@ def handle_regular_answer(
channel_conf=channel_conf,
use_citations=True, # No longer supporting quotes
feedback_reminder_id=feedback_reminder_id,
expecting_search_result=expecting_search_result,
)
try:

View File

@@ -801,18 +801,6 @@ def process_message(
channel_name=channel_name,
)
# Be careful about this default, don't want to accidentally spam every channel
# Users should be able to DM slack bot in their private channels though
if (
not respond_every_channel
# Can't have configs for DMs so don't toss them out
and not is_dm
# If /OnyxBot (is_bot_msg) or @OnyxBot (bypass_filters)
# always respond with the default configs
and not (details.is_bot_msg or details.bypass_filters)
):
return
follow_up = bool(
slack_channel_config.channel_config
and slack_channel_config.channel_config.get("follow_up_tags")

View File

@@ -5,8 +5,6 @@ UNKNOWN_ANSWER = "I do not have enough information to answer this question."
NO_RECOVERED_DOCS = "No relevant information recovered"
YES = "yes"
NO = "no"
# Framing/Support/Template Prompts
HISTORY_FRAMING_PROMPT = f"""
For more context, here is the history of the conversation so far that preceded this question:

View File

@@ -16,9 +16,8 @@ from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisConstants
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
from onyx.db.document import (
construct_document_select_for_connector_credential_pair_by_needs_sync,
construct_document_id_select_for_connector_credential_pair_by_needs_sync,
)
from onyx.db.models import Document
from onyx.redis.redis_object_helper import RedisObjectHelper
@@ -72,7 +71,8 @@ class RedisConnectorCredentialPair(RedisObjectHelper):
last_lock_time = time.monotonic()
async_results = []
num_tasks_sent = 0
cc_pair = get_connector_credential_pair_from_id(
db_session=db_session,
cc_pair_id=int(self._id),
@@ -80,14 +80,14 @@ class RedisConnectorCredentialPair(RedisObjectHelper):
if not cc_pair:
return None
stmt = construct_document_select_for_connector_credential_pair_by_needs_sync(
stmt = construct_document_id_select_for_connector_credential_pair_by_needs_sync(
cc_pair.connector_id, cc_pair.credential_id
)
num_docs = 0
for doc in db_session.scalars(stmt).yield_per(DB_YIELD_PER_DEFAULT):
doc = cast(Document, doc)
for doc_id in db_session.scalars(stmt).yield_per(DB_YIELD_PER_DEFAULT):
doc_id = cast(str, doc_id)
current_time = time.monotonic()
if current_time - last_lock_time >= (
CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT / 4
@@ -98,7 +98,7 @@ class RedisConnectorCredentialPair(RedisObjectHelper):
num_docs += 1
# check if we should skip the document (typically because it's already syncing)
if doc.id in self.skip_docs:
if doc_id in self.skip_docs:
continue
# celery's default task id format is "dd32ded3-00aa-4884-8b21-42f8332e7fac"
@@ -114,21 +114,21 @@ class RedisConnectorCredentialPair(RedisObjectHelper):
)
# Priority on sync's triggered by new indexing should be medium
result = celery_app.send_task(
celery_app.send_task(
OnyxCeleryTask.VESPA_METADATA_SYNC_TASK,
kwargs=dict(document_id=doc.id, tenant_id=tenant_id),
kwargs=dict(document_id=doc_id, tenant_id=tenant_id),
queue=OnyxCeleryQueues.VESPA_METADATA_SYNC,
task_id=custom_task_id,
priority=OnyxCeleryPriority.MEDIUM,
)
async_results.append(result)
self.skip_docs.add(doc.id)
num_tasks_sent += 1
self.skip_docs.add(doc_id)
if len(async_results) >= max_tasks:
if num_tasks_sent >= max_tasks:
break
return len(async_results), num_docs
return num_tasks_sent, num_docs
class RedisGlobalConnectorCredentialPair:

View File

@@ -14,8 +14,7 @@ from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisConstants
from onyx.db.document_set import construct_document_select_by_docset
from onyx.db.models import Document
from onyx.db.document_set import construct_document_id_select_by_docset
from onyx.redis.redis_object_helper import RedisObjectHelper
@@ -66,10 +65,11 @@ class RedisDocumentSet(RedisObjectHelper):
"""
last_lock_time = time.monotonic()
async_results = []
stmt = construct_document_select_by_docset(int(self._id), current_only=False)
for doc in db_session.scalars(stmt).yield_per(DB_YIELD_PER_DEFAULT):
doc = cast(Document, doc)
num_tasks_sent = 0
stmt = construct_document_id_select_by_docset(int(self._id), current_only=False)
for doc_id in db_session.scalars(stmt).yield_per(DB_YIELD_PER_DEFAULT):
doc_id = cast(str, doc_id)
current_time = time.monotonic()
if current_time - last_lock_time >= (
CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT / 4
@@ -86,17 +86,17 @@ class RedisDocumentSet(RedisObjectHelper):
# add to the set BEFORE creating the task.
redis_client.sadd(self.taskset_key, custom_task_id)
result = celery_app.send_task(
celery_app.send_task(
OnyxCeleryTask.VESPA_METADATA_SYNC_TASK,
kwargs=dict(document_id=doc.id, tenant_id=tenant_id),
kwargs=dict(document_id=doc_id, tenant_id=tenant_id),
queue=OnyxCeleryQueues.VESPA_METADATA_SYNC,
task_id=custom_task_id,
priority=OnyxCeleryPriority.LOW,
)
async_results.append(result)
num_tasks_sent += 1
return len(async_results), len(async_results)
return num_tasks_sent, num_tasks_sent
def reset(self) -> None:
self.redis.srem(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)

View File

@@ -14,7 +14,6 @@ from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryQueues
from onyx.configs.constants import OnyxCeleryTask
from onyx.configs.constants import OnyxRedisConstants
from onyx.db.models import Document
from onyx.redis.redis_object_helper import RedisObjectHelper
from onyx.utils.variable_functionality import fetch_versioned_implementation
from onyx.utils.variable_functionality import global_version
@@ -66,23 +65,22 @@ class RedisUserGroup(RedisObjectHelper):
user group up to date over multiple batches.
"""
last_lock_time = time.monotonic()
async_results = []
num_tasks_sent = 0
if not global_version.is_ee_version():
return 0, 0
try:
construct_document_select_by_usergroup = fetch_versioned_implementation(
construct_document_id_select_by_usergroup = fetch_versioned_implementation(
"onyx.db.user_group",
"construct_document_select_by_usergroup",
"construct_document_id_select_by_usergroup",
)
except ModuleNotFoundError:
return 0, 0
stmt = construct_document_select_by_usergroup(int(self._id))
for doc in db_session.scalars(stmt).yield_per(DB_YIELD_PER_DEFAULT):
doc = cast(Document, doc)
stmt = construct_document_id_select_by_usergroup(int(self._id))
for doc_id in db_session.scalars(stmt).yield_per(DB_YIELD_PER_DEFAULT):
doc_id = cast(str, doc_id)
current_time = time.monotonic()
if current_time - last_lock_time >= (
CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT / 4
@@ -99,17 +97,17 @@ class RedisUserGroup(RedisObjectHelper):
# add to the set BEFORE creating the task.
redis_client.sadd(self.taskset_key, custom_task_id)
result = celery_app.send_task(
celery_app.send_task(
OnyxCeleryTask.VESPA_METADATA_SYNC_TASK,
kwargs=dict(document_id=doc.id, tenant_id=tenant_id),
kwargs=dict(document_id=doc_id, tenant_id=tenant_id),
queue=OnyxCeleryQueues.VESPA_METADATA_SYNC,
task_id=custom_task_id,
priority=OnyxCeleryPriority.LOW,
)
async_results.append(result)
num_tasks_sent += 1
return len(async_results), len(async_results)
return num_tasks_sent, num_tasks_sent
def reset(self) -> None:
self.redis.srem(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)

View File

@@ -22,6 +22,8 @@ from onyx.background.celery.tasks.pruning.tasks import (
try_creating_prune_generator_task,
)
from onyx.background.celery.versioned_apps.primary import app as primary_app
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryTask
from onyx.db.connector_credential_pair import add_credential_to_connector
from onyx.db.connector_credential_pair import (
get_connector_credential_pair_from_id_for_user,
@@ -228,6 +230,13 @@ def update_cc_pair_status(
db_session.commit()
# this speeds up the start of indexing by firing the check immediately
primary_app.send_task(
OnyxCeleryTask.CHECK_FOR_INDEXING,
kwargs=dict(tenant_id=tenant_id),
priority=OnyxCeleryPriority.HIGH,
)
return JSONResponse(
status_code=HTTPStatus.OK, content={"message": str(HTTPStatus.OK)}
)
@@ -540,7 +549,14 @@ def associate_credential_to_connector(
metadata: ConnectorCredentialPairMetadata,
user: User | None = Depends(current_curator_or_admin_user),
db_session: Session = Depends(get_session),
tenant_id: str = Depends(get_current_tenant_id),
) -> StatusResponse[int]:
"""NOTE(rkuo): internally discussed and the consensus is this endpoint
and create_connector_with_mock_credential should be combined.
The intent of this endpoint is to handle connectors that actually need credentials.
"""
fetch_ee_implementation_or_noop(
"onyx.db.user_group", "validate_object_creation_for_user", None
)(
@@ -563,6 +579,18 @@ def associate_credential_to_connector(
groups=metadata.groups,
)
# trigger indexing immediately
primary_app.send_task(
OnyxCeleryTask.CHECK_FOR_INDEXING,
priority=OnyxCeleryPriority.HIGH,
kwargs={"tenant_id": tenant_id},
)
logger.info(
f"associate_credential_to_connector - running check_for_indexing: "
f"cc_pair={response.data}"
)
return response
except IntegrityError as e:
logger.error(f"IntegrityError: {e}")

View File

@@ -804,6 +804,14 @@ def create_connector_with_mock_credential(
db_session: Session = Depends(get_session),
tenant_id: str = Depends(get_current_tenant_id),
) -> StatusResponse:
"""NOTE(rkuo): internally discussed and the consensus is this endpoint
and associate_credential_to_connector should be combined.
The intent of this endpoint is to handle connectors that don't need credentials,
AKA web, file, etc., but there isn't any reason a single endpoint couldn't
serve this purpose.
"""
fetch_ee_implementation_or_noop(
"onyx.db.user_group", "validate_object_creation_for_user", None
)(
@@ -841,6 +849,18 @@ def create_connector_with_mock_credential(
groups=connector_data.groups,
)
# trigger indexing immediately
primary_app.send_task(
OnyxCeleryTask.CHECK_FOR_INDEXING,
priority=OnyxCeleryPriority.HIGH,
kwargs={"tenant_id": tenant_id},
)
logger.info(
f"create_connector_with_mock_credential - running check_for_indexing: "
f"cc_pair={response.data}"
)
create_milestone_and_report(
user=user,
distinct_id=user.email if user else tenant_id or "N/A",
@@ -1005,6 +1025,8 @@ def connector_run_once(
kwargs={"tenant_id": tenant_id},
)
logger.info("connector_run_once - running check_for_indexing")
msg = f"Marked {num_triggers} index attempts with indexing triggers."
return StatusResponse(
success=True,

View File

@@ -179,12 +179,10 @@ def oauth_callback(
db_session=db_session,
)
# TODO: use a library for url handling
sep = "&" if "?" in desired_return_url else "?"
return CallbackResponse(
redirect_url=(
f"{desired_return_url}?credentialId={credential.id}"
if "?" not in desired_return_url
else f"{desired_return_url}&credentialId={credential.id}"
)
redirect_url=f"{desired_return_url}{sep}credentialId={credential.id}"
)
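Per the TODO above, a library-based approach could look like this urllib.parse sketch (the helper name is hypothetical, not part of this PR):

from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse

def append_query_param(url: str, key: str, value: str) -> str:
    # Preserves any existing query string instead of testing for "?" manually.
    parts = urlparse(url)
    query = dict(parse_qsl(parts.query))
    query[key] = value
    return urlunparse(parts._replace(query=urlencode(query)))

# append_query_param("https://app/return?foo=1", "credentialId", "42")
# -> "https://app/return?foo=1&credentialId=42"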

View File

@@ -6,11 +6,15 @@ from sqlalchemy.orm import Session
from onyx.auth.users import current_curator_or_admin_user
from onyx.auth.users import current_user
from onyx.background.celery.versioned_apps.primary import app as primary_app
from onyx.configs.constants import OnyxCeleryPriority
from onyx.configs.constants import OnyxCeleryTask
from onyx.db.document_set import check_document_sets_are_public
from onyx.db.document_set import fetch_all_document_sets_for_user
from onyx.db.document_set import insert_document_set
from onyx.db.document_set import mark_document_set_as_to_be_deleted
from onyx.db.document_set import update_document_set
from onyx.db.engine import get_current_tenant_id
from onyx.db.engine import get_session
from onyx.db.models import User
from onyx.server.features.document_set.models import CheckDocSetPublicRequest
@@ -29,6 +33,7 @@ def create_document_set(
document_set_creation_request: DocumentSetCreationRequest,
user: User = Depends(current_curator_or_admin_user),
db_session: Session = Depends(get_session),
tenant_id: str = Depends(get_current_tenant_id),
) -> int:
fetch_ee_implementation_or_noop(
"onyx.db.user_group", "validate_object_creation_for_user", None
@@ -46,6 +51,13 @@ def create_document_set(
)
except Exception as e:
raise HTTPException(status_code=400, detail=str(e))
primary_app.send_task(
OnyxCeleryTask.CHECK_FOR_VESPA_SYNC_TASK,
kwargs={"tenant_id": tenant_id},
priority=OnyxCeleryPriority.HIGH,
)
return document_set_db_model.id
@@ -54,6 +66,7 @@ def patch_document_set(
document_set_update_request: DocumentSetUpdateRequest,
user: User = Depends(current_curator_or_admin_user),
db_session: Session = Depends(get_session),
tenant_id: str = Depends(get_current_tenant_id),
) -> None:
fetch_ee_implementation_or_noop(
"onyx.db.user_group", "validate_object_creation_for_user", None
@@ -72,12 +85,19 @@ def patch_document_set(
except Exception as e:
raise HTTPException(status_code=400, detail=str(e))
primary_app.send_task(
OnyxCeleryTask.CHECK_FOR_VESPA_SYNC_TASK,
kwargs={"tenant_id": tenant_id},
priority=OnyxCeleryPriority.HIGH,
)
@router.delete("/admin/document-set/{document_set_id}")
def delete_document_set(
document_set_id: int,
user: User = Depends(current_curator_or_admin_user),
db_session: Session = Depends(get_session),
tenant_id: str = Depends(get_current_tenant_id),
) -> None:
try:
mark_document_set_as_to_be_deleted(
@@ -88,6 +108,12 @@ def delete_document_set(
except Exception as e:
raise HTTPException(status_code=400, detail=str(e))
primary_app.send_task(
OnyxCeleryTask.CHECK_FOR_VESPA_SYNC_TASK,
kwargs={"tenant_id": tenant_id},
priority=OnyxCeleryPriority.HIGH,
)
"""Endpoints for non-admins"""

View File

@@ -197,6 +197,11 @@ def create_deletion_attempt_for_connector_id(
kwargs={"tenant_id": tenant_id},
)
logger.info(
f"create_deletion_attempt_for_connector_id - running check_for_connector_deletion: "
f"cc_pair={cc_pair.id}"
)
if cc_pair.connector.source == DocumentSource.FILE:
connector = cc_pair.connector
file_store = get_default_file_store(db_session)

View File

@@ -247,6 +247,7 @@ def create_bot(
respond_member_group_list=[],
answer_filters=[],
follow_up_tags=[],
respond_tag_only=True,
)
insert_slack_channel_config(
db_session=db_session,

View File

@@ -34,6 +34,7 @@ from onyx.auth.users import current_curator_or_admin_user
from onyx.auth.users import current_user
from onyx.auth.users import optional_user
from onyx.configs.app_configs import AUTH_TYPE
from onyx.configs.app_configs import DEV_MODE
from onyx.configs.app_configs import ENABLE_EMAIL_INVITES
from onyx.configs.app_configs import SESSION_EXPIRE_TIME_SECONDS
from onyx.configs.app_configs import VALID_EMAIL_DOMAINS
@@ -286,7 +287,7 @@ def bulk_invite_users(
detail=f"Invalid email address: {email} - {str(e)}",
)
if MULTI_TENANT:
if MULTI_TENANT and not DEV_MODE:
try:
fetch_ee_implementation_or_noop(
"onyx.server.tenants.provisioning", "add_users_to_tenant", None

View File

@@ -717,15 +717,14 @@ def upload_files_for_chat(
else ChatFileType.PLAIN_TEXT
)
if file_type == ChatFileType.IMAGE:
file_content_io = file.file
# NOTE: Image conversion to JPEG used to be enforced here.
# This was removed to:
# 1. Preserve original file content for downloads
# 2. Maintain transparency in formats like PNG
# 3. Ameliorate issue with file conversion
else:
file_content_io = io.BytesIO(file.file.read())
file_content = file.file.read() # Read the file content
# NOTE: Image conversion to JPEG used to be enforced here.
# This was removed to:
# 1. Preserve original file content for downloads
# 2. Maintain transparency in formats like PNG
# 3. Ameliorate issue with file conversion
file_content_io = io.BytesIO(file_content)
new_content_type = file.content_type
@@ -747,6 +746,7 @@ def upload_files_for_chat(
file_name=file.filename or "",
)
text_file_id = str(uuid.uuid4())
file_store.save_file(
file_name=text_file_id,
content=io.BytesIO(extracted_text.encode()),

View File

@@ -10,6 +10,8 @@ from uuid import UUID
from redis import Redis
from ee.onyx.server.tenants.user_mapping import get_tenant_id_for_email
from onyx.auth.invited_users import get_invited_users
from onyx.auth.invited_users import write_invited_users
from onyx.configs.app_configs import REDIS_AUTH_KEY_PREFIX
from onyx.configs.app_configs import REDIS_DB_NUMBER
from onyx.configs.app_configs import REDIS_HOST
@@ -21,6 +23,7 @@ from onyx.db.users import get_user_by_email
from onyx.redis.redis_pool import RedisPool
from shared_configs.configs import MULTI_TENANT
from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA
from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR
# Tool to run helpful operations on Redis in production
# This is targeted for internal usage and may not have all the necessary parameters
@@ -310,6 +313,13 @@ if __name__ == "__main__":
required=False,
)
parser.add_argument(
"--tenant-id",
type=str,
help="Tenant ID for get, delete user token, or add to invited users",
required=False,
)
parser.add_argument(
"--batch",
type=int,
@@ -328,11 +338,32 @@ if __name__ == "__main__":
parser.add_argument(
"--user-email",
type=str,
help="User email for get or delete user token",
help="User email for get, delete user token, or add to invited users",
required=False,
)
args = parser.parse_args()
if args.tenant_id:
CURRENT_TENANT_ID_CONTEXTVAR.set(args.tenant_id)
if args.command == "add_invited_user":
if not args.user_email:
print("Error: --user-email is required for add_invited_user command")
sys.exit(1)
current_invited_users = get_invited_users()
if args.user_email not in current_invited_users:
current_invited_users.append(args.user_email)
if args.dry_run:
print(f"(DRY-RUN) Would add {args.user_email} to invited users")
else:
write_invited_users(current_invited_users)
print(f"Added {args.user_email} to invited users")
else:
print(f"{args.user_email} is already in the invited users list")
sys.exit(0)
exitcode = onyx_redis(
command=args.command,
batch=args.batch,

View File

@@ -255,6 +255,24 @@ def get_documents_for_tenant_connector(
print_documents(documents)
def search_for_document(
index_name: str, document_id: str, max_hits: int | None = 10
) -> List[Dict[str, Any]]:
yql_query = (
f'select * from sources {index_name} where document_id contains "{document_id}"'
)
params: dict[str, Any] = {"yql": yql_query}
if max_hits is not None:
params["hits"] = max_hits
with get_vespa_http_client() as client:
response = client.get(f"{SEARCH_ENDPOINT}/search/", params=params)
response.raise_for_status()
result = response.json()
documents = result.get("root", {}).get("children", [])
logger.info(f"Found {len(documents)} documents from query.")
return documents
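A usage sketch (index name and document id below are hypothetical; passing max_hits=None omits the hits parameter, so Vespa's own default limit applies):

# Hypothetical call: list up to 10 chunk hits for one document id.
chunks = search_for_document("danswer_chunk", "web__https://example.com/page")
print(f"{len(chunks)} chunks returned")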
def search_documents(
tenant_id: str, connector_id: int, query: str, n: int = 10
) -> None:
@@ -440,10 +458,98 @@ def get_document_acls(
print("-" * 80)
def get_current_chunk_count(
document_id: str, index_name: str, tenant_id: str
) -> int | None:
with get_session_with_tenant(tenant_id=tenant_id) as session:
return (
session.query(Document.chunk_count)
.filter(Document.id == document_id)
.scalar()
)
def get_number_of_chunks_we_think_exist(
document_id: str, index_name: str, tenant_id: str
) -> int:
current_chunk_count = get_current_chunk_count(document_id, index_name, tenant_id)
print(f"Current chunk count: {current_chunk_count}")
doc_info = VespaIndex.enrich_basic_chunk_info(
index_name=index_name,
http_client=get_vespa_http_client(),
document_id=document_id,
previous_chunk_count=current_chunk_count,
new_chunk_count=0,
)
chunk_ids = get_document_chunk_ids(
enriched_document_info_list=[doc_info],
tenant_id=tenant_id,
large_chunks_enabled=False,
)
return len(chunk_ids)
class VespaDebugging:
# Class for managing Vespa debugging actions.
def __init__(self, tenant_id: str | None = None):
self.tenant_id = POSTGRES_DEFAULT_SCHEMA if not tenant_id else tenant_id
self.index_name = get_index_name(self.tenant_id)
def sample_document_counts(self) -> None:
# Sample random documents and compare chunk counts
mismatches = []
no_chunks = []
with get_session_with_tenant(tenant_id=self.tenant_id) as session:
# Get a sample of random documents
from sqlalchemy import func
sample_docs = (
session.query(Document.id, Document.link, Document.semantic_id)
.order_by(func.random())
.limit(1000)
.all()
)
for doc in sample_docs:
document_id, link, semantic_id = doc
(
number_of_chunks_in_vespa,
number_of_chunks_we_think_exist,
) = self.compare_chunk_count(document_id)
if number_of_chunks_in_vespa != number_of_chunks_we_think_exist:
mismatches.append(
(
document_id,
link,
semantic_id,
number_of_chunks_in_vespa,
number_of_chunks_we_think_exist,
)
)
elif number_of_chunks_in_vespa == 0:
no_chunks.append((document_id, link, semantic_id))
# Print results
print("\nDocuments with mismatched chunk counts:")
for doc_id, link, semantic_id, vespa_count, expected_count in mismatches:
print(f"Document ID: {doc_id}")
print(f"Link: {link}")
print(f"Semantic ID: {semantic_id}")
print(f"Chunks in Vespa: {vespa_count}")
print(f"Expected chunks: {expected_count}")
print("-" * 80)
print("\nDocuments with no chunks in Vespa:")
for doc_id, link, semantic_id in no_chunks:
print(f"Document ID: {doc_id}")
print(f"Link: {link}")
print(f"Semantic ID: {semantic_id}")
print("-" * 80)
print(f"\nTotal mismatches: {len(mismatches)}")
print(f"Total documents with no chunks: {len(no_chunks)}")
def print_config(self) -> None:
# Print Vespa config.
@@ -457,6 +563,16 @@ class VespaDebugging:
# List documents for a tenant.
list_documents(n, self.tenant_id)
def compare_chunk_count(self, document_id: str) -> tuple[int, int]:
docs = search_for_document(self.index_name, document_id, max_hits=None)
number_of_chunks_we_think_exist = get_number_of_chunks_we_think_exist(
document_id, self.index_name, self.tenant_id
)
print(
f"Number of chunks in Vespa: {len(docs)}, Number of chunks we think exist: {number_of_chunks_we_think_exist}"
)
return len(docs), number_of_chunks_we_think_exist
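A sketch of driving this debugging entry point (tenant and document id are hypothetical):

# Hypothetical session: spot-check one document for chunk-count drift.
dbg = VespaDebugging(tenant_id="public")
in_vespa, expected = dbg.compare_chunk_count("web__https://example.com/page")
if in_vespa != expected:
    print("chunk drift detected - consider resyncing this document")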
def search_documents(self, connector_id: int, query: str, n: int = 10) -> None:
# Search documents for a tenant and connector.
search_documents(self.tenant_id, connector_id, query, n)
@@ -464,9 +580,11 @@ class VespaDebugging:
def update_document(
self, connector_id: int, doc_id: str, fields: Dict[str, Any]
) -> None:
# Update a document.
update_document(self.tenant_id, connector_id, doc_id, fields)
def search_for_document(self, document_id: str) -> List[Dict[str, Any]]:
return search_for_document(self.index_name, document_id)
def delete_document(self, connector_id: int, doc_id: str) -> None:
# Delete a document.
delete_document(self.tenant_id, connector_id, doc_id)
@@ -483,7 +601,6 @@ class VespaDebugging:
def main() -> None:
# Main CLI entry point.
parser = argparse.ArgumentParser(description="Vespa debugging tool")
parser.add_argument(
"--action",

View File

@@ -70,6 +70,7 @@ COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
# Set up application files
COPY ./onyx /app/onyx
COPY ./shared_configs /app/shared_configs
COPY ./alembic_tenants /app/alembic_tenants
COPY ./alembic /app/alembic
COPY ./alembic.ini /app/alembic.ini
COPY ./pytest.ini /app/pytest.ini

View File

@@ -24,35 +24,6 @@ def generate_auth_token() -> str:
class TenantManager:
@staticmethod
def create(
tenant_id: str | None = None,
initial_admin_email: str | None = None,
referral_source: str | None = None,
) -> dict[str, str]:
body = {
"tenant_id": tenant_id,
"initial_admin_email": initial_admin_email,
"referral_source": referral_source,
}
token = generate_auth_token()
headers = {
"Authorization": f"Bearer {token}",
"X-API-KEY": "",
"Content-Type": "application/json",
}
response = requests.post(
url=f"{API_SERVER_URL}/tenants/create",
json=body,
headers=headers,
)
response.raise_for_status()
return response.json()
@staticmethod
def get_all_users(
user_performing_action: DATestUser | None = None,

View File

@@ -92,6 +92,7 @@ class UserManager:
# Set cookies in the headers
test_user.headers["Cookie"] = f"fastapiusersauth={session_cookie}; "
test_user.cookies = {"fastapiusersauth": session_cookie}
return test_user
@staticmethod
@@ -102,6 +103,7 @@ class UserManager:
response = requests.get(
url=f"{API_SERVER_URL}/me",
headers=user_to_verify.headers,
cookies=user_to_verify.cookies,
)
if user_to_verify.is_active is False:

View File

@@ -242,6 +242,18 @@ def reset_postgres_multitenant() -> None:
schema_name = schema[0]
cur.execute(f'DROP SCHEMA "{schema_name}" CASCADE')
# Drop tables in the public schema
cur.execute(
"""
SELECT tablename FROM pg_tables
WHERE schemaname = 'public'
"""
)
public_tables = cur.fetchall()
for table in public_tables:
table_name = table[0]
cur.execute(f'DROP TABLE IF EXISTS public."{table_name}" CASCADE')
cur.close()
conn.close()

View File

@@ -44,6 +44,7 @@ class DATestUser(BaseModel):
headers: dict
role: UserRole
is_active: bool
cookies: dict = {}
class DATestPersonaLabel(BaseModel):

View File

@@ -4,7 +4,6 @@ from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.chat import ChatSessionManager
from tests.integration.common_utils.managers.document import DocumentManager
from tests.integration.common_utils.managers.llm_provider import LLMProviderManager
from tests.integration.common_utils.managers.tenant import TenantManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestAPIKey
from tests.integration.common_utils.test_models import DATestCCPair
@@ -13,25 +12,28 @@ from tests.integration.common_utils.test_models import DATestUser
def test_multi_tenant_access_control(reset_multitenant: None) -> None:
# Create Tenant 1 and its Admin User
TenantManager.create("tenant_dev1", "test1@test.com", "Data Plane Registration")
test_user1: DATestUser = UserManager.create(name="test1", email="test1@test.com")
assert UserManager.is_role(test_user1, UserRole.ADMIN)
# Creating an admin user (the first user created is automatically an admin and also provisions the tenant)
admin_user1: DATestUser = UserManager.create(
email="admin@onyx-test.com",
)
assert UserManager.is_role(admin_user1, UserRole.ADMIN)
# Create Tenant 2 and its Admin User
TenantManager.create("tenant_dev2", "test2@test.com", "Data Plane Registration")
test_user2: DATestUser = UserManager.create(name="test2", email="test2@test.com")
assert UserManager.is_role(test_user2, UserRole.ADMIN)
admin_user2: DATestUser = UserManager.create(
email="admin2@onyx-test.com",
)
assert UserManager.is_role(admin_user2, UserRole.ADMIN)
# Create connectors for Tenant 1
cc_pair_1: DATestCCPair = CCPairManager.create_from_scratch(
user_performing_action=test_user1,
user_performing_action=admin_user1,
)
api_key_1: DATestAPIKey = APIKeyManager.create(
user_performing_action=test_user1,
user_performing_action=admin_user1,
)
api_key_1.headers.update(test_user1.headers)
LLMProviderManager.create(user_performing_action=test_user1)
api_key_1.headers.update(admin_user1.headers)
LLMProviderManager.create(user_performing_action=admin_user1)
# Seed documents for Tenant 1
cc_pair_1.documents = []
@@ -49,13 +51,13 @@ def test_multi_tenant_access_control(reset_multitenant: None) -> None:
# Create connectors for Tenant 2
cc_pair_2: DATestCCPair = CCPairManager.create_from_scratch(
user_performing_action=test_user2,
user_performing_action=admin_user2,
)
api_key_2: DATestAPIKey = APIKeyManager.create(
user_performing_action=test_user2,
user_performing_action=admin_user2,
)
api_key_2.headers.update(test_user2.headers)
LLMProviderManager.create(user_performing_action=test_user2)
api_key_2.headers.update(admin_user2.headers)
LLMProviderManager.create(user_performing_action=admin_user2)
# Seed documents for Tenant 2
cc_pair_2.documents = []
@@ -76,17 +78,17 @@ def test_multi_tenant_access_control(reset_multitenant: None) -> None:
# Create chat sessions for each user
chat_session1: DATestChatSession = ChatSessionManager.create(
user_performing_action=test_user1
user_performing_action=admin_user1
)
chat_session2: DATestChatSession = ChatSessionManager.create(
user_performing_action=test_user2
user_performing_action=admin_user2
)
# User 1 sends a message and gets a response
response1 = ChatSessionManager.send_message(
chat_session_id=chat_session1.id,
message="What is in Tenant 1's documents?",
user_performing_action=test_user1,
user_performing_action=admin_user1,
)
# Assert that the search tool was used
assert response1.tool_name == "run_search"
@@ -100,14 +102,16 @@ def test_multi_tenant_access_control(reset_multitenant: None) -> None:
), "Tenant 2 document IDs should not be in the response"
# Assert that the contents are correct
for doc in response1.tool_result or []:
assert doc["content"] == "Tenant 1 Document Content"
assert any(
doc["content"] == "Tenant 1 Document Content"
for doc in response1.tool_result or []
), "Tenant 1 Document Content not found in any document"
# User 2 sends a message and gets a response
response2 = ChatSessionManager.send_message(
chat_session_id=chat_session2.id,
message="What is in Tenant 2's documents?",
user_performing_action=test_user2,
user_performing_action=admin_user2,
)
# Assert that the search tool was used
assert response2.tool_name == "run_search"
@@ -119,15 +123,18 @@ def test_multi_tenant_access_control(reset_multitenant: None) -> None:
assert not response_doc_ids.intersection(
tenant1_doc_ids
), "Tenant 1 document IDs should not be in the response"
# Assert that the contents are correct
for doc in response2.tool_result or []:
assert doc["content"] == "Tenant 2 Document Content"
assert any(
doc["content"] == "Tenant 2 Document Content"
for doc in response2.tool_result or []
), "Tenant 2 Document Content not found in any document"
# User 1 tries to access Tenant 2's documents
response_cross = ChatSessionManager.send_message(
chat_session_id=chat_session1.id,
message="What is in Tenant 2's documents?",
user_performing_action=test_user1,
user_performing_action=admin_user1,
)
# Assert that the search tool was used
assert response_cross.tool_name == "run_search"
@@ -140,7 +147,7 @@ def test_multi_tenant_access_control(reset_multitenant: None) -> None:
response_cross2 = ChatSessionManager.send_message(
chat_session_id=chat_session2.id,
message="What is in Tenant 1's documents?",
user_performing_action=test_user2,
user_performing_action=admin_user2,
)
# Assert that the search tool was used
assert response_cross2.tool_name == "run_search"

View File

@@ -4,14 +4,12 @@ from onyx.db.models import UserRole
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.connector import ConnectorManager
from tests.integration.common_utils.managers.credential import CredentialManager
from tests.integration.common_utils.managers.tenant import TenantManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestUser
# Test flow from creating a tenant to registering as a user
def test_tenant_creation(reset_multitenant: None) -> None:
TenantManager.create("tenant_dev", "test@test.com", "Data Plane Registration")
test_user: DATestUser = UserManager.create(name="test", email="test@test.com")
assert UserManager.is_role(test_user, UserRole.ADMIN)

View File

@@ -1,23 +1,23 @@
import time
from datetime import datetime
from onyx.db.models import IndexingStatus
from tests.integration.common_utils.managers.cc_pair import CCPairManager
from tests.integration.common_utils.managers.index_attempt import IndexAttemptManager
from tests.integration.common_utils.managers.user import UserManager
from tests.integration.common_utils.test_models import DATestIndexAttempt
from tests.integration.common_utils.test_models import DATestUser
def _verify_index_attempt_pagination(
cc_pair_id: int,
index_attempts: list[DATestIndexAttempt],
index_attempt_ids: list[int],
page_size: int = 5,
user_performing_action: DATestUser | None = None,
) -> None:
retrieved_attempts: list[int] = []
last_time_started = None # Track the last time_started seen
for i in range(0, len(index_attempts), page_size):
for i in range(0, len(index_attempt_ids), page_size):
paginated_result = IndexAttemptManager.get_index_attempt_page(
cc_pair_id=cc_pair_id,
page=(i // page_size),
@@ -26,9 +26,9 @@ def _verify_index_attempt_pagination(
)
# Verify that the total items is equal to the length of the index attempts list
assert paginated_result.total_items == len(index_attempts)
assert paginated_result.total_items == len(index_attempt_ids)
# Verify that the number of items in the page is equal to the page size
assert len(paginated_result.items) == min(page_size, len(index_attempts) - i)
assert len(paginated_result.items) == min(page_size, len(index_attempt_ids) - i)
# Verify time ordering within the page (descending order)
for attempt in paginated_result.items:
@@ -42,7 +42,7 @@ def _verify_index_attempt_pagination(
retrieved_attempts.extend([attempt.id for attempt in paginated_result.items])
# Create a set of all the expected index attempt IDs
all_expected_attempts = set(attempt.id for attempt in index_attempts)
all_expected_attempts = set(index_attempt_ids)
# Create a set of all the retrieved index attempt IDs
all_retrieved_attempts = set(retrieved_attempts)
@@ -51,6 +51,9 @@ def _verify_index_attempt_pagination(
def test_index_attempt_pagination(reset: None) -> None:
MAX_WAIT = 60
all_attempt_ids: list[int] = []
# Create an admin user to perform actions
user_performing_action: DATestUser = UserManager.create(
name="admin_performing_action",
@@ -62,20 +65,49 @@ def test_index_attempt_pagination(reset: None) -> None:
user_performing_action=user_performing_action,
)
# Create 300 successful index attempts
# Creating a CC pair will create an index attempt as well. Wait for it.
start = time.monotonic()
while True:
paginated_result = IndexAttemptManager.get_index_attempt_page(
cc_pair_id=cc_pair.id,
page=0,
page_size=5,
user_performing_action=user_performing_action,
)
if paginated_result.total_items == 1:
all_attempt_ids.append(paginated_result.items[0].id)
print("Initial index attempt from cc_pair creation detected. Continuing...")
break
elapsed = time.monotonic() - start
if elapsed > MAX_WAIT:
raise TimeoutError(
f"Initial index attempt: Not detected within {MAX_WAIT} seconds."
)
print(
f"Waiting for initial index attempt: elapsed={elapsed:.2f} timeout={MAX_WAIT}"
)
time.sleep(1)
# Create 299 successful index attempts (for 300 total)
base_time = datetime.now()
all_attempts = IndexAttemptManager.create_test_index_attempts(
num_attempts=300,
generated_attempts = IndexAttemptManager.create_test_index_attempts(
num_attempts=299,
cc_pair_id=cc_pair.id,
status=IndexingStatus.SUCCESS,
base_time=base_time,
)
for attempt in generated_attempts:
all_attempt_ids.append(attempt.id)
# Verify basic pagination with different page sizes
print("Verifying basic pagination with page size 5")
_verify_index_attempt_pagination(
cc_pair_id=cc_pair.id,
index_attempts=all_attempts,
index_attempt_ids=all_attempt_ids,
page_size=5,
user_performing_action=user_performing_action,
)
@@ -84,7 +116,7 @@ def test_index_attempt_pagination(reset: None) -> None:
print("Verifying pagination with page size 100")
_verify_index_attempt_pagination(
cc_pair_id=cc_pair.id,
index_attempts=all_attempts,
index_attempt_ids=all_attempt_ids,
page_size=100,
user_performing_action=user_performing_action,
)
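
The busy-wait added above (poll, log progress, time out) is a pattern that recurs in these tests and could be factored into a small helper. A minimal sketch — the `wait_until` name and signature are illustrative, not part of this diff:

```
import time
from typing import Callable


def wait_until(
    condition: Callable[[], bool],
    timeout: float = 60.0,
    interval: float = 1.0,
    description: str = "condition",
) -> None:
    """Poll `condition` until it returns True or `timeout` seconds elapse."""
    start = time.monotonic()
    while not condition():
        elapsed = time.monotonic() - start
        if elapsed > timeout:
            raise TimeoutError(f"{description}: not met within {timeout} seconds.")
        print(f"Waiting for {description}: elapsed={elapsed:.2f} timeout={timeout}")
        time.sleep(interval)
```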

View File

@@ -0,0 +1,423 @@
services:
api_server:
image: onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}
build:
context: ../../backend
dockerfile: Dockerfile
command: >
/bin/sh -c "
alembic -n schema_private upgrade head &&
echo \"Starting Onyx Api Server\" &&
uvicorn onyx.main:app --host 0.0.0.0 --port 8080"
depends_on:
- relational_db
- index
- cache
- inference_model_server
restart: always
ports:
- "8080:8080"
environment:
- ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
- MULTI_TENANT=true
- LOG_LEVEL=DEBUG
- AUTH_TYPE=cloud
- REQUIRE_EMAIL_VERIFICATION=false
- DISABLE_TELEMETRY=true
- IMAGE_TAG=test
- DEV_MODE=true
# Auth Settings
- SESSION_EXPIRE_TIME_SECONDS=${SESSION_EXPIRE_TIME_SECONDS:-}
- ENCRYPTION_KEY_SECRET=${ENCRYPTION_KEY_SECRET:-}
- VALID_EMAIL_DOMAINS=${VALID_EMAIL_DOMAINS:-}
- GOOGLE_OAUTH_CLIENT_ID=${GOOGLE_OAUTH_CLIENT_ID:-}
- GOOGLE_OAUTH_CLIENT_SECRET=${GOOGLE_OAUTH_CLIENT_SECRET:-}
- SMTP_SERVER=${SMTP_SERVER:-}
- SMTP_PORT=${SMTP_PORT:-587}
- SMTP_USER=${SMTP_USER:-}
- SMTP_PASS=${SMTP_PASS:-}
- ENABLE_EMAIL_INVITES=${ENABLE_EMAIL_INVITES:-}
- EMAIL_FROM=${EMAIL_FROM:-}
- OAUTH_CLIENT_ID=${OAUTH_CLIENT_ID:-}
- OAUTH_CLIENT_SECRET=${OAUTH_CLIENT_SECRET:-}
- OPENID_CONFIG_URL=${OPENID_CONFIG_URL:-}
- TRACK_EXTERNAL_IDP_EXPIRY=${TRACK_EXTERNAL_IDP_EXPIRY:-}
- CORS_ALLOWED_ORIGIN=${CORS_ALLOWED_ORIGIN:-}
# Gen AI Settings
- GEN_AI_MAX_TOKENS=${GEN_AI_MAX_TOKENS:-}
- QA_TIMEOUT=${QA_TIMEOUT:-}
- MAX_CHUNKS_FED_TO_CHAT=${MAX_CHUNKS_FED_TO_CHAT:-}
- DISABLE_LLM_CHOOSE_SEARCH=${DISABLE_LLM_CHOOSE_SEARCH:-}
- DISABLE_LLM_QUERY_REPHRASE=${DISABLE_LLM_QUERY_REPHRASE:-}
- DISABLE_GENERATIVE_AI=${DISABLE_GENERATIVE_AI:-}
- DISABLE_LITELLM_STREAMING=${DISABLE_LITELLM_STREAMING:-}
- LITELLM_EXTRA_HEADERS=${LITELLM_EXTRA_HEADERS:-}
- BING_API_KEY=${BING_API_KEY:-}
- DISABLE_LLM_DOC_RELEVANCE=${DISABLE_LLM_DOC_RELEVANCE:-}
- GEN_AI_API_KEY=${GEN_AI_API_KEY:-}
- TOKEN_BUDGET_GLOBALLY_ENABLED=${TOKEN_BUDGET_GLOBALLY_ENABLED:-}
# Query Options
- DOC_TIME_DECAY=${DOC_TIME_DECAY:-}
- HYBRID_ALPHA=${HYBRID_ALPHA:-}
- EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-}
- MULTILINGUAL_QUERY_EXPANSION=${MULTILINGUAL_QUERY_EXPANSION:-}
- LANGUAGE_HINT=${LANGUAGE_HINT:-}
- LANGUAGE_CHAT_NAMING_HINT=${LANGUAGE_CHAT_NAMING_HINT:-}
- QA_PROMPT_OVERRIDE=${QA_PROMPT_OVERRIDE:-}
# Other services
- POSTGRES_HOST=relational_db
- POSTGRES_DEFAULT_SCHEMA=${POSTGRES_DEFAULT_SCHEMA:-}
- VESPA_HOST=index
- REDIS_HOST=cache
- WEB_DOMAIN=${WEB_DOMAIN:-}
# Don't change the NLP model configs unless you know what you're doing
- EMBEDDING_BATCH_SIZE=${EMBEDDING_BATCH_SIZE:-}
- DOCUMENT_ENCODER_MODEL=${DOCUMENT_ENCODER_MODEL:-}
- DOC_EMBEDDING_DIM=${DOC_EMBEDDING_DIM:-}
- NORMALIZE_EMBEDDINGS=${NORMALIZE_EMBEDDINGS:-}
- ASYM_QUERY_PREFIX=${ASYM_QUERY_PREFIX:-}
- DISABLE_RERANK_FOR_STREAMING=${DISABLE_RERANK_FOR_STREAMING:-}
- MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}
- MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-}
- LOG_ALL_MODEL_INTERACTIONS=${LOG_ALL_MODEL_INTERACTIONS:-}
- LOG_DANSWER_MODEL_INTERACTIONS=${LOG_DANSWER_MODEL_INTERACTIONS:-}
- LOG_INDIVIDUAL_MODEL_TOKENS=${LOG_INDIVIDUAL_MODEL_TOKENS:-}
- LOG_VESPA_TIMING_INFORMATION=${LOG_VESPA_TIMING_INFORMATION:-}
- LOG_ENDPOINT_LATENCY=${LOG_ENDPOINT_LATENCY:-}
- LOG_POSTGRES_LATENCY=${LOG_POSTGRES_LATENCY:-}
- LOG_POSTGRES_CONN_COUNTS=${LOG_POSTGRES_CONN_COUNTS:-}
- CELERY_BROKER_POOL_LIMIT=${CELERY_BROKER_POOL_LIMIT:-}
- LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS=${LITELLM_CUSTOM_ERROR_MESSAGE_MAPPINGS:-}
# Egnyte OAuth Configs
- EGNYTE_CLIENT_ID=${EGNYTE_CLIENT_ID:-}
- EGNYTE_CLIENT_SECRET=${EGNYTE_CLIENT_SECRET:-}
- EGNYTE_LOCALHOST_OVERRIDE=${EGNYTE_LOCALHOST_OVERRIDE:-}
# Linear OAuth Configs
- LINEAR_CLIENT_ID=${LINEAR_CLIENT_ID:-}
- LINEAR_CLIENT_SECRET=${LINEAR_CLIENT_SECRET:-}
# Analytics Configs
- SENTRY_DSN=${SENTRY_DSN:-}
# Chat Configs
- HARD_DELETE_CHATS=${HARD_DELETE_CHATS:-}
# Enables the use of Bedrock models or IAM auth
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID:-}
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY:-}
- AWS_REGION_NAME=${AWS_REGION_NAME:-}
- API_KEY_HASH_ROUNDS=${API_KEY_HASH_ROUNDS:-}
# Seeding configuration
- USE_IAM_AUTH=${USE_IAM_AUTH:-}
extra_hosts:
- "host.docker.internal:host-gateway"
logging:
driver: json-file
options:
max-size: "50m"
max-file: "6"
background:
image: onyxdotapp/onyx-backend:${IMAGE_TAG:-latest}
build:
context: ../../backend
dockerfile: Dockerfile
command: >
/bin/sh -c "
if [ -f /etc/ssl/certs/custom-ca.crt ]; then
update-ca-certificates;
fi &&
/usr/bin/supervisord -c /etc/supervisor/conf.d/supervisord.conf"
depends_on:
- relational_db
- index
- cache
- inference_model_server
- indexing_model_server
restart: always
environment:
- ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true
- MULTI_TENANT=true
- LOG_LEVEL=DEBUG
- AUTH_TYPE=cloud
- REQUIRE_EMAIL_VERIFICATION=false
- DISABLE_TELEMETRY=true
- IMAGE_TAG=test
- ENCRYPTION_KEY_SECRET=${ENCRYPTION_KEY_SECRET:-}
- JWT_PUBLIC_KEY_URL=${JWT_PUBLIC_KEY_URL:-}
# Gen AI Settings (Needed by OnyxBot)
- GEN_AI_MAX_TOKENS=${GEN_AI_MAX_TOKENS:-}
- QA_TIMEOUT=${QA_TIMEOUT:-}
- MAX_CHUNKS_FED_TO_CHAT=${MAX_CHUNKS_FED_TO_CHAT:-}
- DISABLE_LLM_CHOOSE_SEARCH=${DISABLE_LLM_CHOOSE_SEARCH:-}
- DISABLE_LLM_QUERY_REPHRASE=${DISABLE_LLM_QUERY_REPHRASE:-}
- DISABLE_GENERATIVE_AI=${DISABLE_GENERATIVE_AI:-}
- GENERATIVE_MODEL_ACCESS_CHECK_FREQ=${GENERATIVE_MODEL_ACCESS_CHECK_FREQ:-}
- DISABLE_LITELLM_STREAMING=${DISABLE_LITELLM_STREAMING:-}
- LITELLM_EXTRA_HEADERS=${LITELLM_EXTRA_HEADERS:-}
- GEN_AI_API_KEY=${GEN_AI_API_KEY:-}
- BING_API_KEY=${BING_API_KEY:-}
# Query Options
- DOC_TIME_DECAY=${DOC_TIME_DECAY:-}
- HYBRID_ALPHA=${HYBRID_ALPHA:-}
- EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-}
- MULTILINGUAL_QUERY_EXPANSION=${MULTILINGUAL_QUERY_EXPANSION:-}
- LANGUAGE_HINT=${LANGUAGE_HINT:-}
- LANGUAGE_CHAT_NAMING_HINT=${LANGUAGE_CHAT_NAMING_HINT:-}
- QA_PROMPT_OVERRIDE=${QA_PROMPT_OVERRIDE:-}
# Other Services
- POSTGRES_HOST=relational_db
- POSTGRES_USER=${POSTGRES_USER:-}
- POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-}
- POSTGRES_DB=${POSTGRES_DB:-}
- POSTGRES_DEFAULT_SCHEMA=${POSTGRES_DEFAULT_SCHEMA:-}
- VESPA_HOST=index
- REDIS_HOST=cache
- WEB_DOMAIN=${WEB_DOMAIN:-}
# Don't change the NLP model configs unless you know what you're doing
- DOCUMENT_ENCODER_MODEL=${DOCUMENT_ENCODER_MODEL:-}
- DOC_EMBEDDING_DIM=${DOC_EMBEDDING_DIM:-}
- NORMALIZE_EMBEDDINGS=${NORMALIZE_EMBEDDINGS:-}
- ASYM_QUERY_PREFIX=${ASYM_QUERY_PREFIX:-}
- ASYM_PASSAGE_PREFIX=${ASYM_PASSAGE_PREFIX:-}
- MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}
- MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-}
- INDEXING_MODEL_SERVER_HOST=${INDEXING_MODEL_SERVER_HOST:-indexing_model_server}
# Indexing Configs
- VESPA_SEARCHER_THREADS=${VESPA_SEARCHER_THREADS:-}
- NUM_INDEXING_WORKERS=${NUM_INDEXING_WORKERS:-}
- ENABLED_CONNECTOR_TYPES=${ENABLED_CONNECTOR_TYPES:-}
- DISABLE_INDEX_UPDATE_ON_SWAP=${DISABLE_INDEX_UPDATE_ON_SWAP:-}
- DASK_JOB_CLIENT_ENABLED=${DASK_JOB_CLIENT_ENABLED:-}
- CONTINUE_ON_CONNECTOR_FAILURE=${CONTINUE_ON_CONNECTOR_FAILURE:-}
- EXPERIMENTAL_CHECKPOINTING_ENABLED=${EXPERIMENTAL_CHECKPOINTING_ENABLED:-}
- CONFLUENCE_CONNECTOR_LABELS_TO_SKIP=${CONFLUENCE_CONNECTOR_LABELS_TO_SKIP:-}
- JIRA_CONNECTOR_LABELS_TO_SKIP=${JIRA_CONNECTOR_LABELS_TO_SKIP:-}
- WEB_CONNECTOR_VALIDATE_URLS=${WEB_CONNECTOR_VALIDATE_URLS:-}
- JIRA_API_VERSION=${JIRA_API_VERSION:-}
- GONG_CONNECTOR_START_TIME=${GONG_CONNECTOR_START_TIME:-}
- NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP=${NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP:-}
- GITHUB_CONNECTOR_BASE_URL=${GITHUB_CONNECTOR_BASE_URL:-}
- MAX_DOCUMENT_CHARS=${MAX_DOCUMENT_CHARS:-}
- MAX_FILE_SIZE_BYTES=${MAX_FILE_SIZE_BYTES:-}
# Egnyte OAuth Configs
- EGNYTE_CLIENT_ID=${EGNYTE_CLIENT_ID:-}
- EGNYTE_CLIENT_SECRET=${EGNYTE_CLIENT_SECRET:-}
- EGNYTE_LOCALHOST_OVERRIDE=${EGNYTE_LOCALHOST_OVERRIDE:-}
# Linear OAuth Configs
- LINEAR_CLIENT_ID=${LINEAR_CLIENT_ID:-}
- LINEAR_CLIENT_SECRET=${LINEAR_CLIENT_SECRET:-}
# Celery Configs (defaults are set in the supervisord.conf file.
# prefer doing that to have one source of defaults)
- CELERY_WORKER_INDEXING_CONCURRENCY=${CELERY_WORKER_INDEXING_CONCURRENCY:-}
- CELERY_WORKER_LIGHT_CONCURRENCY=${CELERY_WORKER_LIGHT_CONCURRENCY:-}
- CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER=${CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER:-}
# Onyx SlackBot Configs
- DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER=${DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER:-}
- DANSWER_BOT_FEEDBACK_VISIBILITY=${DANSWER_BOT_FEEDBACK_VISIBILITY:-}
- DANSWER_BOT_DISPLAY_ERROR_MSGS=${DANSWER_BOT_DISPLAY_ERROR_MSGS:-}
- DANSWER_BOT_RESPOND_EVERY_CHANNEL=${DANSWER_BOT_RESPOND_EVERY_CHANNEL:-}
- DANSWER_BOT_DISABLE_COT=${DANSWER_BOT_DISABLE_COT:-} # Currently unused
- NOTIFY_SLACKBOT_NO_ANSWER=${NOTIFY_SLACKBOT_NO_ANSWER:-}
- DANSWER_BOT_MAX_QPM=${DANSWER_BOT_MAX_QPM:-}
- DANSWER_BOT_MAX_WAIT_TIME=${DANSWER_BOT_MAX_WAIT_TIME:-}
# Logging
# Leave this on pretty please? Nothing sensitive is collected!
# https://docs.onyx.app/more/telemetry
- DISABLE_TELEMETRY=${DISABLE_TELEMETRY:-}
- LOG_LEVEL=${LOG_LEVEL:-info} # Set to debug to get more fine-grained logs
- LOG_ALL_MODEL_INTERACTIONS=${LOG_ALL_MODEL_INTERACTIONS:-} # LiteLLM Verbose Logging
# Log all of Onyx prompts and interactions with the LLM
- LOG_DANSWER_MODEL_INTERACTIONS=${LOG_DANSWER_MODEL_INTERACTIONS:-}
- LOG_INDIVIDUAL_MODEL_TOKENS=${LOG_INDIVIDUAL_MODEL_TOKENS:-}
- LOG_VESPA_TIMING_INFORMATION=${LOG_VESPA_TIMING_INFORMATION:-}
# Analytics Configs
- SENTRY_DSN=${SENTRY_DSN:-}
# Enterprise Edition stuff
- ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${ENABLE_PAID_ENTERPRISE_EDITION_FEATURES:-false}
- USE_IAM_AUTH=${USE_IAM_AUTH:-}
- AWS_REGION_NAME=${AWS_REGION_NAME:-}
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID-}
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY-}
# Uncomment the volume mount below if USE_IAM_AUTH is true and you are using IAM auth for Postgres
# volumes:
# - ./bundle.pem:/app/bundle.pem:ro
extra_hosts:
- "host.docker.internal:host-gateway"
logging:
driver: json-file
options:
max-size: "50m"
max-file: "6"
# Uncomment the following lines if you need to include a custom CA certificate
# This section enables the use of a custom CA certificate
# If present, the custom CA certificate is mounted as a volume
# The container checks for its existence and updates the system's CA certificates
# This allows for secure communication with services using custom SSL certificates
# Optional volume mount for CA certificate
# volumes:
# # Maps to the CA_CERT_PATH environment variable in the Dockerfile
# - ${CA_CERT_PATH:-./custom-ca.crt}:/etc/ssl/certs/custom-ca.crt:ro
web_server:
image: onyxdotapp/onyx-web-server:${IMAGE_TAG:-latest}
build:
context: ../../web
dockerfile: Dockerfile
args:
- NEXT_PUBLIC_DISABLE_STREAMING=${NEXT_PUBLIC_DISABLE_STREAMING:-false}
- NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA=${NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA:-false}
- NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS=${NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS:-}
- NEXT_PUBLIC_NEGATIVE_PREDEFINED_FEEDBACK_OPTIONS=${NEXT_PUBLIC_NEGATIVE_PREDEFINED_FEEDBACK_OPTIONS:-}
- NEXT_PUBLIC_DISABLE_LOGOUT=${NEXT_PUBLIC_DISABLE_LOGOUT:-}
- NEXT_PUBLIC_DEFAULT_SIDEBAR_OPEN=${NEXT_PUBLIC_DEFAULT_SIDEBAR_OPEN:-}
- NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=${NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED:-}
# Enterprise Edition only
- NEXT_PUBLIC_THEME=${NEXT_PUBLIC_THEME:-}
# DO NOT TURN ON unless you have EXPLICIT PERMISSION from Onyx.
- NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED=${NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED:-false}
depends_on:
- api_server
restart: always
environment:
- INTERNAL_URL=http://api_server:8080
- WEB_DOMAIN=${WEB_DOMAIN:-}
- THEME_IS_DARK=${THEME_IS_DARK:-}
- DISABLE_LLM_DOC_RELEVANCE=${DISABLE_LLM_DOC_RELEVANCE:-}
# Enterprise Edition only
- ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${ENABLE_PAID_ENTERPRISE_EDITION_FEATURES:-false}
- NEXT_PUBLIC_CUSTOM_REFRESH_URL=${NEXT_PUBLIC_CUSTOM_REFRESH_URL:-}
inference_model_server:
image: onyxdotapp/onyx-model-server:${IMAGE_TAG:-latest}
build:
context: ../../backend
dockerfile: Dockerfile.model_server
command: >
/bin/sh -c "if [ \"${DISABLE_MODEL_SERVER:-false}\" = \"True\" ]; then
echo 'Skipping service...';
exit 0;
else
exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000;
fi"
restart: on-failure
environment:
- MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}
# Set to debug to get more fine-grained logs
- LOG_LEVEL=${LOG_LEVEL:-info}
# Analytics Configs
- SENTRY_DSN=${SENTRY_DSN:-}
volumes:
# Not necessary, this is just to reduce download time during startup
- model_cache_huggingface:/root/.cache/huggingface/
logging:
driver: json-file
options:
max-size: "50m"
max-file: "6"
indexing_model_server:
image: onyxdotapp/onyx-model-server:${IMAGE_TAG:-latest}
build:
context: ../../backend
dockerfile: Dockerfile.model_server
command: >
/bin/sh -c "if [ \"${DISABLE_MODEL_SERVER:-false}\" = \"True\" ]; then
echo 'Skipping service...';
exit 0;
else
exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000;
fi"
restart: on-failure
environment:
- INDEX_BATCH_SIZE=${INDEX_BATCH_SIZE:-}
- MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}
- INDEXING_ONLY=True
# Set to debug to get more fine-grained logs
- LOG_LEVEL=${LOG_LEVEL:-info}
- CLIENT_EMBEDDING_TIMEOUT=${CLIENT_EMBEDDING_TIMEOUT:-}
# Analytics Configs
- SENTRY_DSN=${SENTRY_DSN:-}
volumes:
# Not necessary, this is just to reduce download time during startup
- indexing_huggingface_model_cache:/root/.cache/huggingface/
logging:
driver: json-file
options:
max-size: "50m"
max-file: "6"
relational_db:
image: postgres:15.2-alpine
command: -c 'max_connections=250'
restart: always
environment:
- POSTGRES_USER=${POSTGRES_USER:-postgres}
- POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password}
ports:
- "5432:5432"
volumes:
- db_volume:/var/lib/postgresql/data
# This container name cannot contain an underscore because Vespa expects a valid hostname in the URL
index:
image: vespaengine/vespa:8.277.17
restart: always
ports:
- "19071:19071"
- "8081:8081"
volumes:
- vespa_volume:/opt/vespa/var
logging:
driver: json-file
options:
max-size: "50m"
max-file: "6"
nginx:
image: nginx:1.23.4-alpine
restart: always
# nginx will immediately crash with `nginx: [emerg] host not found in upstream`
# if api_server / web_server are not up
depends_on:
- api_server
- web_server
environment:
- DOMAIN=localhost
ports:
- "80:80"
- "3000:80" # allow for localhost:3000 usage, since that is the norm
volumes:
- ../data/nginx:/etc/nginx/conf.d
logging:
driver: json-file
options:
max-size: "50m"
max-file: "6"
# The specified script waits for the api_server to start up.
# Without this we've seen issues where nginx shows no error logs but
# does not receive any traffic
# NOTE: we have to use dos2unix to remove Carriage Return chars from the file
# in order to make this work on both Unix-like systems and Windows
command: >
/bin/sh -c "dos2unix /etc/nginx/conf.d/run-nginx.sh
&& /etc/nginx/conf.d/run-nginx.sh app.conf.template.dev"
cache:
image: redis:7.4-alpine
restart: always
ports:
- "6379:6379"
# Docker silently mounts /data even without an explicit volume mount, which enables
# persistence. Explicitly setting save and appendonly forces ephemeral behavior.
command: redis-server --save "" --appendonly no
volumes:
db_volume:
vespa_volume: # Created by the container itself
model_cache_huggingface:
indexing_huggingface_model_cache:

View File

@@ -4,12 +4,12 @@ dependencies:
version: 14.3.1
- name: vespa
repository: https://onyx-dot-app.github.io/vespa-helm-charts
version: 0.2.18
version: 0.2.20
- name: nginx
repository: oci://registry-1.docker.io/bitnamicharts
version: 15.14.0
- name: redis
repository: https://charts.bitnami.com/bitnami
version: 20.1.0
digest: sha256:5c9eb3d55d5f8e3beb64f26d26f686c8d62755daa10e2e6d87530bdf2fbbf957
generated: "2024-12-10T10:47:35.812483-08:00"
digest: sha256:4615c033064a987e3f66a48f4744d2e88bd1cc932c79453c4928455695a72778
generated: "2025-02-04T11:45:05.39228-08:00"

View File

@@ -23,7 +23,7 @@ dependencies:
repository: https://charts.bitnami.com/bitnami
condition: postgresql.enabled
- name: vespa
version: 0.2.18
version: 0.2.20
repository: https://onyx-dot-app.github.io/vespa-helm-charts
condition: vespa.enabled
- name: nginx

View File

@@ -7,10 +7,10 @@ metadata:
data:
INTERNAL_URL: "http://{{ include "onyx-stack.fullname" . }}-api-service:{{ .Values.api.service.port | default 8080 }}"
POSTGRES_HOST: {{ .Release.Name }}-postgresql
VESPA_HOST: da-vespa-0.vespa-service
VESPA_HOST: {{ .Values.vespa.name }}.{{ .Values.vespa.service.name }}.{{ .Release.Namespace }}.svc.cluster.local
REDIS_HOST: {{ .Release.Name }}-redis-master
MODEL_SERVER_HOST: "{{ include "onyx-stack.fullname" . }}-inference-model-service"
INDEXING_MODEL_SERVER_HOST: "{{ include "onyx-stack.fullname" . }}-indexing-model-service"
{{- range $key, $value := .Values.configMap }}
{{ $key }}: "{{ $value }}"
{{- end }}
{{- end }}
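
For context on the templated `VESPA_HOST` above: it replaces the hard-coded `da-vespa-0.vespa-service` with the fully-qualified DNS name Kubernetes assigns to a StatefulSet pod behind a headless service, `<pod>.<service>.<namespace>.svc.cluster.local`. A tiny sketch of the pattern (hypothetical helper, shown for illustration only; the `onyx` namespace is an assumption):

```
def statefulset_pod_fqdn(pod: str, service: str, namespace: str) -> str:
    # Stable per-pod DNS name for a StatefulSet behind a headless service
    return f"{pod}.{service}.{namespace}.svc.cluster.local"


assert (
    statefulset_pod_fqdn("da-vespa-0", "vespa-service", "onyx")
    == "da-vespa-0.vespa-service.onyx.svc.cluster.local"
)
```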

View File

@@ -5,6 +5,7 @@
postgresql:
primary:
persistence:
storageClass: ""
size: 5Gi
enabled: true
auth:
@@ -12,13 +13,52 @@ postgresql:
secretKeys:
# overwriting as postgres typically expects 'postgres-password'
adminPasswordKey: postgres_password
imagePullSecrets: []
nameOverride: ""
fullnameOverride: ""
vespa:
name: da-vespa-0
service:
name: vespa-service
volumeClaimTemplates:
- metadata:
name: vespa-storage
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 1Gi
storageClassName: ""
enabled: true
replicaCount: 1
image:
repository: vespa
pullPolicy: IfNotPresent
tag: "8.277.17"
podAnnotations: {}
podLabels:
app: vespa
app.kubernetes.io/instance: onyx
app.kubernetes.io/name: vespa
securityContext:
privileged: true
runAsUser: 0
resources:
# The Vespa Helm chart specifies default resources, which are quite modest. We override
# them here to increase chances of the chart running successfully.
requests:
cpu: 1500m
memory: 4000Mi
limits:
cpu: 1500m
memory: 4000Mi
persistent:
storageClassName: ""
imagePullSecrets: []
nameOverride: ""
fullnameOverride: ""
inferenceCapability:
service:
portName: modelserver
@@ -272,15 +312,9 @@ background:
podSecurityContext:
{}
# fsGroup: 2000
securityContext:
{}
# capabilities:
# drop:
# - ALL
# readOnlyRootFilesystem: true
# runAsNonRoot: true
# runAsUser: 1000
privileged: true
runAsUser: 0
enableMiniChunk: "true"
resources: {}
# We usually recommend not to specify default resources and to leave this as a conscious
@@ -316,50 +350,6 @@ background:
nodeSelector: {}
tolerations: []
vespa:
volumeClaimTemplates:
- metadata:
name: vespa-storage
spec:
accessModes:
- ReadWriteOnce
storageClassName: ""
resources:
requests:
storage: 1Gi
enabled: true
replicaCount: 1
image:
repository: vespa
pullPolicy: IfNotPresent
tag: "8.277.17"
podAnnotations: {}
podLabels:
app: vespa
app.kubernetes.io/instance: onyx
app.kubernetes.io/name: vespa
podSecurityContext:
{}
# fsGroup: 2000
securityContext:
privileged: true
runAsUser: 0
resources:
# The Vespa Helm chart specifies default resources, which are quite modest. We override
# them here to increase chances of the chart running successfully.
requests:
cpu: 1500m
memory: 4000Mi
limits:
cpu: 1500m
memory: 4000Mi
nodeSelector: {}
tolerations: []
affinity: {}
redis:

web/.gitignore
View File

@@ -35,6 +35,8 @@ yarn-error.log*
*.tsbuildinfo
next-env.d.ts
# playwright testing temp files
/admin_auth.json
/user_auth.json
/build-archive.log
/test-results

View File

@@ -81,6 +81,9 @@ ENV NEXT_PUBLIC_GTM_ENABLED=${NEXT_PUBLIC_GTM_ENABLED}
ARG NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED
ENV NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=${NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED}
ARG NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK
ENV NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=${NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK}
# Use NODE_OPTIONS in the build command
RUN NODE_OPTIONS="${NODE_OPTIONS}" npx next build
@@ -160,6 +163,9 @@ ENV NEXT_PUBLIC_GTM_ENABLED=${NEXT_PUBLIC_GTM_ENABLED}
ARG NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED
ENV NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED=${NEXT_PUBLIC_FORGOT_PASSWORD_ENABLED}
ARG NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK
ENV NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK=${NEXT_PUBLIC_INCLUDE_ERROR_POPUP_SUPPORT_LINK}
# Note: Don't expose ports here, Compose will handle that for us if necessary.
# If you want to run this without compose, specify the ports to
# expose via cli

View File

@@ -21,3 +21,42 @@ Open [http://localhost:3000](http://localhost:3000) with your browser to see the
_Note:_ If you are having problems accessing the page above, try setting the `WEB_DOMAIN` env variable to
`http://127.0.0.1:3000` and accessing it there.
## Testing
This testing process resets your application to a clean state.
Don't run these tests if you don't want that!
Bring up the entire application.
1. Reset the instance
```
cd backend
export PYTEST_IGNORE_SKIP=true
pytest -s tests/integration/tests/playwright/test_playwright.py
```
(A sketch of the skip guard this flag bypasses appears after this list.)
2. Run playwright
```
cd web
npx playwright test
```
3. Inspect results
By default, playwright.config.ts is configured to output the results to:
```
web/test-results
```
4. Upload results to Chromatic (Optional)
This step would normally not be run by third-party developers, but first-party devs
may use this for local troubleshooting and testing.
```
cd web
npx chromatic --playwright --project-token={your token here}
```
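
As referenced in step 1, the reset test is destructive, so it is normally skipped; `PYTEST_IGNORE_SKIP=true` bypasses that guard. A hypothetical sketch of what such a module-level guard looks like (illustrative only — the actual test may gate differently):

```
import os

import pytest

# Destructive reset tests stay skipped unless the caller opts in explicitly
if os.environ.get("PYTEST_IGNORE_SKIP", "").lower() != "true":
    pytest.skip(
        "Set PYTEST_IGNORE_SKIP=true to run destructive reset tests",
        allow_module_level=True,
    )
```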

web/package-lock.json
View File

@@ -15,6 +15,7 @@
"@headlessui/react": "^2.2.0",
"@headlessui/tailwindcss": "^0.2.1",
"@phosphor-icons/react": "^2.0.8",
"@radix-ui/react-accordion": "^1.2.2",
"@radix-ui/react-checkbox": "^1.1.2",
"@radix-ui/react-collapsible": "^1.1.2",
"@radix-ui/react-dialog": "^1.1.2",
@@ -83,11 +84,11 @@
"yup": "^1.4.0"
},
"devDependencies": {
"@chromatic-com/playwright": "^0.10.0",
"@chromatic-com/playwright": "^0.10.2",
"@tailwindcss/typography": "^0.5.10",
"@types/chrome": "^0.0.287",
"@types/jest": "^29.5.14",
"chromatic": "^11.18.1",
"chromatic": "^11.25.2",
"eslint": "^8.48.0",
"eslint-config-next": "^14.1.0",
"jest": "^29.7.0",
@@ -756,9 +757,9 @@
"license": "MIT"
},
"node_modules/@chromatic-com/playwright": {
"version": "0.10.0",
"resolved": "https://registry.npmjs.org/@chromatic-com/playwright/-/playwright-0.10.0.tgz",
"integrity": "sha512-QjKnOfuIcq9Y97QwA3MMVzOceXn1ikelUeC8gy60d2PbsQ2NNxH2n/PrAJ8Sllr225mXD1ts9xBH+Hq3+Blo5A==",
"version": "0.10.2",
"resolved": "https://registry.npmjs.org/@chromatic-com/playwright/-/playwright-0.10.2.tgz",
"integrity": "sha512-SfP4I0rWPeSNW5VtV7eiuNSsZYK9IdVPTBT1SnUFJd3lACS1YJJd5s8pTisJvgh5Q8u9VNGWXfeuV3ddGJyRtw==",
"dev": true,
"license": "MIT",
"dependencies": {
@@ -3442,6 +3443,140 @@
"integrity": "sha512-4Z8dn6Upk0qk4P74xBhZ6Hd/w0mPEzOOLxy4xiPXOXqjF7jZS0VAKk7/x/H6FyY2zCkYJqePf1G5KmkmNJ4RBA==",
"license": "MIT"
},
"node_modules/@radix-ui/react-accordion": {
"version": "1.2.2",
"resolved": "https://registry.npmjs.org/@radix-ui/react-accordion/-/react-accordion-1.2.2.tgz",
"integrity": "sha512-b1oh54x4DMCdGsB4/7ahiSrViXxaBwRPotiZNnYXjLha9vfuURSAZErki6qjDoSIV0eXx5v57XnTGVtGwnfp2g==",
"license": "MIT",
"dependencies": {
"@radix-ui/primitive": "1.1.1",
"@radix-ui/react-collapsible": "1.1.2",
"@radix-ui/react-collection": "1.1.1",
"@radix-ui/react-compose-refs": "1.1.1",
"@radix-ui/react-context": "1.1.1",
"@radix-ui/react-direction": "1.1.0",
"@radix-ui/react-id": "1.1.0",
"@radix-ui/react-primitive": "2.0.1",
"@radix-ui/react-use-controllable-state": "1.1.0"
},
"peerDependencies": {
"@types/react": "*",
"@types/react-dom": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
},
"@types/react-dom": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-accordion/node_modules/@radix-ui/primitive": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.1.tgz",
"integrity": "sha512-SJ31y+Q/zAyShtXJc8x83i9TYdbAfHZ++tUZnvjJJqFjzsdUnKsxPL6IEtBlxKkU7yzer//GQtZSV4GbldL3YA==",
"license": "MIT"
},
"node_modules/@radix-ui/react-accordion/node_modules/@radix-ui/react-collection": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/@radix-ui/react-collection/-/react-collection-1.1.1.tgz",
"integrity": "sha512-LwT3pSho9Dljg+wY2KN2mrrh6y3qELfftINERIzBUO9e0N+t0oMTyn3k9iv+ZqgrwGkRnLpNJrsMv9BZlt2yuA==",
"license": "MIT",
"dependencies": {
"@radix-ui/react-compose-refs": "1.1.1",
"@radix-ui/react-context": "1.1.1",
"@radix-ui/react-primitive": "2.0.1",
"@radix-ui/react-slot": "1.1.1"
},
"peerDependencies": {
"@types/react": "*",
"@types/react-dom": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
},
"@types/react-dom": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-accordion/node_modules/@radix-ui/react-compose-refs": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/@radix-ui/react-compose-refs/-/react-compose-refs-1.1.1.tgz",
"integrity": "sha512-Y9VzoRDSJtgFMUCoiZBDVo084VQ5hfpXxVE+NgkdNsjiDBByiImMZKKhxMwCbdHvhlENG6a833CbFkOQvTricw==",
"license": "MIT",
"peerDependencies": {
"@types/react": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-accordion/node_modules/@radix-ui/react-context": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/@radix-ui/react-context/-/react-context-1.1.1.tgz",
"integrity": "sha512-UASk9zi+crv9WteK/NU4PLvOoL3OuE6BWVKNF6hPRBtYBDXQ2u5iu3O59zUlJiTVvkyuycnqrztsHVJwcK9K+Q==",
"license": "MIT",
"peerDependencies": {
"@types/react": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-accordion/node_modules/@radix-ui/react-primitive": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.0.1.tgz",
"integrity": "sha512-sHCWTtxwNn3L3fH8qAfnF3WbUZycW93SM1j3NFDzXBiz8D6F5UTTy8G1+WFEaiCdvCVRJWj6N2R4Xq6HdiHmDg==",
"license": "MIT",
"dependencies": {
"@radix-ui/react-slot": "1.1.1"
},
"peerDependencies": {
"@types/react": "*",
"@types/react-dom": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
},
"@types/react-dom": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-accordion/node_modules/@radix-ui/react-slot": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.1.1.tgz",
"integrity": "sha512-RApLLOcINYJA+dMVbOju7MYv1Mb2EBp2nH4HdDzXTSyaR5optlm6Otrz1euW3HbdOR8UmmFK06TD+A9frYWv+g==",
"license": "MIT",
"dependencies": {
"@radix-ui/react-compose-refs": "1.1.1"
},
"peerDependencies": {
"@types/react": "*",
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
},
"peerDependenciesMeta": {
"@types/react": {
"optional": true
}
}
},
"node_modules/@radix-ui/react-arrow": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@radix-ui/react-arrow/-/react-arrow-1.1.0.tgz",
@@ -8442,9 +8577,9 @@
}
},
"node_modules/chromatic": {
"version": "11.18.1",
"resolved": "https://registry.npmjs.org/chromatic/-/chromatic-11.18.1.tgz",
"integrity": "sha512-hkNT9vA6K9+PnE/khhZYBnRCOm8NonaQDs7RZ8YHFo7/lh1b/x/uFMkTjWjaj/mkM6QOR/evu5VcZMtcaauSlw==",
"version": "11.25.2",
"resolved": "https://registry.npmjs.org/chromatic/-/chromatic-11.25.2.tgz",
"integrity": "sha512-/9eQWn6BU1iFsop86t8Au21IksTRxwXAl7if8YHD05L2AbuMjClLWZo5cZojqrJHGKDhTqfrC2X2xE4uSm0iKw==",
"dev": true,
"license": "MIT",
"bin": {

View File

@@ -18,6 +18,7 @@
"@headlessui/react": "^2.2.0",
"@headlessui/tailwindcss": "^0.2.1",
"@phosphor-icons/react": "^2.0.8",
"@radix-ui/react-accordion": "^1.2.2",
"@radix-ui/react-checkbox": "^1.1.2",
"@radix-ui/react-collapsible": "^1.1.2",
"@radix-ui/react-dialog": "^1.1.2",
@@ -86,11 +87,11 @@
"yup": "^1.4.0"
},
"devDependencies": {
"@chromatic-com/playwright": "^0.10.0",
"@chromatic-com/playwright": "^0.10.2",
"@tailwindcss/typography": "^0.5.10",
"@types/chrome": "^0.0.287",
"@types/jest": "^29.5.14",
"chromatic": "^11.18.1",
"chromatic": "^11.25.2",
"eslint": "^8.48.0",
"eslint-config-next": "^14.1.0",
"jest": "^29.7.0",

View File

@@ -2,7 +2,19 @@ import { defineConfig, devices } from "@playwright/test";
export default defineConfig({
globalSetup: require.resolve("./tests/e2e/global-setup"),
timeout: 30000, // 30 seconds timeout
timeout: 60000, // 60 seconds timeout
reporter: [
["list"],
// Warning: uncommenting the html reporter may cause the chromatic-archives
// directory to be deleted after the test run, which will break CI.
// [
// 'html',
// {
// outputFolder: 'test-results', // or whatever directory you want
// open: 'never', // can be 'always' | 'on-failure' | 'never'
// },
// ],
],
projects: [
{
name: "admin",

View File

@@ -28,6 +28,7 @@ import { Spinner } from "@/components/Spinner";
import { deleteApiKey, regenerateApiKey } from "./lib";
import { OnyxApiKeyForm } from "./OnyxApiKeyForm";
import { APIKey } from "./types";
import CreateButton from "@/components/ui/createButton";
const API_KEY_TEXT = `API Keys allow you to access Onyx APIs programmatically. Click the button below to generate a new API Key.`;
@@ -111,14 +112,7 @@ function Main() {
}
const newApiKeyButton = (
<Button
variant="navigate"
size="sm"
className="mt-3"
onClick={() => setShowCreateUpdateForm(true)}
>
Create API Key
</Button>
<CreateButton href="/admin/api-key/new" text="Create API Key" />
);
if (apiKeys.length === 0) {

View File

@@ -40,7 +40,12 @@ import * as Yup from "yup";
import CollapsibleSection from "./CollapsibleSection";
import { SuccessfulPersonaUpdateRedirectType } from "./enums";
import { Persona, PersonaLabel, StarterMessage } from "./interfaces";
import { PersonaUpsertParameters, createPersona, updatePersona } from "./lib";
import {
PersonaUpsertParameters,
createPersona,
updatePersona,
deletePersona,
} from "./lib";
import {
CameraIcon,
GroupsIconSkeleton,
@@ -71,7 +76,6 @@ import { LLMSelector } from "@/components/llm/LLMSelector";
import useSWR from "swr";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { DeleteEntityModal } from "@/components/modals/DeleteEntityModal";
import { DeletePersonaButton } from "./[id]/DeletePersonaButton";
import Title from "@/components/ui/title";
import { SEARCH_TOOL_ID } from "@/app/chat/tools/constants";
@@ -322,10 +326,39 @@ export function AssistantEditor({
}));
};
const [deleteModalOpen, setDeleteModalOpen] = useState(false);
if (!labels) {
return <></>;
}
const openDeleteModal = () => {
setDeleteModalOpen(true);
};
const closeDeleteModal = () => {
setDeleteModalOpen(false);
};
const handleDeletePersona = async () => {
if (existingPersona) {
const response = await deletePersona(existingPersona.id);
if (response.ok) {
await refreshAssistants();
router.push(
redirectType === SuccessfulPersonaUpdateRedirectType.ADMIN
? `/admin/assistants?u=${Date.now()}`
: `/chat`
);
} else {
setPopup({
type: "error",
message: `Failed to delete persona - ${await response.text()}`,
});
}
}
};
return (
<div className="mx-auto max-w-4xl">
<style>
@@ -364,6 +397,14 @@ export function AssistantEditor({
}}
/>
)}
{deleteModalOpen && existingPersona && (
<DeleteEntityModal
entityType="Persona"
entityName={existingPersona.name}
onClose={closeDeleteModal}
onSubmit={handleDeletePersona}
/>
)}
{popup}
<Formik
enableReinitialize={true}
@@ -1312,14 +1353,6 @@ export function AssistantEditor({
explanationLink="https://docs.onyx.app/guides/assistants"
className="[&_textarea]:placeholder:text-text-muted/50"
/>
<div className="flex justify-end">
{existingPersona && (
<DeletePersonaButton
personaId={existingPersona!.id}
redirectType={SuccessfulPersonaUpdateRedirectType.ADMIN}
/>
)}
</div>
</>
)}
@@ -1338,6 +1371,18 @@ export function AssistantEditor({
Cancel
</Button>
</div>
<div className="flex justify-end">
{existingPersona && (
<Button
variant="destructive"
onClick={openDeleteModal}
type="button"
>
Delete
</Button>
)}
</div>
</Form>
);
}}

View File

@@ -17,6 +17,7 @@ import { FiEdit2 } from "react-icons/fi";
import { TrashIcon } from "@/components/icons/icons";
import { useUser } from "@/components/user/UserProvider";
import { useAssistants } from "@/components/context/AssistantsContext";
import { DeleteEntityModal } from "@/components/modals/DeleteEntityModal";
function PersonaTypeDisplay({ persona }: { persona: Persona }) {
if (persona.builtin_persona) {
@@ -53,6 +54,8 @@ export function PersonasTable() {
}, [editablePersonas]);
const [finalPersonas, setFinalPersonas] = useState<Persona[]>([]);
const [deleteModalOpen, setDeleteModalOpen] = useState(false);
const [personaToDelete, setPersonaToDelete] = useState<Persona | null>(null);
useEffect(() => {
const editable = editablePersonas.sort(personaComparator);
@@ -98,9 +101,42 @@ export function PersonasTable() {
await refreshUser();
};
const openDeleteModal = (persona: Persona) => {
setPersonaToDelete(persona);
setDeleteModalOpen(true);
};
const closeDeleteModal = () => {
setDeleteModalOpen(false);
setPersonaToDelete(null);
};
const handleDeletePersona = async () => {
if (personaToDelete) {
const response = await deletePersona(personaToDelete.id);
if (response.ok) {
await refreshAssistants();
closeDeleteModal();
} else {
setPopup({
type: "error",
message: `Failed to delete persona - ${await response.text()}`,
});
}
}
};
return (
<div>
{popup}
{deleteModalOpen && personaToDelete && (
<DeleteEntityModal
entityType="Persona"
entityName={personaToDelete.name}
onClose={closeDeleteModal}
onSubmit={handleDeletePersona}
/>
)}
<DraggableTable
headers={["Name", "Description", "Type", "Is Visible", "Delete"]}
@@ -170,16 +206,7 @@ export function PersonasTable() {
{!persona.builtin_persona && isEditable ? (
<div
className="hover:bg-hover rounded p-1 cursor-pointer"
onClick={async () => {
const response = await deletePersona(persona.id);
if (response.ok) {
await refreshAssistants();
} else {
alert(
`Failed to delete persona - ${await response.text()}`
);
}
}}
onClick={() => openDeleteModal(persona)}
>
<TrashIcon />
</div>

View File

@@ -1,15 +1,12 @@
"use client";
import { PersonasTable } from "./PersonaTable";
import { FiPlusSquare } from "react-icons/fi";
import Link from "next/link";
import Text from "@/components/ui/text";
import Title from "@/components/ui/title";
import { Separator } from "@/components/ui/separator";
import { AssistantsIcon } from "@/components/icons/icons";
import { AdminPageTitle } from "@/components/admin/Title";
import LabelManagement from "./LabelManagement";
import { SubLabel } from "@/components/admin/connectors/Field";
import CreateButton from "@/components/ui/createButton";
export default async function Page() {
return (
<div className="mx-auto container">
@@ -33,15 +30,7 @@ export default async function Page() {
<Separator />
<Title>Create an Assistant</Title>
<Link
href="/admin/assistants/new"
className="flex py-2 px-4 mt-2 border border-border h-fit cursor-pointer hover:bg-hover text-sm w-40"
>
<div className="mx-auto flex">
<FiPlusSquare className="my-auto mr-2" />
New Assistant
</div>
</Link>
<CreateButton href="/admin/assistants/new" text="New Assistant" />
<Separator />

View File

@@ -49,7 +49,7 @@ export function SlackChannelConfigsTable({
}}
>
<FiSettings />
Edit Default Config
Edit Default Configuration
</Button>
<Link href={`/admin/bots/${slackBotId}/channels/new`}>
<Button variant="outline">

View File

@@ -45,13 +45,26 @@ export const SlackChannelConfigCreationForm = ({
const existingSlackBotUsesPersona = existingSlackChannelConfig?.persona
? !isPersonaASlackBotPersona(existingSlackChannelConfig.persona)
: false;
const existingPersonaHasSearchTool = existingSlackChannelConfig?.persona
? existingSlackChannelConfig.persona.tools.some(
(tool) => tool.in_code_tool_id === SEARCH_TOOL_ID
)
: false;
const searchEnabledAssistants = useMemo(() => {
return personas.filter((persona) => {
return persona.tools.some(
(tool) => tool.in_code_tool_id == SEARCH_TOOL_ID
);
});
const [searchEnabledAssistants, nonSearchAssistants] = useMemo(() => {
return personas.reduce(
(acc, persona) => {
if (
persona.tools.some((tool) => tool.in_code_tool_id === SEARCH_TOOL_ID)
) {
acc[0].push(persona);
} else {
acc[1].push(persona);
}
return acc;
},
[[], []] as [Persona[], Persona[]]
);
}, [personas]);
return (
@@ -105,7 +118,9 @@ export const SlackChannelConfigCreationForm = ({
standard_answer_categories:
existingSlackChannelConfig?.standard_answer_categories || [],
knowledge_source: existingSlackBotUsesPersona
? "assistant"
? existingPersonaHasSearchTool
? "assistant"
: "non_search_assistant"
: existingSlackChannelConfig?.persona
? "document_sets"
: "all_public",
@@ -148,7 +163,12 @@ export const SlackChannelConfigCreationForm = ({
}),
standard_answer_categories: Yup.array(),
knowledge_source: Yup.string()
.oneOf(["all_public", "document_sets", "assistant"])
.oneOf([
"all_public",
"document_sets",
"assistant",
"non_search_assistant",
])
.required(),
})}
onSubmit={async (values, formikHelpers) => {
@@ -159,13 +179,16 @@ export const SlackChannelConfigCreationForm = ({
slack_bot_id,
channel_name: values.channel_name,
respond_member_group_list: values.respond_member_group_list,
usePersona: values.knowledge_source === "assistant",
usePersona:
values.knowledge_source === "assistant" ||
values.knowledge_source === "non_search_assistant",
document_sets:
values.knowledge_source === "document_sets"
? values.document_sets
: [],
persona_id:
values.knowledge_source === "assistant"
values.knowledge_source === "assistant" ||
values.knowledge_source === "non_search_assistant"
? values.persona_id
: null,
standard_answer_categories: values.standard_answer_categories.map(
@@ -204,7 +227,7 @@ export const SlackChannelConfigCreationForm = ({
}
}}
>
{({ isSubmitting, values, setFieldValue }) => (
{({ isSubmitting, values, setFieldValue, ...formikProps }) => (
<Form>
<div className="pb-6 w-full">
<SlackChannelConfigFormFields
@@ -213,9 +236,11 @@ export const SlackChannelConfigCreationForm = ({
isDefault={isDefault}
documentSets={documentSets}
searchEnabledAssistants={searchEnabledAssistants}
nonSearchAssistants={nonSearchAssistants}
standardAnswerCategoryResponse={standardAnswerCategoryResponse}
setPopup={setPopup}
slack_bot_id={slack_bot_id}
formikProps={formikProps}
/>
</div>
</Form>

View File

@@ -10,7 +10,6 @@ import {
} from "formik";
import { CCPairDescriptor, DocumentSet } from "@/lib/types";
import {
BooleanFormField,
Label,
SelectorFormField,
SubLabel,
@@ -42,18 +41,29 @@ import { fetchSlackChannels } from "../lib";
import { Badge } from "@/components/ui/badge";
import useSWR from "swr";
import { ThreeDotsLoader } from "@/components/Loading";
import {
Accordion,
AccordionContent,
AccordionItem,
AccordionTrigger,
} from "@/components/ui/accordion";
import { Separator } from "@/components/ui/separator";
import { CheckFormField } from "@/components/ui/CheckField";
export interface SlackChannelConfigFormFieldsProps {
isUpdate: boolean;
isDefault: boolean;
documentSets: DocumentSet[];
searchEnabledAssistants: Persona[];
nonSearchAssistants: Persona[];
standardAnswerCategoryResponse: StandardAnswerCategoryResponse;
setPopup: (popup: {
message: string;
type: "error" | "success" | "warning";
}) => void;
slack_bot_id: number;
formikProps: any;
}
export function SlackChannelConfigFormFields({
@@ -61,15 +71,15 @@ export function SlackChannelConfigFormFields({
isDefault,
documentSets,
searchEnabledAssistants,
nonSearchAssistants,
standardAnswerCategoryResponse,
setPopup,
slack_bot_id,
formikProps,
}: SlackChannelConfigFormFieldsProps) {
const router = useRouter();
const { values, setFieldValue } = useFormikContext<any>();
const [showAdvancedOptions, setShowAdvancedOptions] = useState(false);
const [viewUnselectableSets, setViewUnselectableSets] = useState(false);
const [currentSearchTerm, setCurrentSearchTerm] = useState("");
const [viewSyncEnabledAssistants, setViewSyncEnabledAssistants] =
useState(false);
@@ -178,6 +188,7 @@ export function SlackChannelConfigFormFields({
}));
}
);
if (isLoading) {
return <ThreeDotsLoader />;
}
@@ -194,7 +205,7 @@ export function SlackChannelConfigFormFields({
<>
<label
htmlFor="channel_name"
className="block font-medium text-base mb-2"
className="block text-text font-medium text-base mb-2"
>
Select A Slack Channel:
</label>{" "}
@@ -204,11 +215,9 @@ export function SlackChannelConfigFormFields({
options={channelOptions || []}
onSelect={(selected) => {
form.setFieldValue("channel_name", selected.name);
setCurrentSearchTerm(selected.name);
}}
initialSearchTerm={field.value}
onSearchTermChange={(term) => {
setCurrentSearchTerm(term);
form.setFieldValue("channel_name", term);
}}
/>
@@ -242,9 +251,15 @@ export function SlackChannelConfigFormFields({
<RadioGroupItemField
value="assistant"
id="assistant"
label="Specific Assistant"
label="Search Assistant"
sublabel="Control both the documents and the prompt to use for answering questions"
/>
<RadioGroupItemField
value="non_search_assistant"
id="non_search_assistant"
label="Non-Search Assistant"
sublabel="Chat with an assistant that does not use documents"
/>
</RadioGroup>
</div>
{values.knowledge_source === "document_sets" &&
@@ -408,118 +423,165 @@ export function SlackChannelConfigFormFields({
)}
</div>
)}
</div>
{values.knowledge_source === "non_search_assistant" && (
<div className="mt-4">
<SubLabel>
<>
Select the non-search assistant OnyxBot will use while answering
questions in Slack.
{syncEnabledAssistants.length > 0 && (
<>
<br />
<span className="text-sm text-text-dark/80">
Note: Some of your assistants have auto-synced connectors
in their document sets. You cannot select these assistants
as they will not be able to answer questions in Slack.{" "}
<button
type="button"
onClick={() =>
setViewSyncEnabledAssistants(
(viewSyncEnabledAssistants) =>
!viewSyncEnabledAssistants
)
}
className="text-sm text-link"
>
{viewSyncEnabledAssistants
? "Hide un-selectable "
: "View all "}
assistants
</button>
</span>
</>
)}
</>
</SubLabel>
<div className="mt-6">
<AdvancedOptionsToggle
showAdvancedOptions={showAdvancedOptions}
setShowAdvancedOptions={setShowAdvancedOptions}
/>
</div>
{showAdvancedOptions && (
<div className="mt-2 space-y-4">
<div className="w-64">
<SelectorFormField
name="response_type"
label="Answer Type"
tooltip="Controls the format of OnyxBot's responses."
options={[
{ name: "Standard", value: "citations" },
{ name: "Detailed", value: "quotes" },
]}
name="persona_id"
options={nonSearchAssistants.map((persona) => ({
name: persona.name,
value: persona.id,
}))}
/>
</div>
)}
</div>
<Separator className="my-4" />
<Accordion type="multiple" className=" gap-y-2 w-full">
{values.knowledge_source !== "non_search_assistant" && (
<AccordionItem value="search-options">
<AccordionTrigger className="text-text">
Search Configuration
</AccordionTrigger>
<AccordionContent>
<div className="space-y-4">
<div className="w-64">
<SelectorFormField
name="response_type"
label="Answer Type"
tooltip="Controls the format of OnyxBot's responses."
options={[
{ name: "Standard", value: "citations" },
{ name: "Detailed", value: "quotes" },
]}
/>
</div>
<CheckFormField
name="enable_auto_filters"
label="Enable LLM Autofiltering"
tooltip="If set, the LLM will generate source and time filters based on the user's query"
/>
<BooleanFormField
name="show_continue_in_web_ui"
removeIndent
label="Show Continue in Web UI button"
tooltip="If set, will show a button at the bottom of the response that allows the user to continue the conversation in the Onyx Web UI"
/>
<CheckFormField
name="answer_validity_check_enabled"
label="Only respond if citations found"
tooltip="If set, will only answer questions where the model successfully produces citations"
/>
</div>
</AccordionContent>
</AccordionItem>
)}
<AccordionItem className="mt-4" value="general-options">
<AccordionTrigger>General Configuration</AccordionTrigger>
<AccordionContent>
<div className="space-y-4">
<CheckFormField
name="show_continue_in_web_ui"
label="Show Continue in Web UI button"
tooltip="If set, will show a button at the bottom of the response that allows the user to continue the conversation in the Onyx Web UI"
/>
<CheckFormField
name="still_need_help_enabled"
onChange={(checked: boolean) => {
setFieldValue("still_need_help_enabled", checked);
if (!checked) {
setFieldValue("follow_up_tags", []);
}
}}
label={'Give a "Still need help?" button'}
tooltip={`OnyxBot's response will include a button at the bottom
of the response that asks the user if they still need help.`}
/>
{values.still_need_help_enabled && (
<CollapsibleSection prompt="Configure Still Need Help Button">
<TextArrayField
name="follow_up_tags"
label="(Optional) Users / Groups to Tag"
values={values}
subtext={
<div>
The Slack users / groups we should tag if the user
clicks the &quot;Still need help?&quot; button. If no
emails are provided, we will not tag anyone and will
just react with a 🆘 emoji to the original message.
</div>
}
placeholder="User email or user group name..."
/>
</CollapsibleSection>
)}
<CheckFormField
name="questionmark_prefilter_enabled"
label="Only respond to questions"
tooltip="If set, OnyxBot will only respond to messages that contain a question mark"
/>
<CheckFormField
name="respond_tag_only"
label="Respond to @OnyxBot Only"
tooltip="If set, OnyxBot will only respond when directly tagged"
/>
<CheckFormField
name="respond_to_bots"
label="Respond to Bot messages"
tooltip="If not set, OnyxBot will always ignore messages from Bots"
/>
<BooleanFormField
name="still_need_help_enabled"
removeIndent
onChange={(checked: boolean) => {
setFieldValue("still_need_help_enabled", checked);
if (!checked) {
setFieldValue("follow_up_tags", []);
}
}}
label={'Give a "Still need help?" button'}
tooltip={`OnyxBot's response will include a button at the bottom
of the response that asks the user if they still need help.`}
/>
{values.still_need_help_enabled && (
<CollapsibleSection prompt="Configure Still Need Help Button">
<TextArrayField
name="follow_up_tags"
label="(Optional) Users / Groups to Tag"
values={values}
name="respond_member_group_list"
label="(Optional) Respond to Certain Users / Groups"
subtext={
<div>
The Slack users / groups we should tag if the user clicks
the &quot;Still need help?&quot; button. If no emails are
provided, we will not tag anyone and will just react with a
🆘 emoji to the original message.
</div>
"If specified, OnyxBot responses will only " +
"be visible to the members or groups in this list."
}
values={values}
placeholder="User email or user group name..."
/>
</CollapsibleSection>
)}
<BooleanFormField
name="answer_validity_check_enabled"
removeIndent
label="Only respond if citations found"
tooltip="If set, will only answer questions where the model successfully produces citations"
/>
<BooleanFormField
name="questionmark_prefilter_enabled"
removeIndent
label="Only respond to questions"
tooltip="If set, OnyxBot will only respond to messages that contain a question mark"
/>
<BooleanFormField
name="respond_tag_only"
removeIndent
label="Respond to @OnyxBot Only"
tooltip="If set, OnyxBot will only respond when directly tagged"
/>
<BooleanFormField
name="respond_to_bots"
removeIndent
label="Respond to Bot messages"
tooltip="If not set, OnyxBot will always ignore messages from Bots"
/>
<BooleanFormField
name="enable_auto_filters"
removeIndent
label="Enable LLM Autofiltering"
tooltip="If set, the LLM will generate source and time filters based on the user's query"
/>
<TextArrayField
name="respond_member_group_list"
label="(Optional) Respond to Certain Users / Groups"
subtext={
"If specified, OnyxBot responses will only " +
"be visible to the members or groups in this list."
}
values={values}
placeholder="User email or user group name..."
/>
<StandardAnswerCategoryDropdownField
standardAnswerCategoryResponse={standardAnswerCategoryResponse}
categories={values.standard_answer_categories}
setCategories={(categories: any) =>
setFieldValue("standard_answer_categories", categories)
}
/>
</div>
)}
</div>
</AccordionContent>
</AccordionItem>
</Accordion>
<div className="flex mt-8 gap-x-2 w-full justify-end">
{shouldShowPrivacyAlert && (
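The still_need_help_enabled toggle above clears its dependent follow_up_tags list whenever it is unchecked, so stale tags are never submitted after the button is disabled. A minimal sketch of that pattern, assuming a Formik-style setFieldValue; the field names match the form, but the harness is illustrative:

// Illustrative sketch, not the actual form component.
type SetFieldValue = (field: string, value: unknown) => void;

function onStillNeedHelpToggle(checked: boolean, setFieldValue: SetFieldValue) {
  setFieldValue("still_need_help_enabled", checked);
  if (!checked) {
    // Reset the dependent list so stale tags are not submitted later.
    setFieldValue("follow_up_tags", []);
  }
}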

View File

@@ -11,6 +11,7 @@ import { SourceIcon } from "@/components/SourceIcon";
import { SlackBotTable } from "./SlackBotTable";
import { useSlackBots } from "./[bot-id]/hooks";
import { ValidSources } from "@/lib/types";
import CreateButton from "@/components/ui/createButton";
const Main = () => {
const {
@@ -71,27 +72,7 @@ const Main = () => {
found in the Onyx documentation to get started!
</p>
<Link
className="
flex
py-2
px-4
mt-2
border
border-border
h-fit
cursor-pointer
hover:bg-hover
text-sm
w-40
"
href="/admin/bots/new"
>
<div className="mx-auto flex">
<FiPlusSquare className="my-auto mr-2" />
New Slack Bot
</div>
</Link>
<CreateButton href="/admin/bots/new" text="New Slack Bot" />
<SlackBotTable slackBots={slackBots} />
</div>
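This hunk, and the matching ones in the token-rate-limit and tools pages below, swap a hand-styled Link for a shared <CreateButton href text />. A plausible sketch of such a component, reconstructed from the removed markup and assuming Next.js Link; the real @/components/ui/createButton may differ:

// Hypothetical reconstruction, not the actual @/components/ui/createButton.
import Link from "next/link";
import { FiPlusSquare } from "react-icons/fi";

interface CreateButtonProps {
  href: string;
  text: string;
}

export default function CreateButton({ href, text }: CreateButtonProps) {
  return (
    <Link
      href={href}
      className="flex py-2 px-4 mt-2 border border-border h-fit cursor-pointer hover:bg-hover text-sm w-40"
    >
      <div className="mx-auto flex">
        <FiPlusSquare className="my-auto mr-2" />
        {text}
      </div>
    </Link>
  );
}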

View File

@@ -11,6 +11,7 @@ import {
GeminiIcon,
OpenSourceIcon,
AnthropicSVG,
IconProps,
} from "@/components/icons/icons";
import { FaRobot } from "react-icons/fa";
@@ -74,29 +75,36 @@ export interface LLMProviderDescriptor {
}
export const getProviderIcon = (providerName: string, modelName?: string) => {
const modelNameToIcon = (
modelName: string,
fallbackIcon: ({ size, className }: IconProps) => JSX.Element
): (({ size, className }: IconProps) => JSX.Element) => {
if (modelName?.toLowerCase().includes("amazon")) {
return AmazonIcon;
}
if (modelName?.toLowerCase().includes("phi")) {
return MicrosoftIconSVG;
}
if (modelName?.toLowerCase().includes("mistral")) {
return MistralIcon;
}
if (modelName?.toLowerCase().includes("llama")) {
return MetaIcon;
}
if (modelName?.toLowerCase().includes("gemini")) {
return GeminiIcon;
}
if (modelName?.toLowerCase().includes("claude")) {
return AnthropicIcon;
} else {
return fallbackIcon;
}
};
switch (providerName) {
case "openai":
// Special cases for openai based on modelName
if (modelName?.toLowerCase().includes("amazon")) {
return AmazonIcon;
}
if (modelName?.toLowerCase().includes("phi")) {
return MicrosoftIconSVG;
}
if (modelName?.toLowerCase().includes("mistral")) {
return MistralIcon;
}
if (modelName?.toLowerCase().includes("llama")) {
return MetaIcon;
}
if (modelName?.toLowerCase().includes("gemini")) {
return GeminiIcon;
}
if (modelName?.toLowerCase().includes("claude")) {
return AnthropicIcon;
}
return OpenAIIcon; // Default for openai
return modelNameToIcon(modelName || "", OpenAIIcon);
case "anthropic":
return AnthropicSVG;
case "bedrock":
@@ -104,7 +112,7 @@ export const getProviderIcon = (providerName: string, modelName?: string) => {
case "azure":
return AzureIcon;
default:
return CPUIcon;
return modelNameToIcon(modelName || "", CPUIcon);
}
};
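The refactor above hoists the model-name substring checks into modelNameToIcon, so non-OpenAI providers (including the default branch) can get model-specific icons instead of always falling back to OpenAIIcon or CPUIcon. A standalone sketch of the same idea using a lookup table; the icon components here are string placeholders, not the real ones from @/components/icons/icons:

// Standalone sketch; string placeholders stand in for the real icon components.
type IconComponent = (props: { size?: number; className?: string }) => string;

const placeholder = (name: string): IconComponent => () => name;

const MODEL_ICON_RULES: Array<[string, IconComponent]> = [
  ["amazon", placeholder("AmazonIcon")],
  ["phi", placeholder("MicrosoftIconSVG")],
  ["mistral", placeholder("MistralIcon")],
  ["llama", placeholder("MetaIcon")],
  ["gemini", placeholder("GeminiIcon")],
  ["claude", placeholder("AnthropicIcon")],
];

function modelNameToIcon(modelName: string, fallback: IconComponent): IconComponent {
  const lower = modelName.toLowerCase();
  // First substring match wins; otherwise use the provider's fallback icon.
  const match = MODEL_ICON_RULES.find(([needle]) => lower.includes(needle));
  return match ? match[1] : fallback;
}

console.log(modelNameToIcon("mistral-large", placeholder("CPUIcon"))({})); // "MistralIcon"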

View File

@@ -18,7 +18,11 @@ import AdvancedFormPage from "./pages/Advanced";
import DynamicConnectionForm from "./pages/DynamicConnectorCreationForm";
import CreateCredential from "@/components/credentials/actions/CreateCredential";
import ModifyCredential from "@/components/credentials/actions/ModifyCredential";
import { ConfigurableSources, oauthSupportedSources } from "@/lib/types";
import {
ConfigurableSources,
oauthSupportedSources,
ValidSources,
} from "@/lib/types";
import {
Credential,
credentialTemplates,
@@ -444,7 +448,7 @@ export default function AddConnector({
<CardSection>
<Title className="mb-2 text-lg">Select a credential</Title>
{connector == "gmail" ? (
{connector == ValidSources.Gmail ? (
<GmailMain />
) : (
<>
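Replacing the raw "gmail" literal with ValidSources.Gmail lets the compiler catch typos. A small sketch, with a trimmed stand-in for the real enum in @/lib/types:

// Trimmed stand-in for the real ValidSources enum.
enum ValidSources {
  Gmail = "gmail",
  Slack = "slack",
}

const connector: string = "gmail";
// A misspelled literal like "gamil" compiles silently; a misspelled enum
// member like ValidSources.Gamil is a compile-time error.
if (connector === ValidSources.Gmail) {
  console.log("render GmailMain");
}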

View File

@@ -40,6 +40,7 @@ import {
TooltipProvider,
TooltipTrigger,
} from "@/components/ui/tooltip";
import CreateButton from "@/components/ui/createButton";
const numToDisplay = 50;
@@ -305,9 +306,13 @@ const Main = () => {
<div className="mb-3"></div>
<div className="flex mb-6">
<Link href="/admin/documents/sets/new">
<CreateButton
href="/admin/documents/sets/new"
text="New Document Set"
/>
{/* <Link href="/admin/documents/sets/new">
<Button variant="navigate">New Document Set</Button>
</Link> */}
</div>
{documentSets.length > 0 && (

View File

@@ -231,7 +231,7 @@ export function SettingsForm() {
<Checkbox
label="Pro Search Disabled"
sublabel="If set, users will not be able to use Pro Search."
checked={settings.pro_search_disabled}
checked={settings.pro_search_disabled ?? false}
onChange={(e) =>
handleToggleSettingsField("pro_search_disabled", e.target.checked)
}

View File

@@ -10,7 +10,7 @@ export interface Settings {
notifications: Notification[];
needs_reindexing: boolean;
gpu_enabled: boolean;
pro_search_disabled: boolean;
pro_search_disabled: boolean | null;
product_gating: GatingType;
auto_scroll: boolean;
}
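Widening pro_search_disabled to boolean | null pairs with the ?? false in SettingsForm above: ?? falls back only on null/undefined, so the checkbox never receives null and an explicit false from the server is preserved. A minimal illustration:

// Minimal illustration of why `??` is the right operator here.
const fromServer: boolean | null = null;

const checkedStrict = fromServer ?? false; // false only when null/undefined
const checkedLoose = fromServer || false; // would also coerce other falsy values

console.log(checkedStrict, checkedLoose); // false false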

View File

@@ -18,6 +18,7 @@ import { usePopup } from "@/components/admin/connectors/Popup";
import { CreateRateLimitModal } from "./CreateRateLimitModal";
import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled";
import { ShieldIcon } from "@/components/icons/icons";
import CreateButton from "@/components/ui/createButton";
const BASE_URL = "/api/admin/token-rate-limits";
const GLOBAL_TOKEN_FETCH_URL = `${BASE_URL}/global`;
@@ -138,15 +139,10 @@ function Main() {
</li>
</ul>
<Button
variant="navigate"
size="sm"
className="my-4"
onClick={() => setModalIsOpen(true)}
>
Create a Token Rate Limit
</Button>
<CreateButton
href="/admin/token-rate-limits/new"
text="Create a Token Rate Limit"
/>
{isPaidEnterpriseFeaturesEnabled && (
<Tabs
value={tabIndex.toString()}

View File

@@ -9,6 +9,7 @@ import { fetchSS } from "@/lib/utilsSS";
import { ErrorCallout } from "@/components/ErrorCallout";
import { AdminPageTitle } from "@/components/admin/Title";
import { ToolIcon } from "@/components/icons/icons";
import CreateButton from "@/components/ui/createButton";
export default async function Page() {
const toolResponse = await fetchSS("/tool");
@@ -39,27 +40,7 @@ export default async function Page() {
<Separator />
<Title>Create a Tool</Title>
<Link
href="/admin/tools/new"
className="
flex
py-2
px-4
mt-2
border
border-border
h-fit
cursor-pointer
hover:bg-hover
text-sm
w-40
"
>
<div className="mx-auto flex">
<FiPlusSquare className="my-auto mr-2" />
New Tool
</div>
</Link>
<CreateButton href="/admin/tools/new" text="New Tool" />
<Separator />

View File

@@ -471,9 +471,6 @@ export function ChatPage({
}
return;
}
const shouldScrollToBottom =
visibleRange.get(existingChatSessionId) === undefined ||
visibleRange.get(existingChatSessionId)?.end == 0;
clearSelectedDocuments();
setIsFetchingChatMessages(true);
@@ -511,16 +508,13 @@ export function ChatPage({
// go to bottom. If initial load, then do a scroll,
// otherwise just appear at the bottom
if (shouldScrollToBottom) {
scrollInitialized.current = false;
}
if (shouldScrollToBottom) {
if (!hasPerformedInitialScroll && autoScrollEnabled) {
clientScrollToBottom();
} else if (isChatSessionSwitch && autoScrollEnabled) {
clientScrollToBottom(true);
}
scrollInitialized.current = false;
if (!hasPerformedInitialScroll) {
clientScrollToBottom();
} else if (isChatSessionSwitch) {
clientScrollToBottom(true);
}
setIsFetchingChatMessages(false);
@@ -1034,6 +1028,7 @@ export function ChatPage({
) {
setDocumentSidebarToggled(false);
}
clientScrollToBottom();
}, [chatSessionIdRef.current]);
const loadNewPageLogic = (event: MessageEvent) => {
@@ -1068,7 +1063,6 @@ export function ChatPage({
if (!documentSidebarInitialWidth && maxDocumentSidebarWidth) {
documentSidebarInitialWidth = Math.min(700, maxDocumentSidebarWidth);
}
class CurrentMessageFIFO {
private stack: PacketType[] = [];
isComplete: boolean = false;
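Only a fragment of CurrentMessageFIFO is visible in this hunk. A plausible sketch of such a first-in-first-out packet buffer, with PacketType and the methods assumed rather than taken from the actual implementation:

// Assumed shape, not the real CurrentMessageFIFO.
type PacketType = Record<string, unknown>;

class PacketFIFO {
  private queue: PacketType[] = [];
  isComplete = false;

  push(packet: PacketType) {
    this.queue.push(packet);
  }

  nextPacket(): PacketType | undefined {
    // FIFO: consume from the front, in arrival order.
    return this.queue.shift();
  }

  isEmpty(): boolean {
    return this.queue.length === 0;
  }
}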
@@ -1127,6 +1121,7 @@ export function ChatPage({
"Continue Generating (pick up exactly where you left off)",
});
};
const [uncaughtError, setUncaughtError] = useState<string | null>(null);
const onSubmit = async ({
messageIdToResend,
@@ -1332,7 +1327,9 @@ export function ChatPage({
searchParams.get(SEARCH_PARAM_NAMES.SYSTEM_PROMPT) || undefined,
useExistingUserMessage: isSeededChat,
useLanggraph:
!settings?.settings.pro_search_disabled && proSearchEnabled,
!settings?.settings.pro_search_disabled &&
proSearchEnabled &&
retrievalEnabled,
});
const delay = (ms: number) => {
@@ -1440,21 +1437,22 @@ export function ChatPage({
}
}
// Continuously refine the sub_questions based on the packets that we receive
// // Continuously refine the sub_questions based on the packets that we receive
if (
Object.hasOwn(packet, "stop_reason") &&
Object.hasOwn(packet, "level_question_num")
) {
// sub_questions = constructSubQuestions(
// sub_questions,
// packet as StreamStopInfo
// );
sub_questions = constructSubQuestions(
sub_questions,
packet as StreamStopInfo
);
} else if (Object.hasOwn(packet, "sub_question")) {
is_generating = true;
sub_questions = constructSubQuestions(
sub_questions,
packet as SubQuestionPiece
);
setAgenticGenerating(true);
} else if (Object.hasOwn(packet, "sub_query")) {
sub_questions = constructSubQuestions(
sub_questions,
@@ -1552,8 +1550,23 @@ export function ChatPage({
}
);
} else if (Object.hasOwn(packet, "error")) {
error = (packet as StreamingError).error;
stackTrace = (packet as StreamingError).stack_trace;
if (
sub_questions.length > 0 &&
sub_questions
.filter((q) => q.level === 0)
.every((q) => q.is_stopped === true)
) {
setUncaughtError((packet as StreamingError).error);
updateChatState("input");
setAgenticGenerating(false);
setAlternativeGeneratingAssistant(null);
setSubmittedMessage("");
return;
// throw new Error((packet as StreamingError).error);
} else {
error = (packet as StreamingError).error;
stackTrace = (packet as StreamingError).stack_trace;
}
} else if (Object.hasOwn(packet, "message_id")) {
finalMessage = packet as BackendMessage;
} else if (Object.hasOwn(packet, "stop_reason")) {
@@ -1663,6 +1676,7 @@ export function ChatPage({
completeMessageMapOverride: currentMessageMap(completeMessageDetail),
});
}
setAgenticGenerating(false);
resetRegenerationState(currentSessionId());
updateChatState("input");
@@ -1790,6 +1804,7 @@ export function ChatPage({
// Used to maintain a "time out" for history sidebar so our existing refs can have time to process change
const [untoggled, setUntoggled] = useState(false);
const [loadingError, setLoadingError] = useState<string | null>(null);
const [agenticGenerating, setAgenticGenerating] = useState(false);
const explicitlyUntoggle = () => {
setShowHistorySidebar(false);
@@ -1834,17 +1849,17 @@ export function ChatPage({
const autoScrollEnabled =
user?.preferences?.auto_scroll == null
? settings?.enterpriseSettings?.auto_scroll || false
: user?.preferences?.auto_scroll!;
: user?.preferences?.auto_scroll! && !agenticGenerating;
// useScrollonStream({
// chatState: currentSessionChatState,
// scrollableDivRef,
// scrollDist,
// endDivRef,
// debounceNumber,
// mobile: settings?.isMobile,
// enableAutoScroll: autoScrollEnabled,
// });
useScrollonStream({
chatState: currentSessionChatState,
scrollableDivRef,
scrollDist,
endDivRef,
debounceNumber,
mobile: settings?.isMobile,
enableAutoScroll: autoScrollEnabled,
});
// Virtualization + Scrolling related effects and functions
const scrollInitialized = useRef(false);
@@ -2040,6 +2055,7 @@ export function ChatPage({
}
const data = await response.json();
router.push(data.redirect_url);
} catch (error) {
console.error("Error seeding chat from Slack:", error);
@@ -2634,6 +2650,7 @@ export function ChatPage({
{message.sub_questions &&
message.sub_questions.length > 0 ? (
<AgenticMessage
error={uncaughtError}
docSidebarToggled={
documentSidebarToggled &&
(selectedMessageForDocDisplay ==
@@ -3058,20 +3075,19 @@ export function ChatPage({
</div>
<div
ref={inputRef}
className="absolute bottom-0 z-10 w-full"
className="absolute pointer-events-none bottom-0 z-10 w-full"
>
<div className="w-[95%] mx-auto relative mb-8">
{aboveHorizon && (
<div className="pointer-events-none w-full bg-transparent flex sticky justify-center">
<button
onClick={() => clientScrollToBottom()}
className="p-1 pointer-events-auto rounded-2xl bg-background-strong border border-border mb-2 mx-auto "
>
<FiArrowDown size={18} />
</button>
</div>
)}
{aboveHorizon && (
<div className="mx-auto w-fit !pointer-events-none flex sticky justify-center">
<button
onClick={() => clientScrollToBottom()}
className="p-1 pointer-events-auto rounded-2xl bg-background-strong border border-border mx-auto "
>
<FiArrowDown size={18} />
</button>
</div>
)}
<div className="pointer-events-auto w-[95%] mx-auto relative mb-8">
<ChatInputBar
proSearchEnabled={proSearchEnabled}
setProSearchEnabled={() => toggleProSearch()}
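Throughout the streaming loop above, packets are discriminated by property presence with Object.hasOwn rather than a tag field. A trimmed sketch of that dispatch style; the packet shapes are stand-ins for the real StreamingError, StreamStopInfo, and SubQuestionPiece:

// Trimmed stand-in packet shapes; only the discriminating properties are kept.
interface StreamingErrorLike { error: string; stack_trace?: string }
interface StopInfoLike { stop_reason: string; level_question_num?: number }
interface SubQuestionLike { sub_question: string; level: number }

type Packet = StreamingErrorLike | StopInfoLike | SubQuestionLike;

function handlePacket(packet: Packet): string {
  // Dispatch on which property the packet carries, as the hunks above do.
  if (Object.hasOwn(packet, "error")) {
    return `error: ${(packet as StreamingErrorLike).error}`;
  }
  if (Object.hasOwn(packet, "stop_reason")) {
    return `stopped: ${(packet as StopInfoLike).stop_reason}`;
  }
  return `sub-question: ${(packet as SubQuestionLike).sub_question}`;
}

console.log(handlePacket({ error: "LLM timeout" })); // error: LLM timeout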

View File

@@ -1,5 +1,6 @@
import React, { useContext, useEffect, useRef, useState } from "react";
import { FiPlusCircle, FiPlus, FiInfo, FiX, FiFilter } from "react-icons/fi";
import { FiLoader } from "react-icons/fi";
import { ChatInputOption } from "./ChatInputOption";
import { Persona } from "@/app/admin/assistants/interfaces";
import LLMPopover from "./LLMPopover";
@@ -36,6 +37,9 @@ import { buildImgUrl } from "../files/images/utils";
import { useUser } from "@/components/user/UserProvider";
import { AgenticToggle } from "./AgenticToggle";
import { SettingsContext } from "@/components/settings/SettingsProvider";
import { LoadingIndicator } from "react-select/dist/declarations/src/components/indicators";
import { FidgetSpinner } from "react-loader-spinner";
import { LoadingAnimation } from "@/components/Loading";
const MAX_INPUT_HEIGHT = 200;
export const SourceChip2 = ({
@@ -709,12 +713,16 @@ export function ChatInputBar({
<SourceChip
key={`file-${index}`}
icon={
<img
className="h-full py-.5 object-cover rounded-lg bg-background cursor-pointer"
src={buildImgUrl(file.id)}
/>
file.isUploading ? (
<FiLoader className="animate-spin" />
) : (
<img
className="h-full py-.5 object-cover rounded-lg bg-background cursor-pointer"
src={buildImgUrl(file.id)}
/>
)
}
title={file.name || "File"}
title={file.name || "File" + file.id}
onRemove={() => {
setFiles(
files.filter(
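The chip icon above now reflects upload state: a spinner while file.isUploading is set, the image thumbnail once the upload finishes. A minimal sketch of that conditional, with trimmed stand-in types:

// Trimmed stand-ins for the real SourceChip icon logic.
import { FiLoader } from "react-icons/fi";

interface ChatFile {
  id: string;
  name?: string;
  isUploading?: boolean;
}

function fileChipIcon(file: ChatFile, buildImgUrl: (id: string) => string) {
  return file.isUploading ? (
    <FiLoader className="animate-spin" />
  ) : (
    <img
      className="h-full py-.5 object-cover rounded-lg bg-background"
      src={buildImgUrl(file.id)}
    />
  );
}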

View File

@@ -5,7 +5,7 @@ import {
PopoverTrigger,
} from "@/components/ui/popover";
import { ChatInputOption } from "./ChatInputOption";
import { getDisplayNameForModel } from "@/lib/hooks";
import { defaultModelsByProvider, getDisplayNameForModel } from "@/lib/hooks";
import {
checkLLMSupportsImageInput,
destructureValue,
@@ -61,22 +61,23 @@ export default function LLMPopover({
llmOptionsByProvider[llmProvider.provider] = [];
}
(llmProvider.display_model_names || llmProvider.model_names).forEach(
(modelName) => {
if (!uniqueModelNames.has(modelName)) {
uniqueModelNames.add(modelName);
llmOptionsByProvider[llmProvider.provider].push({
name: modelName,
value: structureValue(
llmProvider.name,
llmProvider.provider,
modelName
),
icon: getProviderIcon(llmProvider.provider, modelName),
});
}
(
llmProvider.display_model_names ||
defaultModelsByProvider[llmProvider.provider]
).forEach((modelName) => {
if (!uniqueModelNames.has(modelName)) {
uniqueModelNames.add(modelName);
llmOptionsByProvider[llmProvider.provider].push({
name: modelName,
value: structureValue(
llmProvider.name,
llmProvider.provider,
modelName
),
icon: getProviderIcon(llmProvider.provider, modelName),
});
}
);
});
});
const llmOptions = Object.entries(llmOptionsByProvider).flatMap(
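The popover now falls back to a curated per-provider default list when an admin has not set display_model_names. A sketch of the pattern; the real defaultModelsByProvider lives in @/lib/hooks and its contents here are assumed placeholders (the extra || [] guard is a defensive addition for providers with no defaults):

// Assumed placeholder contents, not the real defaultModelsByProvider.
const defaultModelsByProvider: Record<string, string[]> = {
  openai: ["gpt-4o", "gpt-4o-mini"],
  anthropic: ["claude-3-5-sonnet-20241022"],
};

function modelsToShow(provider: string, displayModelNames?: string[] | null): string[] {
  // Prefer the admin-curated list; else the provider defaults; else nothing.
  return displayModelNames || defaultModelsByProvider[provider] || [];
}

console.log(modelsToShow("openai", null)); // ["gpt-4o", "gpt-4o-mini"]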

View File

@@ -218,6 +218,7 @@ export interface SubQuestionDetail extends BaseQuestionIdentifier {
sub_queries?: SubQueryDetail[] | null;
context_docs?: { top_documents: OnyxDocument[] } | null;
is_complete?: boolean;
is_stopped?: boolean;
}
export interface SubQueryDetail {
@@ -249,14 +250,13 @@ export const constructSubQuestions = (
// );
if ("stop_reason" in newDetail) {
console.log("STOP REASON");
console.log(newDetail);
const { level, level_question_num } = newDetail;
let subQuestion = updatedSubQuestions.find(
(sq) => sq.level === level && sq.level_question_num === level_question_num
);
if (subQuestion) {
// subQuestion.is_complete = true;
subQuestion.is_complete = true;
subQuestion.is_stopped = true;
}
} else if ("top_documents" in newDetail) {
const { level, level_question_num, top_documents } = newDetail;
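The stop_reason branch above locates the sub-question a stop packet addresses and flags it both complete and stopped, which is what lets the UI treat a user-stopped question differently from an errored one. A trimmed sketch:

// Trimmed stand-in types; mirrors the stop handling shown above.
interface SubQuestion {
  level: number;
  level_question_num: number;
  is_complete?: boolean;
  is_stopped?: boolean;
}

function markStopped(
  subQuestions: SubQuestion[],
  stop: { level: number; level_question_num: number }
): SubQuestion[] {
  // Shallow copy, then mutate the matching entry, as the original does.
  const updated = [...subQuestions];
  const match = updated.find(
    (sq) =>
      sq.level === stop.level &&
      sq.level_question_num === stop.level_question_num
  );
  if (match) {
    match.is_complete = true;
    match.is_stopped = true; // distinguishes "stopped" from "errored" downstream
  }
  return updated;
}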

View File

@@ -80,6 +80,7 @@ export const AgenticMessage = ({
agenticDocs,
secondLevelSubquestions,
toggleDocDisplay,
error,
}: {
docSidebarToggled?: boolean;
isImprovement?: boolean | null;
@@ -110,6 +111,7 @@ export const AgenticMessage = ({
regenerate?: (modelOverRide: LlmOverride) => Promise<void>;
setPresentingDocument?: (document: OnyxDocument) => void;
toggleDocDisplay?: (agentic: boolean) => void;
error?: string | null;
}) => {
const [noShowingMessage, setNoShowingMessage] = useState(isComplete);
@@ -483,11 +485,28 @@ export const AgenticMessage = ({
) : (
content
)}
{error && (
<p className="mt-2 text-red-700 text-sm my-auto">
{error}
</p>
)}
</div>
</div>
</>
) : isComplete ? null : (
<></>
) : isComplete ? (
error && (
<p className="mt-2 mx-4 text-red-700 text-sm my-auto">
{error}
</p>
)
) : (
<>
{error && (
<p className="mt-2 mx-4 text-red-700 text-sm my-auto">
{error}
</p>
)}
</>
)}
{handleFeedback &&
(isActive ? (

Some files were not shown because too many files have changed in this diff.