nit

chore(dr): finer grained tracing for clarification step, research plan step, and orchestration step (#7374)
Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
2026-02-22 18:25:45 +00:00 · 2026-01-12 16:45:04 -08:00 · 2026-01-12 23:58:27 +00:00 · 2026-01-12 23:21:02 +00:00 · 2026-01-12 23:01:55 +00:00 · 2026-01-12 22:58:11 +00:00
251 changed files with 9280 additions and 5355 deletions
--- a/.github/workflows/deployment.yml
+++ b/.github/workflows/deployment.yml
@@ -285,7 +285,7 @@ jobs:

          Write-Host "Versions set to: $VERSION"

-      - uses: tauri-apps/tauri-action@19b93bb55601e3e373a93cfb6eb4242e45f5af20 # ratchet:tauri-apps/tauri-action@action-v0.6.0
+      - uses: tauri-apps/tauri-action@73fb865345c54760d875b94642314f8c0c894afa # ratchet:tauri-apps/tauri-action@action-v0.6.1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
--- a/.github/workflows/nightly-close-stale-issues.yml
+++ b/.github/workflows/nightly-close-stale-issues.yml
@@ -13,7 +13,7 @@ jobs:
    runs-on: ubuntu-latest
    timeout-minutes: 45
    steps:
-      - uses: actions/stale@5f858e3efba33a5ca4407a664cc011ad407f2008 # ratchet:actions/stale@v10
+      - uses: actions/stale@997185467fa4f803885201cee163a9f38240193d # ratchet:actions/stale@v10
        with:
          stale-issue-message: 'This issue is stale because it has been open 75 days with no activity. Remove stale label or comment or this will be closed in 15 days.'
          stale-pr-message: 'This PR is stale because it has been open 75 days with no activity. Remove stale label or comment or this will be closed in 15 days.'
--- a/.github/workflows/pr-external-dependency-unit-tests.yml
+++ b/.github/workflows/pr-external-dependency-unit-tests.yml
@@ -172,7 +172,7 @@ jobs:

      - name: Upload Docker logs
        if: failure()
-        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v5
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
        with:
          name: docker-logs-${{ matrix.test-dir }}
          path: docker-logs/
--- a/.github/workflows/pr-integration-tests.yml
+++ b/.github/workflows/pr-integration-tests.yml
@@ -310,8 +310,9 @@ jobs:
          ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID}
          INTEGRATION_TESTS_MODE=true
          CHECK_TTL_MANAGEMENT_TASK_FREQUENCY_IN_HOURS=0.001
-          AUTO_LLM_UPDATE_INTERVAL_SECONDS=1
+          AUTO_LLM_UPDATE_INTERVAL_SECONDS=10
          MCP_SERVER_ENABLED=true
+          USE_LIGHTWEIGHT_BACKGROUND_WORKER=false
          EOF

      - name: Start Docker containers
@@ -438,7 +439,7 @@ jobs:

      - name: Upload logs
        if: always()
-        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
        with:
          name: docker-all-logs-${{ matrix.test-dir.name }}
          path: ${{ github.workspace }}/docker-compose.log
@@ -567,7 +568,7 @@ jobs:

      - name: Upload logs (multi-tenant)
        if: always()
-        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
        with:
          name: docker-all-logs-multitenant
          path: ${{ github.workspace }}/docker-compose-multitenant.log
--- a/.github/workflows/pr-jest-tests.yml
+++ b/.github/workflows/pr-jest-tests.yml
@@ -44,7 +44,7 @@ jobs:

      - name: Upload coverage reports
        if: always()
-        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
        with:
          name: jest-coverage-${{ github.run_id }}
          path: ./web/coverage
--- a/.github/workflows/pr-mit-integration-tests.yml
+++ b/.github/workflows/pr-mit-integration-tests.yml
@@ -301,7 +301,7 @@ jobs:
          ONYX_MODEL_SERVER_IMAGE=${ECR_CACHE}:integration-test-model-server-test-${RUN_ID}
          INTEGRATION_TESTS_MODE=true
          MCP_SERVER_ENABLED=true
-          AUTO_LLM_UPDATE_INTERVAL_SECONDS=1
+          AUTO_LLM_UPDATE_INTERVAL_SECONDS=10
          EOF

      - name: Start Docker containers
@@ -424,7 +424,7 @@ jobs:

      - name: Upload logs
        if: always()
-        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
        with:
          name: docker-all-logs-${{ matrix.test-dir.name }}
          path: ${{ github.workspace }}/docker-compose.log
--- a/.github/workflows/pr-playwright-tests.yml
+++ b/.github/workflows/pr-playwright-tests.yml
@@ -435,7 +435,7 @@ jobs:
          fi
          npx playwright test --project ${PROJECT}

-      - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
+      - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
        if: always()
        with:
          # Includes test results and trace.zip files
@@ -455,7 +455,7 @@ jobs:

      - name: Upload logs
        if: success() || failure()
-        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
        with:
          name: docker-logs-${{ matrix.project }}-${{ github.run_id }}
          path: ${{ github.workspace }}/docker-compose.log
--- a/.github/workflows/pr-python-model-tests.yml
+++ b/.github/workflows/pr-python-model-tests.yml
@@ -144,7 +144,7 @@ jobs:

      - name: Upload logs
        if: always()
-        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # ratchet:actions/upload-artifact@v4
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
        with:
          name: docker-all-logs
          path: ${{ github.workspace }}/docker-compose.log
--- a/.gitignore
+++ b/.gitignore
@@ -21,6 +21,7 @@ backend/tests/regression/search_quality/*.json
 backend/onyx/evals/data/
 backend/onyx/evals/one_off/*.json
 *.log
+*.csv

 # secret files
 .env
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -11,7 +11,6 @@ repos:
      - id: uv-sync
        args: ["--locked", "--all-extras"]
      - id: uv-lock
-        files: ^pyproject\.toml$
      - id: uv-export
        name: uv-export default.txt
        args:
--- a/backend/alembic/versions/8405ca81cc83_notifications_constraint.py
+++ b/backend/alembic/versions/8405ca81cc83_notifications_constraint.py
@@ -0,0 +1,49 @@
+"""notifications constraint, sort index, and cleanup old notifications
+
+Revision ID: 8405ca81cc83
+Revises: a3c1a7904cd0
+Create Date: 2026-01-07 16:43:44.855156
+
+"""
+
+from alembic import op
+
+
+# revision identifiers, used by Alembic.
+revision = "8405ca81cc83"
+down_revision = "a3c1a7904cd0"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # Create unique index for notification deduplication.
+    # This enables atomic ON CONFLICT DO NOTHING inserts in batch_create_notifications.
+    #
+    # Uses COALESCE to handle NULL additional_data (NULLs are normally distinct
+    # in unique constraints, but we want NULL == NULL for deduplication).
+    # The '{}' represents an empty JSONB object as the NULL replacement.
+
+    op.execute(
+        """
+        CREATE UNIQUE INDEX IF NOT EXISTS ix_notification_user_type_data
+        ON notification (user_id, notif_type, COALESCE(additional_data, '{}'::jsonb))
+        """
+    )
+
+    # Create index for efficient notification sorting by user
+    # Covers: WHERE user_id = ? ORDER BY dismissed, first_shown DESC
+    op.execute(
+        """
+        CREATE INDEX IF NOT EXISTS ix_notification_user_sort
+        ON notification (user_id, dismissed, first_shown DESC)
+        """
+    )
+
+    # Clean up legacy 'reindex' notifications that are no longer needed
+    op.execute("DELETE FROM notification WHERE title = 'New Notification'")
+
+
+def downgrade() -> None:
+    op.execute("DROP INDEX IF EXISTS ix_notification_user_type_data")
+    op.execute("DROP INDEX IF EXISTS ix_notification_user_sort")
--- a/backend/alembic/versions/c1d2e3f4a5b6_add_deep_research_tool.py
+++ b/backend/alembic/versions/c1d2e3f4a5b6_add_deep_research_tool.py
@@ -7,7 +7,6 @@ Create Date: 2025-12-18 16:00:00.000000
 """

 from alembic import op
-from onyx.deep_research.dr_mock_tools import RESEARCH_AGENT_DB_NAME
 import sqlalchemy as sa


@@ -19,7 +18,7 @@ depends_on = None


 DEEP_RESEARCH_TOOL = {
-    "name": RESEARCH_AGENT_DB_NAME,
+    "name": "ResearchAgent",
    "display_name": "Research Agent",
    "description": "The Research Agent is a sub-agent that conducts research on a specific topic.",
    "in_code_tool_id": "ResearchAgent",
--- a/backend/alembic/versions/d25168c2beee_tool_name_consistency.py
+++ b/backend/alembic/versions/d25168c2beee_tool_name_consistency.py
@@ -0,0 +1,86 @@
+"""tool_name_consistency
+
+Revision ID: d25168c2beee
+Revises: 8405ca81cc83
+Create Date: 2026-01-11 17:54:40.135777
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "d25168c2beee"
+down_revision = "8405ca81cc83"
+branch_labels = None
+depends_on = None
+
+
+# Currently the seeded tools have the in_code_tool_id == name
+CURRENT_TOOL_NAME_MAPPING = [
+    "SearchTool",
+    "WebSearchTool",
+    "ImageGenerationTool",
+    "PythonTool",
+    "OpenURLTool",
+    "KnowledgeGraphTool",
+    "ResearchAgent",
+]
+
+# Mapping of in_code_tool_id -> name
+# These are the expected names that we want in the database
+EXPECTED_TOOL_NAME_MAPPING = {
+    "SearchTool": "internal_search",
+    "WebSearchTool": "web_search",
+    "ImageGenerationTool": "generate_image",
+    "PythonTool": "python",
+    "OpenURLTool": "open_url",
+    "KnowledgeGraphTool": "run_kg_search",
+    "ResearchAgent": "research_agent",
+}
+
+
+def upgrade() -> None:
+    conn = op.get_bind()
+
+    # Mapping of in_code_tool_id to the NAME constant from each tool class
+    # These match the .name property of each tool implementation
+    tool_name_mapping = EXPECTED_TOOL_NAME_MAPPING
+
+    # Update the name column for each tool based on its in_code_tool_id
+    for in_code_tool_id, expected_name in tool_name_mapping.items():
+        conn.execute(
+            sa.text(
+                """
+                UPDATE tool
+                SET name = :expected_name
+                WHERE in_code_tool_id = :in_code_tool_id
+                """
+            ),
+            {
+                "expected_name": expected_name,
+                "in_code_tool_id": in_code_tool_id,
+            },
+        )
+
+
+def downgrade() -> None:
+    conn = op.get_bind()
+
+    # Reverse the migration by setting name back to in_code_tool_id
+    # This matches the original pattern where name was the class name
+    for in_code_tool_id in CURRENT_TOOL_NAME_MAPPING:
+        conn.execute(
+            sa.text(
+                """
+                UPDATE tool
+                SET name = :current_name
+                WHERE in_code_tool_id = :in_code_tool_id
+                """
+            ),
+            {
+                "current_name": in_code_tool_id,
+                "in_code_tool_id": in_code_tool_id,
+            },
+        )
--- a/backend/ee/onyx/server/query_and_chat/chat_backend.py
+++ b/backend/ee/onyx/server/query_and_chat/chat_backend.py
@@ -23,6 +23,7 @@ from onyx.db.models import User
 from onyx.llm.factory import get_llm_for_persona
 from onyx.natural_language_processing.utils import get_tokenizer
 from onyx.server.query_and_chat.models import CreateChatMessageRequest
+from onyx.server.query_and_chat.models import MessageOrigin
 from onyx.utils.logger import setup_logger

 logger = setup_logger()
@@ -100,6 +101,7 @@ def handle_simplified_chat_message(
        chunks_below=0,
        full_doc=chat_message_req.full_doc,
        structured_response_format=chat_message_req.structured_response_format,
+        origin=MessageOrigin.API,
    )

    packets = stream_chat_message_objects(
@@ -203,6 +205,7 @@ def handle_send_message_simple_with_history(
        chunks_below=0,
        full_doc=req.full_doc,
        structured_response_format=req.structured_response_format,
+        origin=MessageOrigin.API,
    )

    packets = stream_chat_message_objects(
--- a/backend/ee/onyx/server/usage_limits.py
+++ b/backend/ee/onyx/server/usage_limits.py
@@ -1,8 +1,5 @@
 """EE Usage limits - trial detection via billing information."""

-from datetime import datetime
-from datetime import timezone
-
 from ee.onyx.server.tenants.billing import fetch_billing_information
 from ee.onyx.server.tenants.models import BillingInformation
 from ee.onyx.server.tenants.models import SubscriptionStatusResponse
@@ -31,13 +28,7 @@ def is_tenant_on_trial(tenant_id: str) -> bool:
            return True

        if isinstance(billing_info, BillingInformation):
-            # Check if trial is active
-            if billing_info.trial_end is not None:
-                now = datetime.now(timezone.utc)
-                # Trial active if trial_end is in the future
-                # and subscription status indicates trialing
-                if billing_info.trial_end > now and billing_info.status == "trialing":
-                    return True
+            return billing_info.status == "trialing"

        return False

--- a/backend/onyx/access/models.py
+++ b/backend/onyx/access/models.py
@@ -105,6 +105,8 @@ class DocExternalAccess:
        )


+# TODO(andrei): First refactor this into a pydantic model, then get rid of
+# duplicate fields.
@dataclass(frozen=True, init=False)
 class DocumentAccess(ExternalAccess):
    # User emails for Onyx users, None indicates admin
--- a/backend/onyx/background/celery/apps/background.py
+++ b/backend/onyx/background/celery/apps/background.py
@@ -124,6 +124,7 @@ celery_app.autodiscover_tasks(
        "onyx.background.celery.tasks.kg_processing",
        "onyx.background.celery.tasks.monitoring",
        "onyx.background.celery.tasks.user_file_processing",
+        "onyx.background.celery.tasks.llm_model_update",
        # Light worker tasks
        "onyx.background.celery.tasks.shared",
        "onyx.background.celery.tasks.vespa",
--- a/backend/onyx/background/celery/tasks/beat_schedule.py
+++ b/backend/onyx/background/celery/tasks/beat_schedule.py
@@ -174,7 +174,7 @@ if AUTO_LLM_CONFIG_URL:
            "schedule": timedelta(seconds=AUTO_LLM_UPDATE_INTERVAL_SECONDS),
            "options": {
                "priority": OnyxCeleryPriority.LOW,
-                "expires": AUTO_LLM_UPDATE_INTERVAL_SECONDS,
+                "expires": BEAT_EXPIRES_DEFAULT,
            },
        }
    )
--- a/backend/onyx/background/celery/tasks/llm_model_update/tasks.py
+++ b/backend/onyx/background/celery/tasks/llm_model_update/tasks.py
@@ -5,6 +5,9 @@ from onyx.background.celery.apps.app_base import task_logger
 from onyx.configs.app_configs import AUTO_LLM_CONFIG_URL
 from onyx.configs.constants import OnyxCeleryTask
 from onyx.db.engine.sql_engine import get_session_with_current_tenant
+from onyx.llm.well_known_providers.auto_update_service import (
+    sync_llm_models_from_github,
+)


@shared_task(
@@ -26,24 +29,9 @@ def check_for_auto_llm_updates(self: Task, *, tenant_id: str) -> bool | None:
        return None

    try:
-        # Import here to avoid circular imports
-        from onyx.llm.well_known_providers.auto_update_service import (
-            fetch_llm_recommendations_from_github,
-        )
-        from onyx.llm.well_known_providers.auto_update_service import (
-            sync_llm_models_from_github,
-        )
-
-        # Fetch config from GitHub
-        config = fetch_llm_recommendations_from_github()
-
-        if not config:
-            task_logger.warning("Failed to fetch GitHub config")
-            return None
-
        # Sync to database
        with get_session_with_current_tenant() as db_session:
-            results = sync_llm_models_from_github(db_session, config)
+            results = sync_llm_models_from_github(db_session)

            if results:
                task_logger.info(f"Auto mode sync results: {results}")
--- a/backend/onyx/chat/chat_processing_checker.py
+++ b/backend/onyx/chat/chat_processing_checker.py
@@ -0,0 +1,57 @@
+from uuid import UUID
+
+from redis.client import Redis
+
+# Redis key prefixes for chat message processing
+PREFIX = "chatprocessing"
+FENCE_PREFIX = f"{PREFIX}_fence"
+FENCE_TTL = 30 * 60  # 30 minutes
+
+
+def _get_fence_key(chat_session_id: UUID) -> str:
+    """
+    Generate the Redis key for a chat session processing a message.
+
+    Args:
+        chat_session_id: The UUID of the chat session
+
+    Returns:
+        The fence key string (tenant_id is automatically added by the Redis client)
+    """
+    return f"{FENCE_PREFIX}_{chat_session_id}"
+
+
+def set_processing_status(
+    chat_session_id: UUID, redis_client: Redis, value: bool
+) -> None:
+    """
+    Set or clear the fence for a chat session processing a message.
+
+    If the key exists, we are processing a message. If the key does not exist, we are not processing a message.
+
+    Args:
+        chat_session_id: The UUID of the chat session
+        redis_client: The Redis client to use
+        value: True to set the fence, False to clear it
+    """
+    fence_key = _get_fence_key(chat_session_id)
+
+    if value:
+        redis_client.set(fence_key, 0, ex=FENCE_TTL)
+    else:
+        redis_client.delete(fence_key)
+
+
+def is_chat_session_processing(chat_session_id: UUID, redis_client: Redis) -> bool:
+    """
+    Check if the chat session is processing a message.
+
+    Args:
+        chat_session_id: The UUID of the chat session
+        redis_client: The Redis client to use
+
+    Returns:
+        True if the chat session is processing a message, False otherwise
+    """
+    fence_key = _get_fence_key(chat_session_id)
+    return bool(redis_client.exists(fence_key))
--- a/backend/onyx/chat/chat_state.py
+++ b/backend/onyx/chat/chat_state.py
@@ -94,6 +94,7 @@ class ChatStateContainer:

 def run_chat_loop_with_state_containers(
    func: Callable[..., None],
+    completion_callback: Callable[[ChatStateContainer], None],
    is_connected: Callable[[], bool],
    emitter: Emitter,
    state_container: ChatStateContainer,
@@ -196,3 +197,12 @@ def run_chat_loop_with_state_containers(
        # Skip waiting if user disconnected to exit quickly.
        if is_connected():
            wait_on_background(thread)
+        try:
+            completion_callback(state_container)
+        except Exception as e:
+            emitter.emit(
+                Packet(
+                    placement=Placement(turn_index=last_turn_index + 1),
+                    obj=PacketException(type="error", exception=e),
+                )
+            )
--- a/backend/onyx/chat/chat_utils.py
+++ b/backend/onyx/chat/chat_utils.py
@@ -55,6 +55,7 @@ from onyx.prompts.chat_prompts import TOOL_CALL_RESPONSE_CROSS_MESSAGE
 from onyx.prompts.tool_prompts import TOOL_CALL_FAILURE_PROMPT
 from onyx.server.query_and_chat.models import ChatSessionCreationRequest
 from onyx.server.query_and_chat.models import CreateChatMessageRequest
+from onyx.server.query_and_chat.models import MessageOrigin
 from onyx.server.query_and_chat.streaming_models import CitationInfo
 from onyx.tools.models import ToolCallKickoff
 from onyx.tools.tool_implementations.custom.custom_tool import (
@@ -117,6 +118,7 @@ def prepare_chat_message_request(
    llm_override: LLMOverride | None = None,
    allowed_tool_ids: list[int] | None = None,
    forced_tool_ids: list[int] | None = None,
+    origin: MessageOrigin | None = None,
 ) -> CreateChatMessageRequest:
    # Typically used for one shot flows like SlackBot or non-chat API endpoint use cases
    new_chat_session = create_chat_session(
@@ -144,6 +146,7 @@ def prepare_chat_message_request(
        llm_override=llm_override,
        allowed_tool_ids=allowed_tool_ids,
        forced_tool_ids=forced_tool_ids,
+        origin=origin or MessageOrigin.UNKNOWN,
    )


--- a/backend/onyx/chat/llm_loop.py
+++ b/backend/onyx/chat/llm_loop.py
@@ -505,7 +505,7 @@ def run_llm_loop(
            # in-flight citations
            # It can be cleaned up but not super trivial or worthwhile right now
            just_ran_web_search = False
-            tool_responses, citation_mapping = run_tool_calls(
+            parallel_tool_call_results = run_tool_calls(
                tool_calls=tool_calls,
                tools=final_tools,
                message_history=truncated_message_history,
@@ -516,6 +516,8 @@ def run_llm_loop(
                max_concurrent_tools=None,
                skip_search_query_expansion=has_called_search_tool,
            )
+            tool_responses = parallel_tool_call_results.tool_responses
+            citation_mapping = parallel_tool_call_results.updated_citation_mapping

            # Failure case, give something reasonable to the LLM to try again
            if tool_calls and not tool_responses:
--- a/backend/onyx/chat/process_message.py
+++ b/backend/onyx/chat/process_message.py
@@ -5,10 +5,13 @@ An overview can be found in the README.md file in this directory.

 import re
 import traceback
+from collections.abc import Callable
 from uuid import UUID

+from redis.client import Redis
 from sqlalchemy.orm import Session

+from onyx.chat.chat_processing_checker import set_processing_status
 from onyx.chat.chat_state import ChatStateContainer
 from onyx.chat.chat_state import run_chat_loop_with_state_containers
 from onyx.chat.chat_utils import convert_chat_history
@@ -45,6 +48,8 @@ from onyx.db.chat import get_chat_session_by_id
 from onyx.db.chat import get_or_create_root_message
 from onyx.db.chat import reserve_message_id
 from onyx.db.memory import get_memories
+from onyx.db.models import ChatMessage
+from onyx.db.models import ChatSession
 from onyx.db.models import User
 from onyx.db.projects import get_project_token_count
 from onyx.db.projects import get_user_files_from_project
@@ -78,20 +83,16 @@ from onyx.utils.logger import setup_logger
 from onyx.utils.long_term_log import LongTermLogger
 from onyx.utils.telemetry import mt_cloud_telemetry
 from onyx.utils.timing import log_function_time
+from onyx.utils.variable_functionality import (
+    fetch_versioned_implementation_with_fallback,
+)
+from onyx.utils.variable_functionality import noop_fallback
 from shared_configs.contextvars import get_current_tenant_id

 logger = setup_logger()
 ERROR_TYPE_CANCELLED = "cancelled"


-class ToolCallException(Exception):
-    """Exception raised for errors during tool calls."""
-
-    def __init__(self, message: str, tool_name: str | None = None):
-        super().__init__(message)
-        self.tool_name = tool_name
-
-
 def _extract_project_file_texts_and_images(
    project_id: int | None,
    user_id: UUID | None,
@@ -294,6 +295,8 @@ def handle_stream_message_objects(
    tenant_id = get_current_tenant_id()

    llm: LLM | None = None
+    chat_session: ChatSession | None = None
+    redis_client: Redis | None = None

    user_id = user.id if user is not None else None
    llm_user_identifier = (
@@ -339,6 +342,24 @@ def handle_stream_message_objects(
            event=MilestoneRecordType.MULTIPLE_ASSISTANTS,
        )

+        # Track user message in PostHog for analytics
+        fetch_versioned_implementation_with_fallback(
+            module="onyx.utils.telemetry",
+            attribute="event_telemetry",
+            fallback=noop_fallback,
+        )(
+            distinct_id=user.email if user else tenant_id,
+            event="user_message_sent",
+            properties={
+                "origin": new_msg_req.origin.value,
+                "has_files": len(new_msg_req.file_descriptors) > 0,
+                "has_project": chat_session.project_id is not None,
+                "has_persona": persona is not None and persona.id != DEFAULT_PERSONA_ID,
+                "deep_research": new_msg_req.deep_research,
+                "tenant_id": tenant_id,
+            },
+        )
+
        llm = get_llm_for_persona(
            persona=persona,
            user=user,
@@ -380,7 +401,10 @@ def handle_stream_message_objects(
        if new_msg_req.parent_message_id == AUTO_PLACE_AFTER_LATEST_MESSAGE:
            # Auto-place after the latest message in the chain
            parent_message = chat_history[-1] if chat_history else root_message
-        elif new_msg_req.parent_message_id is None:
+        elif (
+            new_msg_req.parent_message_id is None
+            or new_msg_req.parent_message_id == root_message.id
+        ):
            # None = regeneration from root
            parent_message = root_message
            # Truncate history since we're starting from root
@@ -536,10 +560,27 @@ def handle_stream_message_objects(
        def check_is_connected() -> bool:
            return check_stop_signal(chat_session.id, redis_client)

+        set_processing_status(
+            chat_session_id=chat_session.id,
+            redis_client=redis_client,
+            value=True,
+        )
+
        # Use external state container if provided, otherwise create internal one
        # External container allows non-streaming callers to access accumulated state
        state_container = external_state_container or ChatStateContainer()

+        def llm_loop_completion_callback(
+            state_container: ChatStateContainer,
+        ) -> None:
+            llm_loop_completion_handle(
+                state_container=state_container,
+                db_session=db_session,
+                chat_session_id=str(chat_session.id),
+                is_connected=check_is_connected,
+                assistant_message=assistant_response,
+            )
+
        # Run the LLM loop with explicit wrapper for stop signal handling
        # The wrapper runs run_llm_loop in a background thread and polls every 300ms
        # for stop signals. run_llm_loop itself doesn't know about stopping.
@@ -555,6 +596,7 @@ def handle_stream_message_objects(

            yield from run_chat_loop_with_state_containers(
                run_deep_research_llm_loop,
+                llm_loop_completion_callback,
                is_connected=check_is_connected,
                emitter=emitter,
                state_container=state_container,
@@ -571,6 +613,7 @@ def handle_stream_message_objects(
        else:
            yield from run_chat_loop_with_state_containers(
                run_llm_loop,
+                llm_loop_completion_callback,
                is_connected=check_is_connected,  # Not passed through to run_llm_loop
                emitter=emitter,
                state_container=state_container,
@@ -588,51 +631,6 @@ def handle_stream_message_objects(
                chat_session_id=str(chat_session.id),
            )

-        # Determine if stopped by user
-        completed_normally = check_is_connected()
-        if not completed_normally:
-            logger.debug(f"Chat session {chat_session.id} stopped by user")
-
-        # Build final answer based on completion status
-        if completed_normally:
-            if state_container.answer_tokens is None:
-                raise RuntimeError(
-                    "LLM run completed normally but did not return an answer."
-                )
-            final_answer = state_container.answer_tokens
-        else:
-            # Stopped by user - append stop message
-            if state_container.answer_tokens:
-                final_answer = (
-                    state_container.answer_tokens
-                    + " ... The generation was stopped by the user here."
-                )
-            else:
-                final_answer = "The generation was stopped by the user."
-
-        # Build citation_docs_info from accumulated citations in state container
-        citation_docs_info: list[CitationDocInfo] = []
-        seen_citation_nums: set[int] = set()
-        for citation_num, search_doc in state_container.citation_to_doc.items():
-            if citation_num not in seen_citation_nums:
-                seen_citation_nums.add(citation_num)
-                citation_docs_info.append(
-                    CitationDocInfo(
-                        search_doc=search_doc,
-                        citation_number=citation_num,
-                    )
-                )
-
-        save_chat_turn(
-            message_text=final_answer,
-            reasoning_tokens=state_container.reasoning_tokens,
-            citation_docs_info=citation_docs_info,
-            tool_calls=state_container.tool_calls,
-            db_session=db_session,
-            assistant_message=assistant_response,
-            is_clarification=state_container.is_clarification,
-        )
-
    except ValueError as e:
        logger.exception("Failed to process chat message.")

@@ -650,15 +648,7 @@ def handle_stream_message_objects(
        error_msg = str(e)
        stack_trace = traceback.format_exc()

-        if isinstance(e, ToolCallException):
-            yield StreamingError(
-                error=error_msg,
-                stack_trace=stack_trace,
-                error_code="TOOL_CALL_FAILED",
-                is_retryable=True,
-                details={"tool_name": e.tool_name} if e.tool_name else None,
-            )
-        elif llm:
+        if llm:
            client_error_msg, error_code, is_retryable = litellm_exception_to_error_msg(
                e, llm
            )
@@ -690,7 +680,67 @@ def handle_stream_message_objects(
            )

        db_session.rollback()
-        return
+    finally:
+        try:
+            if redis_client is not None and chat_session is not None:
+                set_processing_status(
+                    chat_session_id=chat_session.id,
+                    redis_client=redis_client,
+                    value=False,
+                )
+        except Exception:
+            logger.exception("Error in setting processing status")
+
+
+def llm_loop_completion_handle(
+    state_container: ChatStateContainer,
+    is_connected: Callable[[], bool],
+    db_session: Session,
+    chat_session_id: str,
+    assistant_message: ChatMessage,
+) -> None:
+    # Determine if stopped by user
+    completed_normally = is_connected()
+    # Build final answer based on completion status
+    if completed_normally:
+        if state_container.answer_tokens is None:
+            raise RuntimeError(
+                "LLM run completed normally but did not return an answer."
+            )
+        final_answer = state_container.answer_tokens
+    else:
+        # Stopped by user - append stop message
+        logger.debug(f"Chat session {chat_session_id} stopped by user")
+        if state_container.answer_tokens:
+            final_answer = (
+                state_container.answer_tokens
+                + " ... \n\nGeneration was stopped by the user."
+            )
+        else:
+            final_answer = "The generation was stopped by the user."
+
+    # Build citation_docs_info from accumulated citations in state container
+    citation_docs_info: list[CitationDocInfo] = []
+    seen_citation_nums: set[int] = set()
+    for citation_num, search_doc in state_container.citation_to_doc.items():
+        if citation_num not in seen_citation_nums:
+            seen_citation_nums.add(citation_num)
+            citation_docs_info.append(
+                CitationDocInfo(
+                    search_doc=search_doc,
+                    citation_number=citation_num,
+                )
+            )
+
+    save_chat_turn(
+        message_text=final_answer,
+        reasoning_tokens=state_container.reasoning_tokens,
+        citation_docs_info=citation_docs_info,
+        tool_calls=state_container.tool_calls,
+        db_session=db_session,
+        assistant_message=assistant_message,
+        is_clarification=state_container.is_clarification,
+    )


 def stream_chat_message_objects(
@@ -739,6 +789,7 @@ def stream_chat_message_objects(
        deep_research=new_msg_req.deep_research,
        parent_message_id=new_msg_req.parent_message_id,
        chat_session_id=new_msg_req.chat_session_id,
+        origin=new_msg_req.origin,
    )
    return handle_stream_message_objects(
        new_msg_req=translated_new_msg_req,
--- a/backend/onyx/configs/app_configs.py
+++ b/backend/onyx/configs/app_configs.py
@@ -568,6 +568,7 @@ JIRA_CONNECTOR_LABELS_TO_SKIP = [
 JIRA_CONNECTOR_MAX_TICKET_SIZE = int(
    os.environ.get("JIRA_CONNECTOR_MAX_TICKET_SIZE", 100 * 1024)
 )
+JIRA_SLIM_PAGE_SIZE = int(os.environ.get("JIRA_SLIM_PAGE_SIZE", 500))

 GONG_CONNECTOR_START_TIME = os.environ.get("GONG_CONNECTOR_START_TIME")

@@ -995,3 +996,9 @@ COHERE_DEFAULT_API_KEY = os.environ.get("COHERE_DEFAULT_API_KEY")
 VERTEXAI_DEFAULT_CREDENTIALS = os.environ.get("VERTEXAI_DEFAULT_CREDENTIALS")
 VERTEXAI_DEFAULT_LOCATION = os.environ.get("VERTEXAI_DEFAULT_LOCATION", "global")
 OPENROUTER_DEFAULT_API_KEY = os.environ.get("OPENROUTER_DEFAULT_API_KEY")
+
+INSTANCE_TYPE = (
+    "managed"
+    if os.environ.get("IS_MANAGED_INSTANCE", "").lower() == "true"
+    else "cloud" if AUTH_TYPE == AuthType.CLOUD else "self_hosted"
+)
--- a/backend/onyx/configs/constants.py
+++ b/backend/onyx/configs/constants.py
@@ -7,6 +7,7 @@ from enum import Enum

 ONYX_DEFAULT_APPLICATION_NAME = "Onyx"
 ONYX_DISCORD_URL = "https://discord.gg/4NA5SbzrWb"
+ONYX_UTM_SOURCE = "onyx_app"
 SLACK_USER_TOKEN_PREFIX = "xoxp-"
 SLACK_BOT_TOKEN_PREFIX = "xoxb-"
 ONYX_EMAILABLE_LOGO_MAX_DIM = 512
@@ -235,6 +236,7 @@ class NotificationType(str, Enum):
    PERSONA_SHARED = "persona_shared"
    TRIAL_ENDS_TWO_DAYS = "two_day_trial_ending"  # 2 days left in trial
    RELEASE_NOTES = "release_notes"
+    ASSISTANT_FILES_READY = "assistant_files_ready"


 class BlobType(str, Enum):
@@ -422,6 +424,9 @@ class OnyxRedisLocks:
    USER_FILE_DELETE_BEAT_LOCK = "da_lock:check_user_file_delete_beat"
    USER_FILE_DELETE_LOCK_PREFIX = "da_lock:user_file_delete"

+    # Release notes
+    RELEASE_NOTES_FETCH_LOCK = "da_lock:release_notes_fetch"
+

 class OnyxRedisSignals:
    BLOCK_VALIDATE_INDEXING_FENCES = "signal:block_validate_indexing_fences"
--- a/backend/onyx/connectors/README.md
+++ b/backend/onyx/connectors/README.md
@@ -93,7 +93,7 @@ if __name__ == "__main__":
 #### Docs Changes

 Create the new connector page (with guiding images!) with how to get the connector credentials and how to set up the
-connector in Onyx. Then create a Pull Request in https://github.com/onyx-dot-app/onyx-docs.
+connector in Onyx. Then create a Pull Request in [https://github.com/onyx-dot-app/documentation](https://github.com/onyx-dot-app/documentation).

 ### Before opening PR

--- a/backend/onyx/connectors/confluence/onyx_confluence.py
+++ b/backend/onyx/connectors/confluence/onyx_confluence.py
@@ -901,13 +901,16 @@ class OnyxConfluence:
        space_key: str,
    ) -> list[dict[str, Any]]:
        """
-        This is a confluence server specific method that can be used to
+        This is a confluence server/data center specific method that can be used to
        fetch the permissions of a space.
-        This is better logging than calling the get_space_permissions method
-        because it returns a jsonrpc response.
-        TODO: Make this call these endpoints for newer confluence versions:
-        - /rest/api/space/{spaceKey}/permissions
-        - /rest/api/space/{spaceKey}/permissions/anonymous
+
+        NOTE: This uses the JSON-RPC API which is the ONLY way to get space permissions
+        on Confluence Server/Data Center. The REST API equivalent (expand=permissions)
+        is Cloud-only and not available on Data Center as of version 8.9.x.
+
+        If this fails with 401 Unauthorized, the customer needs to enable JSON-RPC:
+        Confluence Admin -> General Configuration -> Further Configuration
+        -> Enable "Remote API (XML-RPC & SOAP)"
        """
        url = "rpc/json-rpc/confluenceservice-v2"
        data = {
@@ -916,7 +919,18 @@ class OnyxConfluence:
            "id": 7,
            "params": [space_key],
        }
-        response = self.post(url, data=data)
+        try:
+            response = self.post(url, data=data)
+        except HTTPError as e:
+            if e.response is not None and e.response.status_code == 401:
+                raise HTTPError(
+                    "Unauthorized (401) when calling JSON-RPC API for space permissions. "
+                    "This is likely because the Remote API is disabled. "
+                    "To fix: Confluence Admin -> General Configuration -> Further Configuration "
+                    "-> Enable 'Remote API (XML-RPC & SOAP)'",
+                    response=e.response,
+                ) from e
+            raise
        logger.debug(f"jsonrpc response: {response}")
        if not response.get("result"):
            logger.warning(
--- a/backend/onyx/connectors/jira/connector.py
+++ b/backend/onyx/connectors/jira/connector.py
@@ -18,6 +18,7 @@ from typing_extensions import override
 from onyx.configs.app_configs import INDEX_BATCH_SIZE
 from onyx.configs.app_configs import JIRA_CONNECTOR_LABELS_TO_SKIP
 from onyx.configs.app_configs import JIRA_CONNECTOR_MAX_TICKET_SIZE
+from onyx.configs.app_configs import JIRA_SLIM_PAGE_SIZE
 from onyx.configs.constants import DocumentSource
 from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
    is_atlassian_date_error,
@@ -57,7 +58,6 @@ logger = setup_logger()
 ONE_HOUR = 3600

 _MAX_RESULTS_FETCH_IDS = 5000  # 5000
-_JIRA_SLIM_PAGE_SIZE = 500
 _JIRA_FULL_PAGE_SIZE = 50

 # Constants for Jira field names
@@ -683,7 +683,7 @@ class JiraConnector(
                jira_client=self.jira_client,
                jql=jql,
                start=current_offset,
-                max_results=_JIRA_SLIM_PAGE_SIZE,
+                max_results=JIRA_SLIM_PAGE_SIZE,
                all_issue_ids=checkpoint.all_issue_ids,
                checkpoint_callback=checkpoint_callback,
                nextPageToken=checkpoint.cursor,
@@ -703,11 +703,11 @@ class JiraConnector(
                    )
                )
                current_offset += 1
-                if len(slim_doc_batch) >= _JIRA_SLIM_PAGE_SIZE:
+                if len(slim_doc_batch) >= JIRA_SLIM_PAGE_SIZE:
                    yield slim_doc_batch
                    slim_doc_batch = []
            self.update_checkpoint_for_next_run(
-                checkpoint, current_offset, prev_offset, _JIRA_SLIM_PAGE_SIZE
+                checkpoint, current_offset, prev_offset, JIRA_SLIM_PAGE_SIZE
            )
            prev_offset = current_offset

--- a/backend/onyx/db/chat.py
+++ b/backend/onyx/db/chat.py
@@ -1,6 +1,7 @@
 from collections.abc import Sequence
 from datetime import datetime
 from datetime import timedelta
+from datetime import timezone
 from typing import Tuple
 from uuid import UUID

@@ -181,7 +182,11 @@ def get_chat_sessions_by_user(
            .correlate(ChatSession)
        )

-        stmt = stmt.where(non_system_message_exists_subq)
+        # Leeway for newly created chats that don't have messages yet
+        time = datetime.now(timezone.utc) - timedelta(minutes=5)
+        recently_created = ChatSession.time_created >= time
+
+        stmt = stmt.where(or_(non_system_message_exists_subq, recently_created))

    result = db_session.execute(stmt)
    chat_sessions = result.scalars().all()
--- a/backend/onyx/db/llm.py
+++ b/backend/onyx/db/llm.py
@@ -374,7 +374,7 @@ def fetch_existing_tools(db_session: Session, tool_ids: list[int]) -> list[ToolM
 def fetch_existing_llm_providers(
    db_session: Session,
    only_public: bool = False,
-    exclude_image_generation_providers: bool = False,
+    exclude_image_generation_providers: bool = True,
 ) -> list[LLMProviderModel]:
    """Fetch all LLM providers with optional filtering.

@@ -585,13 +585,12 @@ def update_default_vision_provider(

 def fetch_auto_mode_providers(db_session: Session) -> list[LLMProviderModel]:
    """Fetch all LLM providers that are in Auto mode."""
-    return list(
-        db_session.scalars(
-            select(LLMProviderModel)
-            .where(LLMProviderModel.is_auto_mode == True)  # noqa: E712
-            .options(selectinload(LLMProviderModel.model_configurations))
-        ).all()
+    query = (
+        select(LLMProviderModel)
+        .where(LLMProviderModel.is_auto_mode.is_(True))
+        .options(selectinload(LLMProviderModel.model_configurations))
    )
+    return list(db_session.scalars(query).all())


 def sync_auto_mode_models(
@@ -620,7 +619,9 @@ def sync_auto_mode_models(

    # Build the list of all visible models from the config
    # All models in the config are visible (default + additional_visible_models)
-    recommended_visible_models = llm_recommendations.get_visible_models(provider.name)
+    recommended_visible_models = llm_recommendations.get_visible_models(
+        provider.provider
+    )
    recommended_visible_model_names = [
        model.name for model in recommended_visible_models
    ]
@@ -635,11 +636,12 @@ def sync_auto_mode_models(
        ).all()
    }

-    # Remove models that are no longer in GitHub config
+    # Mark models that are no longer in GitHub config as not visible
    for model_name, model in existing_models.items():
        if model_name not in recommended_visible_model_names:
-            db_session.delete(model)
-            changes += 1
+            if model.is_visible:
+                model.is_visible = False
+                changes += 1

    # Add or update models from GitHub config
    for model_config in recommended_visible_models:
@@ -669,7 +671,7 @@ def sync_auto_mode_models(
            changes += 1

    # In Auto mode, default model is always set from GitHub config
-    default_model = llm_recommendations.get_default_model(provider.name)
+    default_model = llm_recommendations.get_default_model(provider.provider)
    if default_model and provider.default_model_name != default_model.name:
        provider.default_model_name = default_model.name
        changes += 1
--- a/backend/onyx/db/models.py
+++ b/backend/onyx/db/models.py
@@ -377,6 +377,17 @@ class Notification(Base):
        postgresql.JSONB(), nullable=True
    )

+    # Unique constraint ix_notification_user_type_data on (user_id, notif_type, additional_data)
+    # ensures notification deduplication for batch inserts. Defined in migration 8405ca81cc83.
+    __table_args__ = (
+        Index(
+            "ix_notification_user_sort",
+            "user_id",
+            "dismissed",
+            desc("first_shown"),
+        ),
+    )
+

 """
 Association Tables
@@ -2605,6 +2616,7 @@ class Tool(Base):
    __tablename__ = "tool"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
+    # The name of the tool that the LLM will see
    name: Mapped[str] = mapped_column(String, nullable=False)
    description: Mapped[str] = mapped_column(Text, nullable=True)
    # ID of the tool in the codebase, only applies for in-code tools.
--- a/backend/onyx/db/notification.py
+++ b/backend/onyx/db/notification.py
@@ -1,6 +1,11 @@
+from datetime import datetime
+from datetime import timezone
 from uuid import UUID

+from sqlalchemy import cast
 from sqlalchemy import select
+from sqlalchemy.dialects import postgresql
+from sqlalchemy.dialects.postgresql import insert
 from sqlalchemy.orm import Session
 from sqlalchemy.sql import func

@@ -17,23 +22,33 @@ def create_notification(
    title: str,
    description: str | None = None,
    additional_data: dict | None = None,
+    autocommit: bool = True,
 ) -> Notification:
-    # Check if an undismissed notification of the same type and data exists
+    # Previously, we only matched the first identical, undismissed notification
+    # Now, we assume some uniqueness to notifications
+    # If we previously issued a notification that was dismissed, we no longer issue a new one
+
+    # Normalize additional_data to match the unique index behavior
+    # The index uses COALESCE(additional_data, '{}'::jsonb)
+    # We need to match this logic in our query
+    additional_data_normalized = additional_data if additional_data is not None else {}
+
    existing_notification = (
        db_session.query(Notification)
-        .filter_by(
-            user_id=user_id,
-            notif_type=notif_type,
-            dismissed=False,
+        .filter_by(user_id=user_id, notif_type=notif_type)
+        .filter(
+            func.coalesce(Notification.additional_data, cast({}, postgresql.JSONB))
+            == additional_data_normalized
        )
-        .filter(Notification.additional_data == additional_data)
        .first()
    )

    if existing_notification:
-        # Update the last_shown timestamp
-        existing_notification.last_shown = func.now()
-        db_session.commit()
+        # Update the last_shown timestamp if the notification is not dismissed
+        if not existing_notification.dismissed:
+            existing_notification.last_shown = func.now()
+            if autocommit:
+                db_session.commit()
        return existing_notification

    # Create a new notification if none exists
@@ -48,7 +63,8 @@ def create_notification(
        additional_data=additional_data,
    )
    db_session.add(notification)
-    db_session.commit()
+    if autocommit:
+        db_session.commit()
    return notification


@@ -81,6 +97,11 @@ def get_notifications(
        query = query.where(Notification.dismissed.is_(False))
    if notif_type:
        query = query.where(Notification.notif_type == notif_type)
+    # Sort: undismissed first, then by date (newest first)
+    query = query.order_by(
+        Notification.dismissed.asc(),
+        Notification.first_shown.desc(),
+    )
    return list(db_session.execute(query).scalars().all())


@@ -99,6 +120,63 @@ def dismiss_notification(notification: Notification, db_session: Session) -> Non
    db_session.commit()


+def batch_dismiss_notifications(
+    notifications: list[Notification], db_session: Session
+) -> None:
+    """Mark every given notification as dismissed, then commit once."""
+    for notif in notifications:
+        notif.dismissed = True
+    db_session.commit()
+
+
+def batch_create_notifications(
+    user_ids: list[UUID],
+    notif_type: NotificationType,
+    db_session: Session,
+    title: str,
+    description: str | None = None,
+    additional_data: dict | None = None,
+) -> int:
+    """
+    Create notifications for multiple users with a single bulk INSERT.
+
+    Uses ON CONFLICT DO NOTHING, so a row that collides with the unique index on
+    (user_id, notif_type, COALESCE(additional_data, '{}')) is silently skipped;
+    this makes repeated calls with the same arguments idempotent.
+
+    Returns the number of notifications actually inserted (0 for empty user_ids).
+    Commits the session after the insert.
+    """
+    if not user_ids:
+        return 0
+
+    now = datetime.now(timezone.utc)
+    # Use empty dict instead of None to match COALESCE behavior in the unique index
+    additional_data_normalized = additional_data if additional_data is not None else {}
+
+    values = [
+        {
+            "user_id": uid,
+            "notif_type": notif_type.value,
+            "title": title,
+            "description": description,
+            "dismissed": False,
+            "last_shown": now,
+            "first_shown": now,
+            "additional_data": additional_data_normalized,
+        }
+        for uid in user_ids
+    ]
+
+    stmt = insert(Notification).values(values).on_conflict_do_nothing()
+    result = db_session.execute(stmt)
+    db_session.commit()
+
+    # rowcount returns number of rows inserted (excludes conflicts)
+    # CursorResult has rowcount but session.execute type hints are too broad
+    return result.rowcount if result.rowcount >= 0 else 0  # type: ignore[attr-defined]
+
+
 def update_notification_last_shown(
    notification: Notification, db_session: Session
 ) -> None:
--- a/backend/onyx/db/release_notes.py
+++ b/backend/onyx/db/release_notes.py
@@ -0,0 +1,94 @@
+"""Database functions for release notes functionality."""
+
+from urllib.parse import urlencode
+
+from sqlalchemy import select
+from sqlalchemy.orm import Session
+
+from onyx.auth.schemas import UserRole
+from onyx.configs.app_configs import INSTANCE_TYPE
+from onyx.configs.constants import DANSWER_API_KEY_DUMMY_EMAIL_DOMAIN
+from onyx.configs.constants import NotificationType
+from onyx.configs.constants import ONYX_UTM_SOURCE
+from onyx.db.models import User
+from onyx.db.notification import batch_create_notifications
+from onyx.server.features.release_notes.constants import DOCS_CHANGELOG_BASE_URL
+from onyx.server.features.release_notes.models import ReleaseNoteEntry
+from onyx.utils.logger import setup_logger
+
+logger = setup_logger()
+
+
+def create_release_notifications_for_versions(
+    db_session: Session,
+    release_note_entries: list[ReleaseNoteEntry],
+) -> int:
+    """
+    Create release notes notifications for each release note entry.
+    Uses batch_create_notifications for efficient bulk insertion.
+
+    If a user already has a notification for a specific version (dismissed or not),
+    no new one is created (handled by unique constraint on additional_data).
+
+    Note: Entries should already be filtered by app_version before calling this
+    function. The filtering happens in _parse_mdx_to_release_note_entries().
+
+    Args:
+        db_session: Database session
+        release_note_entries: List of release note entries to notify about (pre-filtered)
+
+    Returns:
+        Total number of notifications created across all versions.
+    """
+    if not release_note_entries:
+        logger.debug("No release note entries to notify about")
+        return 0
+
+    # Get active users and exclude API key users
+    user_ids = list(
+        db_session.scalars(
+            select(User.id).where(  # type: ignore
+                User.is_active == True,  # noqa: E712
+                User.role.notin_([UserRole.SLACK_USER, UserRole.EXT_PERM_USER]),
+                User.email.endswith(DANSWER_API_KEY_DUMMY_EMAIL_DOMAIN).is_(False),  # type: ignore[attr-defined]
+            )
+        ).all()
+    )
+
+    total_created = 0
+    for entry in release_note_entries:
+        # Convert version to anchor format for external docs links
+        # v2.7.0 -> v2-7-0
+        version_anchor = entry.version.replace(".", "-")
+
+        # Build UTM parameters for tracking
+        utm_params = {
+            "utm_source": ONYX_UTM_SOURCE,
+            "utm_medium": "notification",
+            "utm_campaign": INSTANCE_TYPE,
+            "utm_content": f"release_notes-{entry.version}",
+        }
+        # Query must precede the fragment, else the UTM params become part of the anchor
+        link = f"{DOCS_CHANGELOG_BASE_URL}?{urlencode(utm_params)}#{version_anchor}"
+
+        additional_data: dict[str, str] = {
+            "version": entry.version,
+            "link": link,
+        }
+
+        created_count = batch_create_notifications(
+            user_ids,
+            NotificationType.RELEASE_NOTES,
+            db_session,
+            title=entry.title,
+            description=f"Check out what's new in {entry.version}",
+            additional_data=additional_data,
+        )
+        total_created += created_count
+
+        logger.debug(
+            f"Created {created_count} release notes notifications "
+            f"(version {entry.version}, {len(user_ids)} eligible users)"
+        )
+
+    return total_created
--- a/backend/onyx/deep_research/dr_loop.py
+++ b/backend/onyx/deep_research/dr_loop.py
@@ -21,7 +21,6 @@ from onyx.configs.constants import MessageType
 from onyx.db.tools import get_tool_by_name
 from onyx.deep_research.dr_mock_tools import get_clarification_tool_definitions
 from onyx.deep_research.dr_mock_tools import get_orchestrator_tools
-from onyx.deep_research.dr_mock_tools import RESEARCH_AGENT_DB_NAME
 from onyx.deep_research.dr_mock_tools import RESEARCH_AGENT_TOOL_NAME
 from onyx.deep_research.dr_mock_tools import THINK_TOOL_RESPONSE_MESSAGE
 from onyx.deep_research.dr_mock_tools import THINK_TOOL_RESPONSE_TOKEN_COUNT
@@ -150,6 +149,9 @@ def generate_final_report(
            is_deep_research=True,
        )

+        # Save citation mapping to state_container so citations are persisted
+        state_container.set_citation_mapping(citation_processor.citation_to_doc)
+
        final_report = llm_step_result.answer
        if final_report is None:
            raise ValueError("LLM failed to generate the final deep research report")
@@ -217,35 +219,90 @@ def run_deep_research_llm_loop(
            else ""
        )
        if not skip_clarification:
-            clarification_prompt = CLARIFICATION_PROMPT.format(
-                current_datetime=get_current_llm_day_time(full_sentence=False),
-                internal_search_clarification_guidance=internal_search_clarification_guidance,
-            )
+            with function_span("clarification_step") as span:
+                clarification_prompt = CLARIFICATION_PROMPT.format(
+                    current_datetime=get_current_llm_day_time(full_sentence=False),
+                    internal_search_clarification_guidance=internal_search_clarification_guidance,
+                )
+                system_prompt = ChatMessageSimple(
+                    message=clarification_prompt,
+                    token_count=300,  # Skips the exact token count but has enough leeway
+                    message_type=MessageType.SYSTEM,
+                )
+
+                truncated_message_history = construct_message_history(
+                    system_prompt=system_prompt,
+                    custom_agent_prompt=None,
+                    simple_chat_history=simple_chat_history,
+                    reminder_message=None,
+                    project_files=None,
+                    available_tokens=available_tokens,
+                    last_n_user_messages=MAX_USER_MESSAGES_FOR_CONTEXT,
+                )
+
+                llm_step_result, _ = run_llm_step(
+                    emitter=emitter,
+                    history=truncated_message_history,
+                    tool_definitions=get_clarification_tool_definitions(),
+                    tool_choice=ToolChoiceOptions.AUTO,
+                    llm=llm,
+                    placement=Placement(turn_index=0),
+                    # No citations in this step, it should just pass through all
+                    # tokens directly so initialized as an empty citation processor
+                    citation_processor=None,
+                    state_container=state_container,
+                    final_documents=None,
+                    user_identity=user_identity,
+                    is_deep_research=True,
+                )
+
+                if not llm_step_result.tool_calls:
+                    # Mark this turn as a clarification question
+                    state_container.set_is_clarification(True)
+                    span.span_data.output = "clarification_required"
+
+                    emitter.emit(
+                        Packet(
+                            placement=Placement(turn_index=0),
+                            obj=OverallStop(type="stop"),
+                        )
+                    )
+
+                    # If a clarification is asked, we need to end this turn and wait on user input
+                    return
+
+        #########################################################
+        # RESEARCH PLAN STEP
+        #########################################################
+        with function_span("research_plan_step") as span:
            system_prompt = ChatMessageSimple(
-                message=clarification_prompt,
-                token_count=300,  # Skips the exact token count but has enough leeway
+                message=RESEARCH_PLAN_PROMPT.format(
+                    current_datetime=get_current_llm_day_time(full_sentence=False)
+                ),
+                token_count=300,
                message_type=MessageType.SYSTEM,
            )
-
+            reminder_message = ChatMessageSimple(
+                message=RESEARCH_PLAN_REMINDER,
+                token_count=100,
+                message_type=MessageType.USER,
+            )
            truncated_message_history = construct_message_history(
                system_prompt=system_prompt,
                custom_agent_prompt=None,
-                simple_chat_history=simple_chat_history,
+                simple_chat_history=simple_chat_history + [reminder_message],
                reminder_message=None,
                project_files=None,
                available_tokens=available_tokens,
-                last_n_user_messages=MAX_USER_MESSAGES_FOR_CONTEXT,
+                last_n_user_messages=MAX_USER_MESSAGES_FOR_CONTEXT + 1,
            )

-            llm_step_result, _ = run_llm_step(
-                emitter=emitter,
+            research_plan_generator = run_llm_step_pkt_generator(
                history=truncated_message_history,
-                tool_definitions=get_clarification_tool_definitions(),
-                tool_choice=ToolChoiceOptions.AUTO,
+                tool_definitions=[],
+                tool_choice=ToolChoiceOptions.NONE,
                llm=llm,
                placement=Placement(turn_index=0),
-                # No citations in this step, it should just pass through all
-                # tokens directly so initialized as an empty citation processor
                citation_processor=None,
                state_container=state_container,
                final_documents=None,
@@ -253,301 +310,177 @@ def run_deep_research_llm_loop(
                is_deep_research=True,
            )

-            if not llm_step_result.tool_calls:
-                # Mark this turn as a clarification question
-                state_container.set_is_clarification(True)
-
-                emitter.emit(
-                    Packet(
-                        placement=Placement(turn_index=0), obj=OverallStop(type="stop")
-                    )
-                )
-
-                # If a clarification is asked, we need to end this turn and wait on user input
-                return
-
-        #########################################################
-        # RESEARCH PLAN STEP
-        #########################################################
-        system_prompt = ChatMessageSimple(
-            message=RESEARCH_PLAN_PROMPT.format(
-                current_datetime=get_current_llm_day_time(full_sentence=False)
-            ),
-            token_count=300,
-            message_type=MessageType.SYSTEM,
-        )
-        reminder_message = ChatMessageSimple(
-            message=RESEARCH_PLAN_REMINDER,
-            token_count=100,
-            message_type=MessageType.USER,
-        )
-        truncated_message_history = construct_message_history(
-            system_prompt=system_prompt,
-            custom_agent_prompt=None,
-            simple_chat_history=simple_chat_history + [reminder_message],
-            reminder_message=None,
-            project_files=None,
-            available_tokens=available_tokens,
-            last_n_user_messages=MAX_USER_MESSAGES_FOR_CONTEXT + 1,
-        )
-
-        research_plan_generator = run_llm_step_pkt_generator(
-            history=truncated_message_history,
-            tool_definitions=[],
-            tool_choice=ToolChoiceOptions.NONE,
-            llm=llm,
-            placement=Placement(turn_index=0),
-            citation_processor=None,
-            state_container=state_container,
-            final_documents=None,
-            user_identity=user_identity,
-            is_deep_research=True,
-        )
-
-        while True:
-            try:
-                packet = next(research_plan_generator)
-                # Translate AgentResponseStart/Delta packets to DeepResearchPlanStart/Delta
-                # The LLM response from this prompt is the research plan
-                if isinstance(packet.obj, AgentResponseStart):
+            while True:
+                try:
+                    packet = next(research_plan_generator)
+                    # Translate AgentResponseStart/Delta packets to DeepResearchPlanStart/Delta
+                    # The LLM response from this prompt is the research plan
+                    if isinstance(packet.obj, AgentResponseStart):
+                        emitter.emit(
+                            Packet(
+                                placement=packet.placement,
+                                obj=DeepResearchPlanStart(),
+                            )
+                        )
+                    elif isinstance(packet.obj, AgentResponseDelta):
+                        emitter.emit(
+                            Packet(
+                                placement=packet.placement,
+                                obj=DeepResearchPlanDelta(content=packet.obj.content),
+                            )
+                        )
+                    else:
+                        # Pass through other packet types (e.g., ReasoningStart, ReasoningDelta, etc.)
+                        emitter.emit(packet)
+                except StopIteration as e:
+                    llm_step_result, reasoned = e.value
                    emitter.emit(
                        Packet(
-                            placement=packet.placement,
-                            obj=DeepResearchPlanStart(),
+                            # Marks the last turn end which should be the plan generation
+                            placement=Placement(
+                                turn_index=1 if reasoned else 0,
+                            ),
+                            obj=SectionEnd(),
                        )
                    )
-                elif isinstance(packet.obj, AgentResponseDelta):
-                    emitter.emit(
-                        Packet(
-                            placement=packet.placement,
-                            obj=DeepResearchPlanDelta(content=packet.obj.content),
-                        )
-                    )
-                else:
-                    # Pass through other packet types (e.g., ReasoningStart, ReasoningDelta, etc.)
-                    emitter.emit(packet)
-            except StopIteration as e:
-                llm_step_result, reasoned = e.value
-                emitter.emit(
-                    Packet(
-                        # Marks the last turn end which should be the plan generation
-                        placement=Placement(
-                            turn_index=1 if reasoned else 0,
-                        ),
-                        obj=SectionEnd(),
-                    )
-                )
-                if reasoned:
-                    orchestrator_start_turn_index += 1
-                break
-        llm_step_result = cast(LlmStepResult, llm_step_result)
+                    if reasoned:
+                        orchestrator_start_turn_index += 1
+                    break
+            llm_step_result = cast(LlmStepResult, llm_step_result)

-        research_plan = llm_step_result.answer
+            research_plan = llm_step_result.answer
+            span.span_data.output = research_plan if research_plan else None

        #########################################################
        # RESEARCH EXECUTION STEP
        #########################################################
-        is_reasoning_model = model_is_reasoning_model(
-            llm.config.model_name, llm.config.model_provider
-        )
+        with function_span("research_execution_step") as span:
+            is_reasoning_model = model_is_reasoning_model(
+                llm.config.model_name, llm.config.model_provider
+            )

-        max_orchestrator_cycles = (
-            MAX_ORCHESTRATOR_CYCLES
-            if not is_reasoning_model
-            else MAX_ORCHESTRATOR_CYCLES_REASONING
-        )
+            max_orchestrator_cycles = (
+                MAX_ORCHESTRATOR_CYCLES
+                if not is_reasoning_model
+                else MAX_ORCHESTRATOR_CYCLES_REASONING
+            )

-        orchestrator_prompt_template = (
-            ORCHESTRATOR_PROMPT
-            if not is_reasoning_model
-            else ORCHESTRATOR_PROMPT_REASONING
-        )
+            orchestrator_prompt_template = (
+                ORCHESTRATOR_PROMPT
+                if not is_reasoning_model
+                else ORCHESTRATOR_PROMPT_REASONING
+            )

-        internal_search_research_task_guidance = (
-            INTERNAL_SEARCH_RESEARCH_TASK_GUIDANCE
-            if include_internal_search_tunings
-            else ""
-        )
-        token_count_prompt = orchestrator_prompt_template.format(
-            current_datetime=get_current_llm_day_time(full_sentence=False),
-            current_cycle_count=1,
-            max_cycles=max_orchestrator_cycles,
-            research_plan=research_plan,
-            internal_search_research_task_guidance=internal_search_research_task_guidance,
-        )
-        orchestration_tokens = token_counter(token_count_prompt)
-
-        reasoning_cycles = 0
-        most_recent_reasoning: str | None = None
-        citation_mapping: CitationMapping = {}
-        final_turn_index: int = (
-            orchestrator_start_turn_index  # Track the final turn_index for stop packet
-        )
-        for cycle in range(max_orchestrator_cycles):
-            if cycle == max_orchestrator_cycles - 1:
-                # If it's the last cycle, forcibly generate the final report
-                report_turn_index = (
-                    orchestrator_start_turn_index + cycle + reasoning_cycles
-                )
-                report_reasoned = generate_final_report(
-                    history=simple_chat_history,
-                    llm=llm,
-                    token_counter=token_counter,
-                    state_container=state_container,
-                    emitter=emitter,
-                    turn_index=report_turn_index,
-                    citation_mapping=citation_mapping,
-                    user_identity=user_identity,
-                )
-                # Update final_turn_index: base + 1 for the report itself + 1 if reasoning occurred
-                final_turn_index = report_turn_index + (1 if report_reasoned else 0)
-                break
-
-            research_agent_calls: list[ToolCallKickoff] = []
-
-            orchestrator_prompt = orchestrator_prompt_template.format(
+            internal_search_research_task_guidance = (
+                INTERNAL_SEARCH_RESEARCH_TASK_GUIDANCE
+                if include_internal_search_tunings
+                else ""
+            )
+            token_count_prompt = orchestrator_prompt_template.format(
                current_datetime=get_current_llm_day_time(full_sentence=False),
-                current_cycle_count=cycle,
+                current_cycle_count=1,
                max_cycles=max_orchestrator_cycles,
                research_plan=research_plan,
                internal_search_research_task_guidance=internal_search_research_task_guidance,
            )
+            orchestration_tokens = token_counter(token_count_prompt)

-            system_prompt = ChatMessageSimple(
-                message=orchestrator_prompt,
-                token_count=orchestration_tokens,
-                message_type=MessageType.SYSTEM,
+            reasoning_cycles = 0
+            most_recent_reasoning: str | None = None
+            citation_mapping: CitationMapping = {}
+            final_turn_index: int = (
+                orchestrator_start_turn_index  # Track the final turn_index for stop packet
            )
+            for cycle in range(max_orchestrator_cycles):
+                if cycle == max_orchestrator_cycles - 1:
+                    # If it's the last cycle, forcibly generate the final report
+                    report_turn_index = (
+                        orchestrator_start_turn_index + cycle + reasoning_cycles
+                    )
+                    report_reasoned = generate_final_report(
+                        history=simple_chat_history,
+                        llm=llm,
+                        token_counter=token_counter,
+                        state_container=state_container,
+                        emitter=emitter,
+                        turn_index=report_turn_index,
+                        citation_mapping=citation_mapping,
+                        user_identity=user_identity,
+                    )
+                    # Update final_turn_index: base + 1 for the report itself + 1 if reasoning occurred
+                    final_turn_index = report_turn_index + (1 if report_reasoned else 0)
+                    break

-            truncated_message_history = construct_message_history(
-                system_prompt=system_prompt,
-                custom_agent_prompt=None,
-                simple_chat_history=simple_chat_history,
-                reminder_message=None,
-                project_files=None,
-                available_tokens=available_tokens,
-                last_n_user_messages=MAX_USER_MESSAGES_FOR_CONTEXT,
-            )
+                research_agent_calls: list[ToolCallKickoff] = []

-            # Use think tool processor for non-reasoning models to convert
-            # think_tool calls to reasoning content
-            custom_processor = (
-                create_think_tool_token_processor() if not is_reasoning_model else None
-            )
-
-            llm_step_result, has_reasoned = run_llm_step(
-                emitter=emitter,
-                history=truncated_message_history,
-                tool_definitions=get_orchestrator_tools(
-                    include_think_tool=not is_reasoning_model
-                ),
-                tool_choice=ToolChoiceOptions.REQUIRED,
-                llm=llm,
-                placement=Placement(
-                    turn_index=orchestrator_start_turn_index + cycle + reasoning_cycles
-                ),
-                # No citations in this step, it should just pass through all
-                # tokens directly so initialized as an empty citation processor
-                citation_processor=DynamicCitationProcessor(),
-                state_container=state_container,
-                final_documents=None,
-                user_identity=user_identity,
-                custom_token_processor=custom_processor,
-                is_deep_research=True,
-            )
-            if has_reasoned:
-                reasoning_cycles += 1
-
-            tool_calls = llm_step_result.tool_calls or []
-
-            if not tool_calls and cycle == 0:
-                raise RuntimeError(
-                    "Deep Research failed to generate any research tasks for the agents."
+                orchestrator_prompt = orchestrator_prompt_template.format(
+                    current_datetime=get_current_llm_day_time(full_sentence=False),
+                    current_cycle_count=cycle,
+                    max_cycles=max_orchestrator_cycles,
+                    research_plan=research_plan,
+                    internal_search_research_task_guidance=internal_search_research_task_guidance,
                )

-            if not tool_calls:
-                # Basically hope that this is an infrequent occurence and hopefully multiple research
-                # cycles have already ran
-                logger.warning("No tool calls found, this should not happen.")
-                report_turn_index = (
-                    orchestrator_start_turn_index + cycle + reasoning_cycles
+                system_prompt = ChatMessageSimple(
+                    message=orchestrator_prompt,
+                    token_count=orchestration_tokens,
+                    message_type=MessageType.SYSTEM,
                )
-                report_reasoned = generate_final_report(
-                    history=simple_chat_history,
-                    llm=llm,
-                    token_counter=token_counter,
-                    state_container=state_container,
+
+                truncated_message_history = construct_message_history(
+                    system_prompt=system_prompt,
+                    custom_agent_prompt=None,
+                    simple_chat_history=simple_chat_history,
+                    reminder_message=None,
+                    project_files=None,
+                    available_tokens=available_tokens,
+                    last_n_user_messages=MAX_USER_MESSAGES_FOR_CONTEXT,
+                )
+
+                # Use think tool processor for non-reasoning models to convert
+                # think_tool calls to reasoning content
+                custom_processor = (
+                    create_think_tool_token_processor()
+                    if not is_reasoning_model
+                    else None
+                )
+
+                llm_step_result, has_reasoned = run_llm_step(
                    emitter=emitter,
-                    turn_index=report_turn_index,
-                    citation_mapping=citation_mapping,
-                    user_identity=user_identity,
-                )
-                final_turn_index = report_turn_index + (1 if report_reasoned else 0)
-                break
-
-            special_tool_calls = check_special_tool_calls(tool_calls=tool_calls)
-
-            if special_tool_calls.generate_report_tool_call:
-                report_turn_index = (
-                    special_tool_calls.generate_report_tool_call.placement.turn_index
-                )
-                report_reasoned = generate_final_report(
-                    history=simple_chat_history,
+                    history=truncated_message_history,
+                    tool_definitions=get_orchestrator_tools(
+                        include_think_tool=not is_reasoning_model
+                    ),
+                    tool_choice=ToolChoiceOptions.REQUIRED,
                    llm=llm,
-                    token_counter=token_counter,
+                    placement=Placement(
+                        turn_index=orchestrator_start_turn_index
+                        + cycle
+                        + reasoning_cycles
+                    ),
+                    # No citations in this step, it should just pass through all
+                    # tokens directly so initialized as an empty citation processor
+                    citation_processor=DynamicCitationProcessor(),
                    state_container=state_container,
-                    emitter=emitter,
-                    turn_index=report_turn_index,
-                    citation_mapping=citation_mapping,
+                    final_documents=None,
                    user_identity=user_identity,
-                    saved_reasoning=most_recent_reasoning,
+                    custom_token_processor=custom_processor,
+                    is_deep_research=True,
                )
-                final_turn_index = report_turn_index + (1 if report_reasoned else 0)
-                break
-            elif special_tool_calls.think_tool_call:
-                think_tool_call = special_tool_calls.think_tool_call
-                # Only process the THINK_TOOL and skip all other tool calls
-                # This will not actually get saved to the db as a tool call but we'll attach it to the tool(s) called after
-                # it as if it were just a reasoning model doing it. In the chat history, because it happens in 2 steps,
-                # we will show it as a separate message.
-                # NOTE: This does not need to increment the reasoning cycles because the custom token processor causes
-                # the LLM step to handle this
-                with function_span("think_tool") as span:
-                    span.span_data.input = str(think_tool_call.tool_args)
-                    most_recent_reasoning = state_container.reasoning_tokens
-                    tool_call_message = think_tool_call.to_msg_str()
+                if has_reasoned:
+                    reasoning_cycles += 1

-                    think_tool_msg = ChatMessageSimple(
-                        message=tool_call_message,
-                        token_count=token_counter(tool_call_message),
-                        message_type=MessageType.TOOL_CALL,
-                        tool_call_id=think_tool_call.tool_call_id,
-                        image_files=None,
+                tool_calls = llm_step_result.tool_calls or []
+
+                if not tool_calls and cycle == 0:
+                    raise RuntimeError(
+                        "Deep Research failed to generate any research tasks for the agents."
                    )
-                    simple_chat_history.append(think_tool_msg)

-                    think_tool_response_msg = ChatMessageSimple(
-                        message=THINK_TOOL_RESPONSE_MESSAGE,
-                        token_count=THINK_TOOL_RESPONSE_TOKEN_COUNT,
-                        message_type=MessageType.TOOL_CALL_RESPONSE,
-                        tool_call_id=think_tool_call.tool_call_id,
-                        image_files=None,
-                    )
-                    simple_chat_history.append(think_tool_response_msg)
-                    span.span_data.output = THINK_TOOL_RESPONSE_MESSAGE
-                continue
-            else:
-                for tool_call in tool_calls:
-                    if tool_call.tool_name != RESEARCH_AGENT_TOOL_NAME:
-                        logger.warning(f"Unexpected tool call: {tool_call.tool_name}")
-                        continue
-
-                    research_agent_calls.append(tool_call)
-
-                if not research_agent_calls:
-                    logger.warning(
-                        "No research agent tool calls found, this should not happen."
-                    )
+                if not tool_calls:
+                    # Basically hope that this is an infrequent occurrence and hopefully multiple research
+                    # cycles have already run
+                    logger.warning("No tool calls found, this should not happen.")
                    report_turn_index = (
                        orchestrator_start_turn_index + cycle + reasoning_cycles
                    )
@@ -564,91 +497,177 @@ def run_deep_research_llm_loop(
                    final_turn_index = report_turn_index + (1 if report_reasoned else 0)
                    break

-                if len(research_agent_calls) > 1:
-                    emitter.emit(
-                        Packet(
-                            placement=Placement(
-                                turn_index=research_agent_calls[0].placement.turn_index
-                            ),
-                            obj=TopLevelBranching(
-                                num_parallel_branches=len(research_agent_calls)
-                            ),
+                special_tool_calls = check_special_tool_calls(tool_calls=tool_calls)
+
+                if special_tool_calls.generate_report_tool_call:
+                    report_turn_index = (
+                        special_tool_calls.generate_report_tool_call.placement.turn_index
+                    )
+                    report_reasoned = generate_final_report(
+                        history=simple_chat_history,
+                        llm=llm,
+                        token_counter=token_counter,
+                        state_container=state_container,
+                        emitter=emitter,
+                        turn_index=report_turn_index,
+                        citation_mapping=citation_mapping,
+                        user_identity=user_identity,
+                        saved_reasoning=most_recent_reasoning,
+                    )
+                    final_turn_index = report_turn_index + (1 if report_reasoned else 0)
+                    break
+                elif special_tool_calls.think_tool_call:
+                    think_tool_call = special_tool_calls.think_tool_call
+                    # Only process the THINK_TOOL and skip all other tool calls
+                    # This will not actually get saved to the db as a tool call but we'll attach it to the tool(s) called after
+                    # it as if it were just a reasoning model doing it. In the chat history, because it happens in 2 steps,
+                    # we will show it as a separate message.
+                    # NOTE: This does not need to increment the reasoning cycles because the custom token processor causes
+                    # the LLM step to handle this
+                    with function_span("think_tool") as span:
+                        span.span_data.input = str(think_tool_call.tool_args)
+                        most_recent_reasoning = state_container.reasoning_tokens
+                        tool_call_message = think_tool_call.to_msg_str()
+
+                        think_tool_msg = ChatMessageSimple(
+                            message=tool_call_message,
+                            token_count=token_counter(tool_call_message),
+                            message_type=MessageType.TOOL_CALL,
+                            tool_call_id=think_tool_call.tool_call_id,
+                            image_files=None,
                        )
-                    )
+                        simple_chat_history.append(think_tool_msg)

-                research_results = run_research_agent_calls(
-                    # The tool calls here contain the placement information
-                    research_agent_calls=research_agent_calls,
-                    parent_tool_call_ids=[
-                        tool_call.tool_call_id for tool_call in tool_calls
-                    ],
-                    tools=allowed_tools,
-                    emitter=emitter,
-                    state_container=state_container,
-                    llm=llm,
-                    is_reasoning_model=is_reasoning_model,
-                    token_counter=token_counter,
-                    citation_mapping=citation_mapping,
-                    user_identity=user_identity,
-                )
-
-                citation_mapping = research_results.citation_mapping
-
-                for tab_index, report in enumerate(
-                    research_results.intermediate_reports
-                ):
-                    if report is None:
-                        # The LLM will not see that this research was even attempted, it may try
-                        # something similar again but this is not bad.
-                        logger.error(
-                            f"Research agent call at tab_index {tab_index} failed, skipping"
+                        think_tool_response_msg = ChatMessageSimple(
+                            message=THINK_TOOL_RESPONSE_MESSAGE,
+                            token_count=THINK_TOOL_RESPONSE_TOKEN_COUNT,
+                            message_type=MessageType.TOOL_CALL_RESPONSE,
+                            tool_call_id=think_tool_call.tool_call_id,
+                            image_files=None,
                        )
-                        continue
+                        simple_chat_history.append(think_tool_response_msg)
+                        span.span_data.output = THINK_TOOL_RESPONSE_MESSAGE
+                    continue
+                else:
+                    for tool_call in tool_calls:
+                        if tool_call.tool_name != RESEARCH_AGENT_TOOL_NAME:
+                            logger.warning(
+                                f"Unexpected tool call: {tool_call.tool_name}"
+                            )
+                            continue

-                    current_tool_call = research_agent_calls[tab_index]
-                    tool_call_info = ToolCallInfo(
-                        parent_tool_call_id=None,
-                        turn_index=orchestrator_start_turn_index
-                        + cycle
-                        + reasoning_cycles,
-                        tab_index=tab_index,
-                        tool_name=current_tool_call.tool_name,
-                        tool_call_id=current_tool_call.tool_call_id,
-                        tool_id=get_tool_by_name(
-                            tool_name=RESEARCH_AGENT_DB_NAME, db_session=db_session
-                        ).id,
-                        reasoning_tokens=llm_step_result.reasoning
-                        or most_recent_reasoning,
-                        tool_call_arguments=current_tool_call.tool_args,
-                        tool_call_response=report,
-                        search_docs=None,  # Intermediate docs are not saved/shown
-                        generated_images=None,
+                        research_agent_calls.append(tool_call)
+
+                    if not research_agent_calls:
+                        logger.warning(
+                            "No research agent tool calls found, this should not happen."
+                        )
+                        report_turn_index = (
+                            orchestrator_start_turn_index + cycle + reasoning_cycles
+                        )
+                        report_reasoned = generate_final_report(
+                            history=simple_chat_history,
+                            llm=llm,
+                            token_counter=token_counter,
+                            state_container=state_container,
+                            emitter=emitter,
+                            turn_index=report_turn_index,
+                            citation_mapping=citation_mapping,
+                            user_identity=user_identity,
+                        )
+                        final_turn_index = report_turn_index + (
+                            1 if report_reasoned else 0
+                        )
+                        break
+
+                    if len(research_agent_calls) > 1:
+                        emitter.emit(
+                            Packet(
+                                placement=Placement(
+                                    turn_index=research_agent_calls[
+                                        0
+                                    ].placement.turn_index
+                                ),
+                                obj=TopLevelBranching(
+                                    num_parallel_branches=len(research_agent_calls)
+                                ),
+                            )
+                        )
+
+                    research_results = run_research_agent_calls(
+                        # The tool calls here contain the placement information
+                        research_agent_calls=research_agent_calls,
+                        parent_tool_call_ids=[
+                            tool_call.tool_call_id for tool_call in tool_calls
+                        ],
+                        tools=allowed_tools,
+                        emitter=emitter,
+                        state_container=state_container,
+                        llm=llm,
+                        is_reasoning_model=is_reasoning_model,
+                        token_counter=token_counter,
+                        citation_mapping=citation_mapping,
+                        user_identity=user_identity,
                    )
-                    state_container.add_tool_call(tool_call_info)

-                    tool_call_message = current_tool_call.to_msg_str()
-                    tool_call_token_count = token_counter(tool_call_message)
+                    citation_mapping = research_results.citation_mapping

-                    tool_call_msg = ChatMessageSimple(
-                        message=tool_call_message,
-                        token_count=tool_call_token_count,
-                        message_type=MessageType.TOOL_CALL,
-                        tool_call_id=current_tool_call.tool_call_id,
-                        image_files=None,
-                    )
-                    simple_chat_history.append(tool_call_msg)
+                    for tab_index, report in enumerate(
+                        research_results.intermediate_reports
+                    ):
+                        if report is None:
+                            # The LLM will not see that this research was even attempted, it may try
+                            # something similar again but this is not bad.
+                            logger.error(
+                                f"Research agent call at tab_index {tab_index} failed, skipping"
+                            )
+                            continue

-                    tool_call_response_msg = ChatMessageSimple(
-                        message=report,
-                        token_count=token_counter(report),
-                        message_type=MessageType.TOOL_CALL_RESPONSE,
-                        tool_call_id=current_tool_call.tool_call_id,
-                        image_files=None,
-                    )
-                    simple_chat_history.append(tool_call_response_msg)
+                        current_tool_call = research_agent_calls[tab_index]
+                        tool_call_info = ToolCallInfo(
+                            parent_tool_call_id=None,
+                            turn_index=orchestrator_start_turn_index
+                            + cycle
+                            + reasoning_cycles,
+                            tab_index=tab_index,
+                            tool_name=current_tool_call.tool_name,
+                            tool_call_id=current_tool_call.tool_call_id,
+                            tool_id=get_tool_by_name(
+                                tool_name=RESEARCH_AGENT_TOOL_NAME,
+                                db_session=db_session,
+                            ).id,
+                            reasoning_tokens=llm_step_result.reasoning
+                            or most_recent_reasoning,
+                            tool_call_arguments=current_tool_call.tool_args,
+                            tool_call_response=report,
+                            search_docs=None,  # Intermediate docs are not saved/shown
+                            generated_images=None,
+                        )
+                        state_container.add_tool_call(tool_call_info)

-            # If it reached this point, it did not call reasoning, so here we wipe it to not save it to multiple turns
-            most_recent_reasoning = None
+                        tool_call_message = current_tool_call.to_msg_str()
+                        tool_call_token_count = token_counter(tool_call_message)
+
+                        tool_call_msg = ChatMessageSimple(
+                            message=tool_call_message,
+                            token_count=tool_call_token_count,
+                            message_type=MessageType.TOOL_CALL,
+                            tool_call_id=current_tool_call.tool_call_id,
+                            image_files=None,
+                        )
+                        simple_chat_history.append(tool_call_msg)
+
+                        tool_call_response_msg = ChatMessageSimple(
+                            message=report,
+                            token_count=token_counter(report),
+                            message_type=MessageType.TOOL_CALL_RESPONSE,
+                            tool_call_id=current_tool_call.tool_call_id,
+                            image_files=None,
+                        )
+                        simple_chat_history.append(tool_call_response_msg)
+
+                # If it reached this point, it did not call reasoning, so here we wipe it to not save it to multiple turns
+                most_recent_reasoning = None

        emitter.emit(
            Packet(
--- a/backend/onyx/deep_research/dr_mock_tools.py
+++ b/backend/onyx/deep_research/dr_mock_tools.py
@@ -1,6 +1,6 @@
 GENERATE_PLAN_TOOL_NAME = "generate_plan"

-RESEARCH_AGENT_DB_NAME = "ResearchAgent"
+RESEARCH_AGENT_IN_CODE_ID = "ResearchAgent"
 RESEARCH_AGENT_TOOL_NAME = "research_agent"
 RESEARCH_AGENT_TASK_KEY = "task"

--- a/backend/onyx/document_index/opensearch/README.md
+++ b/backend/onyx/document_index/opensearch/README.md
@@ -0,0 +1,62 @@
+# Opensearch Idiosyncrasies
+
+## How it works at a high level
+Opensearch has 2 phases, a `Search` phase and a `Fetch` phase. The `Search` phase works by getting the document scores on each
+shard separately, then typically a fetch phase grabs all of the relevant fields/data for returning to the user. There is also
+an intermediate phase (seemingly built specifically to handle hybrid search queries) which can run in between as a processor.
+References:
+https://docs.opensearch.org/latest/search-plugins/search-pipelines/search-processors/
+https://docs.opensearch.org/latest/search-plugins/search-pipelines/normalization-processor/
+https://docs.opensearch.org/latest/query-dsl/compound/hybrid/
+
+## How Hybrid queries work
+Hybrid queries are basically parallel queries that each run through their own `Search` phase and do not interact in any way.
+They also run across all the shards. It is not entirely clear what happens if a combination pipeline is not specified for them,
+perhaps the scores are just summed.
+
+When the normalization processor is applied to keyword/vector hybrid searches, documents that show up due to keyword match may
+not also have shown up in the vector search and vice versa. In these situations, it just receives a 0 score for the missing
+query component. Opensearch does not run another phase to recapture those missing values. The impact of this is that after
+normalizing, the missing scores are 0 but this is a higher score than if it actually received a non-zero score.
+
+This may not be immediately obvious so an explanation is included here. If it got a non-zero score instead, it must be lower
+than all of the other scores of the list (otherwise it would have shown up). Therefore it would impact the normalization and
+push the other scores higher so that it's not only the lowest score still, but now it's a differentiated lowest score. This is
+not strictly the case in a multi-node setup but the high level concept approximately holds. So basically the 0 score is a form
+of "minimum value clipping".
+
+## On time decay and boosting
+Embedding models do not have a uniform distribution from 0 to 1. The values typically cluster strongly around 0.6 to 0.8, but
+this also varies between models and even between queries. It is not a safe assumption to pre-normalize the scores, so we also
+cannot apply any additive or multiplicative boost to them. I.e., if the scores cluster around 0.6 to 0.8 and I give a 50% penalty
+to a score, it doesn't bring a result from the top of the range to the 50th percentile; it brings it under 0.6, making it the worst match.
+Same logic applies to additive boosting.
+
+So these boosts can only be applied after normalization. Unfortunately with Opensearch, the normalization processor runs last
+and only applies to the results of the completely independent `Search` phase queries. So if a time based boost (a separate
+query which filters on recently updated documents) is added, it would not be able to introduce any new documents
+to the set (since the new documents would have no keyword/vector score or already be present) since the 0 scores on keyword
+and vector would make the docs which only came because of time filter very low scoring. This can however make some of the lower
+scored documents from the union of all the `Search` phase documents show up higher and potentially not get dropped before
+being fetched and returned to the user. But there are other issues of including these:
+- There is no way to sort by this field, only a filter, so there's no way to guarantee the best docs even irrespective of the
+contents. If there are lots of updates, this may miss the most relevant ones.
+- There is not a good way to normalize this field, the best is to clip it on the bottom.
+- This would require using min-max norm but z-score norm is better for the other functions due to things like it being less
+sensitive to outliers, better handles distribution drifts (min-max assumes stable meaningful ranges), better for comparing
+"unusual-ness" across distributions.
+
+So while it is possible to apply time based boosting at the normalization stage (or specifically to the keyword score), we have
+decided it is better to not apply it during the OpenSearch query.
+
+Because of these limitations, Onyx in code applies further refinements, boostings, etc. based on OpenSearch providing an initial
+filtering. The impact of time decay and boost should not be so big that we would need orders of magnitude more results back
+from OpenSearch.
+
+## Other concepts to be aware of
+Within the `Search` phase, there are optional steps like Rescore but these are not useful for the combination/normalization
+work that is relevant for the hybrid search. Since the Rescore happens prior to normalization, it's not able to provide any
+meaningful operations to the query for our usage.
+
+Because the Title is included in the Contents for both embedding and keyword searches, the Title scores are very low relative to
+the actual full contents scoring. It is seen as a boost rather than a core scoring component. Time decay works similarly.
--- a/backend/onyx/document_index/opensearch/opensearch_document_index.py
+++ b/backend/onyx/document_index/opensearch/opensearch_document_index.py
@@ -58,7 +58,7 @@ def _convert_opensearch_chunk_to_inference_chunk_uncleaned(
        blurb=chunk.blurb,
        content=chunk.content,
        source_links=json.loads(chunk.source_links) if chunk.source_links else None,
-        image_file_id=chunk.image_file_name,
+        image_file_id=chunk.image_file_id,
        # TODO(andrei) Yuhong says he doesn't think we need that anymore. Used
        # if a section needed to be split into diff chunks. A section is a part
        # of a doc that a link will take you to. But don't chunks have their own
@@ -68,12 +68,9 @@ def _convert_opensearch_chunk_to_inference_chunk_uncleaned(
        source_type=DocumentSource(chunk.source_type),
        semantic_identifier=chunk.semantic_identifier,
        title=chunk.title,
-        # TODO(andrei): Same comment as in
-        # _convert_onyx_chunk_to_opensearch_document. Yuhong thinks OpenSearch
-        # has some thing out of the box for this. Just need to look at it in a
-        # followup.
-        boost=1,
-        # TODO(andrei): Do in a followup.
+        boost=chunk.global_boost,
+        # TODO(andrei): Do in a followup. We should be able to get this from
+        # OpenSearch.
        recency_bias=1.0,
        # TODO(andrei): This is how good the match is, we need this, key insight
        # is we can order chunks by this. Should not be hard to plumb this from
@@ -83,10 +80,9 @@ def _convert_opensearch_chunk_to_inference_chunk_uncleaned(
        # TODO(andrei): Don't worry about these for now.
        # is_relevant
        # relevance_explanation
-        # metadata
        # TODO(andrei): Same comment as in
        # _convert_onyx_chunk_to_opensearch_document.
-        metadata={},
+        metadata=json.loads(chunk.metadata),
        # TODO(andrei): The vector DB needs to supply this. I vaguely know
        # OpenSearch can from the documentation I've seen till now, look at this
        # in a followup.
@@ -132,29 +128,19 @@ def _convert_onyx_chunk_to_opensearch_document(
        # value for now.
        num_tokens=0,
        source_type=chunk.source_document.source.value,
-        # TODO(andrei): This is just represented a bit differently in
-        # DocumentBase than how we expect it in the schema currently. Look at
-        # this closer in a followup. Always defaults to None for now.
-        # metadata=chunk.source_document.metadata,
+        metadata=json.dumps(chunk.source_document.metadata),
        last_updated=chunk.source_document.doc_updated_at,
        # TODO(andrei): Don't currently see an easy way of porting this, and
        # besides some connectors genuinely don't have this data. Look at this
        # closer in a followup. Always defaults to None for now.
        # created_at=None,
        public=chunk.access.is_public,
-        # TODO(andrei): Implement ACL in a followup, currently none of the
-        # methods in OpenSearchDocumentIndex support it anyway. Always defaults
-        # to None for now.
-        # access_control_list=chunk.access.to_acl(),
-        # TODO(andrei): This doesn't work bc global_boost is float, presumably
-        # between 0.0 and inf (check this) and chunk.boost is an int from -inf
-        # to +inf. Look at how the scaling compares between these in a followup.
-        # Always defaults to 1.0 for now.
-        # global_boost=chunk.boost,
+        access_control_list=list(chunk.access.to_acl()),
+        global_boost=chunk.boost,
        semantic_identifier=chunk.source_document.semantic_identifier,
        # TODO(andrei): Ask Chris more about this later. Always defaults to None
        # for now.
-        # image_file_name=None,
+        image_file_id=None,
        source_links=json.dumps(chunk.source_links) if chunk.source_links else None,
        blurb=chunk.blurb,
        document_sets=list(chunk.document_sets) if chunk.document_sets else None,
--- a/backend/onyx/document_index/opensearch/schema.py
+++ b/backend/onyx/document_index/opensearch/schema.py
@@ -27,7 +27,7 @@ ACCESS_CONTROL_LIST_FIELD_NAME = "access_control_list"
 HIDDEN_FIELD_NAME = "hidden"
 GLOBAL_BOOST_FIELD_NAME = "global_boost"
 SEMANTIC_IDENTIFIER_FIELD_NAME = "semantic_identifier"
-IMAGE_FILE_NAME_FIELD_NAME = "image_file_name"
+IMAGE_FILE_ID_FIELD_NAME = "image_file_id"
 SOURCE_LINKS_FIELD_NAME = "source_links"
 DOCUMENT_SETS_FIELD_NAME = "document_sets"
 PROJECT_IDS_FIELD_NAME = "project_ids"
@@ -71,37 +71,41 @@ class DocumentChunk(BaseModel):
    max_chunk_size: int = DEFAULT_MAX_CHUNK_SIZE

    # Either both should be None or both should be non-None.
-    title: str | None = None
-    title_vector: list[float] | None = None
+    title: str | None
+    title_vector: list[float] | None
    content: str
    content_vector: list[float]
    # The actual number of tokens in the chunk.
    num_tokens: int

    source_type: str
-    # Application logic should store these strings the format key:::value.
-    metadata: list[str] | None = None
-    last_updated: datetime | None = None
+    # Contains a string representation of a dict which maps string key to either
+    # string value or list of string values.
+    # TODO(andrei): When we augment content with metadata this can just be an
+    # index pointer, and when we support metadata list that will just be a list
+    # of strings.
+    metadata: str
+    last_updated: datetime | None
    created_at: datetime | None = None

    public: bool
-    access_control_list: list[str] | None = None
+    access_control_list: list[str]
    # Defaults to False, currently gets written during update not index.
    hidden: bool = False

-    global_boost: float = 1.0
+    global_boost: int

    semantic_identifier: str
-    image_file_name: str | None = None
+    image_file_id: str | None
    # Contains a string representation of a dict which maps offset into the raw
    # chunk text to the link corresponding to that point.
-    source_links: str | None = None
+    source_links: str | None
    blurb: str

-    document_sets: list[str] | None = None
-    project_ids: list[int] | None = None
+    document_sets: list[str] | None
+    project_ids: list[int] | None

-    tenant_id: str | None = None
+    tenant_id: str | None

    @model_validator(mode="after")
    def check_num_tokens_fits_within_max_chunk_size(self) -> Self:
@@ -183,6 +187,7 @@ class DocumentSchema:
                    "fields": {
                        # Subfield accessed as title.keyword. Not indexed for
                        # values longer than 256 chars.
+                        # TODO(andrei): Ask Yuhong do we want this?
                        "keyword": {"type": "keyword", "ignore_above": 256}
                    },
                },
@@ -200,6 +205,8 @@ class DocumentSchema:
                        "parameters": {"ef_construction": EF_CONSTRUCTION, "m": M},
                    },
                },
+                # TODO(andrei): This is a tensor in Vespa. Also look at feature
+                # parity for these other method fields.
                CONTENT_VECTOR_FIELD_NAME: {
                    "type": "knn_vector",
                    "dimension": vector_dimension,
@@ -216,8 +223,9 @@ class DocumentSchema:
                # # Number of tokens in the chunk's content.
                # NUM_TOKENS_FIELD_NAME: {"type": "integer", "store": True},
                SOURCE_TYPE_FIELD_NAME: {"type": "keyword"},
-                # Application logic should store in the format key:::value.
                METADATA_FIELD_NAME: {"type": "keyword"},
+                # TODO(andrei): Check if Vespa stores seconds, we may wanna do
+                # seconds here not millis.
                LAST_UPDATED_FIELD_NAME: {
                    "type": "date",
                    "format": "epoch_millis",
@@ -247,7 +255,7 @@ class DocumentSchema:
                # all other search filters; up to search implementations to
                # guarantee this.
                HIDDEN_FIELD_NAME: {"type": "boolean"},
-                GLOBAL_BOOST_FIELD_NAME: {"type": "float"},
+                GLOBAL_BOOST_FIELD_NAME: {"type": "integer"},
                # This field is only used for displaying a useful name for the
                # doc in the UI and is not used for searching. Disabling these
                # features to increase perf.
@@ -258,7 +266,7 @@ class DocumentSchema:
                    "store": False,
                },
                # Same as above; used to display an image along with the doc.
-                IMAGE_FILE_NAME_FIELD_NAME: {
+                IMAGE_FILE_ID_FIELD_NAME: {
                    "type": "keyword",
                    "index": False,
                    "doc_values": False,
@@ -285,6 +293,7 @@ class DocumentSchema:
                DOCUMENT_ID_FIELD_NAME: {"type": "keyword"},
                CHUNK_INDEX_FIELD_NAME: {"type": "integer"},
                # The maximum number of tokens this chunk's content can hold.
+                # TODO(andrei): Can we generalize this to embedding type?
                MAX_CHUNK_SIZE_FIELD_NAME: {"type": "integer"},
            }
        }
--- a/backend/onyx/document_index/opensearch/search.py
+++ b/backend/onyx/document_index/opensearch/search.py
@@ -316,6 +316,7 @@ class DocumentQuery:
            {
                "multi_match": {
                    "query": query_text,
+                    # TODO(andrei): Ask Yuhong do we want this?
                    "fields": [f"{TITLE_FIELD_NAME}^2", f"{TITLE_FIELD_NAME}.keyword"],
                    "type": "best_fields",
                }
--- a/backend/onyx/file_processing/html_utils.py
+++ b/backend/onyx/file_processing/html_utils.py
@@ -164,7 +164,7 @@ def format_document_soup(


 def parse_html_page_basic(text: str | BytesIO | IO[bytes]) -> str:
-    soup = bs4.BeautifulSoup(text, "html.parser")
+    soup = bs4.BeautifulSoup(text, "lxml")
    return format_document_soup(soup)


@@ -174,7 +174,7 @@ def web_html_cleanup(
    additional_element_types_to_discard: list[str] | None = None,
 ) -> ParsedHTML:
    if isinstance(page_content, str):
-        soup = bs4.BeautifulSoup(page_content, "html.parser")
+        soup = bs4.BeautifulSoup(page_content, "lxml")
    else:
        soup = page_content

--- a/backend/onyx/file_processing/unstructured.py
+++ b/backend/onyx/file_processing/unstructured.py
@@ -9,7 +9,7 @@ from onyx.key_value_store.interface import KvKeyNotFoundError
 from onyx.utils.logger import setup_logger

 if TYPE_CHECKING:
-    from unstructured_client.models import operations  # type: ignore
+    from unstructured_client.models import operations


 logger = setup_logger()
@@ -55,19 +55,19 @@ def _sdk_partition_request(

 def unstructured_to_text(file: IO[Any], file_name: str) -> str:
    from unstructured.staging.base import dict_to_elements
-    from unstructured_client import UnstructuredClient  # type: ignore
+    from unstructured_client import UnstructuredClient

    logger.debug(f"Starting to read file: {file_name}")
    req = _sdk_partition_request(file, file_name, strategy="fast")

    unstructured_client = UnstructuredClient(api_key_auth=get_unstructured_api_key())

-    response = unstructured_client.general.partition(req)
-    elements = dict_to_elements(response.elements)
+    response = unstructured_client.general.partition(request=req)

    if response.status_code != 200:
        err = f"Received unexpected status code {response.status_code} from Unstructured API."
        logger.error(err)
        raise ValueError(err)

+    elements = dict_to_elements(response.elements or [])
    return "\n\n".join(str(el) for el in elements)
--- a/backend/onyx/indexing/adapters/user_file_indexing_adapter.py
+++ b/backend/onyx/indexing/adapters/user_file_indexing_adapter.py
@@ -6,15 +6,19 @@ from uuid import UUID

 from sqlalchemy import select
 from sqlalchemy.exc import OperationalError
+from sqlalchemy.orm import selectinload
 from sqlalchemy.orm import Session
 from sqlalchemy.orm.session import TransactionalContext

 from onyx.access.access import get_access_for_user_files
 from onyx.access.models import DocumentAccess
 from onyx.configs.constants import DEFAULT_BOOST
+from onyx.configs.constants import NotificationType
 from onyx.connectors.models import Document
 from onyx.db.enums import UserFileStatus
+from onyx.db.models import Persona
 from onyx.db.models import UserFile
+from onyx.db.notification import create_notification
 from onyx.db.user_file import fetch_chunk_counts_for_user_files
 from onyx.db.user_file import fetch_user_project_ids_for_user_files
 from onyx.file_store.utils import store_user_file_plaintext
@@ -194,6 +198,42 @@ class UserFileIndexingAdapter:
            user_file_id_to_token_count=user_file_id_to_token_count,
        )

+    def _notify_assistant_owners_if_files_ready(
+        self, user_files: list[UserFile]
+    ) -> None:
+        """Notify each assistant owner, at most once per call, once all files of
+        that assistant are COMPLETED. Assistants without an owner are skipped."""
+        notified_ids: set[int] = set()
+        for user_file in user_files:
+            if user_file.status != UserFileStatus.COMPLETED:
+                continue
+            for assistant in user_file.assistants:
+                # Skip ownerless assistants and ones already notified by this call
+                # (several files of one assistant may complete in the same batch).
+                if assistant.user_id is None or assistant.id in notified_ids:
+                    continue
+                # user_file is COMPLETED (checked above); verify the other files.
+                if not all(
+                    f.status == UserFileStatus.COMPLETED
+                    for f in assistant.user_files
+                    if f.id != user_file.id
+                ):
+                    continue
+
+                notified_ids.add(assistant.id)
+                create_notification(
+                    user_id=assistant.user_id,
+                    notif_type=NotificationType.ASSISTANT_FILES_READY,
+                    db_session=self.db_session,
+                    title="Your files are ready!",
+                    description=f"All files for agent {assistant.name} have been processed and are now available.",
+                    additional_data={
+                        "persona_id": assistant.id,
+                        "link": f"/assistants/{assistant.id}",
+                    },
+                    autocommit=False,
+                )
+
    def post_index(
        self,
        context: DocumentBatchPrepareContext,
@@ -204,7 +244,10 @@ class UserFileIndexingAdapter:
        user_file_ids = [doc.id for doc in context.updatable_docs]

        user_files = (
-            self.db_session.query(UserFile).filter(UserFile.id.in_(user_file_ids)).all()
+            self.db_session.query(UserFile)
+            .options(selectinload(UserFile.assistants).selectinload(Persona.user_files))
+            .filter(UserFile.id.in_(user_file_ids))
+            .all()
        )
        for user_file in user_files:
            # don't update the status if the user file is being deleted
@@ -217,6 +260,10 @@ class UserFileIndexingAdapter:
            user_file.token_count = result.user_file_id_to_token_count[
                str(user_file.id)
            ]
+
+        # Notify assistant owners if all their files are now processed
+        self._notify_assistant_owners_if_files_ready(user_files)
+
        self.db_session.commit()

        # Store the plaintext in the file store for faster retrieval
--- a/backend/onyx/llm/prompt_cache/providers/vertex.py
+++ b/backend/onyx/llm/prompt_cache/providers/vertex.py
@@ -48,7 +48,7 @@ class VertexAIPromptCacheProvider(PromptCacheProvider):
            cacheable_prefix=cacheable_prefix,
            suffix=suffix,
            continuation=continuation,
-            transform_cacheable=_add_vertex_cache_control,
+            transform_cacheable=None,  # TODO: support explicit caching
        )

    def extract_cache_metadata(
@@ -89,6 +89,10 @@ def _add_vertex_cache_control(
    not at the message level. This function converts string content to the array format
    and adds cache_control to the last content block in each cacheable message.
    """
+    # NOTE: unfortunately we need a much more sophisticated mechanism to support
+    # explicit caching with vertex in the presence of tools and system messages
+    # (since they're supposed to be stripped out when setting cache_control)
+    # so we're deferring this to a future PR.
    updated: list[ChatCompletionMessage] = []
    for message in messages:
        mutated = dict(message)
--- a/backend/onyx/llm/well_known_providers/auto_update_service.py
+++ b/backend/onyx/llm/well_known_providers/auto_update_service.py
@@ -82,7 +82,6 @@ def fetch_llm_recommendations_from_github(

 def sync_llm_models_from_github(
    db_session: Session,
-    config: LLMRecommendations,
    force: bool = False,
 ) -> dict[str, int]:
    """Sync models from GitHub config to database for all Auto mode providers.
@@ -101,19 +100,24 @@ def sync_llm_models_from_github(
    Returns:
        Dict of provider_name -> number of changes made.
    """
-    # Skip if we've already processed this version (unless forced)
-    last_updated_at = _get_cached_last_updated_at()
-    if not force and last_updated_at and config.updated_at <= last_updated_at:
-        logger.debug("GitHub config unchanged, skipping sync")
-        return {}
-
    results: dict[str, int] = {}

    # Get all providers in Auto mode
    auto_providers = fetch_auto_mode_providers(db_session)
-
    if not auto_providers:
        logger.debug("No providers in Auto mode found")
+        return {}
+
+    # Fetch config from GitHub
+    config = fetch_llm_recommendations_from_github()
+    if not config:
+        logger.warning("Failed to fetch GitHub config")
+        return {}
+
+    # Skip if we've already processed this version (unless forced)
+    last_updated_at = _get_cached_last_updated_at()
+    if not force and last_updated_at and config.updated_at <= last_updated_at:
+        logger.debug("GitHub config unchanged, skipping sync")
        _set_cached_last_updated_at(config.updated_at)
        return {}

--- a/backend/onyx/onyxbot/slack/handlers/handle_regular_answer.py
+++ b/backend/onyx/onyxbot/slack/handlers/handle_regular_answer.py
@@ -35,6 +35,7 @@ from onyx.onyxbot.slack.utils import respond_in_thread_or_channel
 from onyx.onyxbot.slack.utils import SlackRateLimiter
 from onyx.onyxbot.slack.utils import update_emote_react
 from onyx.server.query_and_chat.models import CreateChatMessageRequest
+from onyx.server.query_and_chat.models import MessageOrigin
 from onyx.utils.logger import OnyxLoggingAdapter

 srl = SlackRateLimiter()
@@ -236,6 +237,7 @@ def handle_regular_answer(
                retrieval_details=retrieval_details,
                rerank_settings=None,  # Rerank customization supported in Slack flow
                db_session=db_session,
+                origin=MessageOrigin.SLACKBOT,
            )

        # if it's a DM or ephemeral message, answer based on private documents.
--- a/backend/onyx/server/features/notifications/api.py
+++ b/backend/onyx/server/features/notifications/api.py
@@ -9,11 +9,13 @@ from onyx.db.models import User
 from onyx.db.notification import dismiss_notification
 from onyx.db.notification import get_notification_by_id
 from onyx.db.notification import get_notifications
+from onyx.server.features.release_notes.utils import (
+    ensure_release_notes_fresh_and_notify,
+)
 from onyx.server.settings.models import Notification as NotificationModel
 from onyx.utils.logger import setup_logger

 logger = setup_logger()
-
 router = APIRouter(prefix="/notifications")


@@ -22,9 +24,27 @@ def get_notifications_api(
    user: User = Depends(current_user),
    db_session: Session = Depends(get_session),
 ) -> list[NotificationModel]:
+    """
+    Get all notifications for the current user, including dismissed ones.
+
+    Note: also executes background checks that should create notifications.
+
+    Examples of checks that create new notifications:
+    - Checking for new release notes the user hasn't seen
+    - Checking for misconfigurations due to version changes
+    - Explicitly announcing breaking changes
+    """
+    # If more background checks are added, this should be moved to a helper function
+    try:
+        ensure_release_notes_fresh_and_notify(db_session)
+    except Exception:
+        # Log exception but don't fail the entire endpoint
+        # Users can still see their existing notifications
+        logger.exception("Failed to check for release notes in notifications endpoint")
+
    notifications = [
        NotificationModel.from_model(notif)
-        for notif in get_notifications(user, db_session, include_dismissed=False)
+        for notif in get_notifications(user, db_session, include_dismissed=True)
    ]
    return notifications

--- a/backend/onyx/server/features/release_notes/init.py
+++ b/backend/onyx/server/features/release_notes/init.py
--- a/backend/onyx/server/features/release_notes/constants.py
+++ b/backend/onyx/server/features/release_notes/constants.py
@@ -0,0 +1,23 @@
+"""Constants for release notes functionality."""
+
+# GitHub source
+GITHUB_RAW_BASE_URL = (
+    "https://raw.githubusercontent.com/onyx-dot-app/documentation/main"
+)
+GITHUB_CHANGELOG_RAW_URL = f"{GITHUB_RAW_BASE_URL}/changelog.mdx"
+
+# Base URL for changelog documentation (used for notification links)
+DOCS_CHANGELOG_BASE_URL = "https://docs.onyx.app/changelog"
+
+FETCH_TIMEOUT = 60.0  # presumably seconds for the changelog fetch; confirm at call site
+
+# Redis keys (in shared namespace)
+REDIS_KEY_PREFIX = "release_notes:"
+REDIS_KEY_FETCHED_AT = f"{REDIS_KEY_PREFIX}fetched_at"
+REDIS_KEY_ETAG = f"{REDIS_KEY_PREFIX}etag"
+
+# Cache TTL: 24 hours, expressed in seconds
+REDIS_CACHE_TTL = 60 * 60 * 24
+
+# Auto-refresh threshold: 1 hour, expressed in seconds
+AUTO_REFRESH_THRESHOLD_SECONDS = 60 * 60
--- a/backend/onyx/server/features/release_notes/models.py
+++ b/backend/onyx/server/features/release_notes/models.py
@@ -0,0 +1,11 @@
+"""Pydantic models for release notes."""
+
+from pydantic import BaseModel
+
+
+class ReleaseNoteEntry(BaseModel):
+    """One version's release note entry; every field is a plain string."""
+
+    version: str  # version tag, e.g., "v2.7.0"
+    date: str  # display date string, e.g., "January 7th, 2026"
+    title: str  # Display title for notifications: "Onyx v2.7.0 is available!"
--- a/backend/onyx/server/features/release_notes/utils.py
+++ b/backend/onyx/server/features/release_notes/utils.py
@@ -0,0 +1,247 @@
+"""Utility functions for release notes parsing and caching."""
+
+import re
+from datetime import datetime
+from datetime import timezone
+
+import httpx
+from sqlalchemy.orm import Session
+
+from onyx import __version__
+from onyx.configs.constants import OnyxRedisLocks
+from onyx.db.release_notes import create_release_notifications_for_versions
+from onyx.redis.redis_pool import get_shared_redis_client
+from onyx.server.features.release_notes.constants import AUTO_REFRESH_THRESHOLD_SECONDS
+from onyx.server.features.release_notes.constants import FETCH_TIMEOUT
+from onyx.server.features.release_notes.constants import GITHUB_CHANGELOG_RAW_URL
+from onyx.server.features.release_notes.constants import REDIS_CACHE_TTL
+from onyx.server.features.release_notes.constants import REDIS_KEY_ETAG
+from onyx.server.features.release_notes.constants import REDIS_KEY_FETCHED_AT
+from onyx.server.features.release_notes.models import ReleaseNoteEntry
+from onyx.utils.logger import setup_logger
+
+logger = setup_logger()
+
+
+# ============================================================================
+# Version Utilities
+# ============================================================================
+
+
+def is_valid_version(version: str) -> bool:
+    """True only if the entire string is vX.Y.Z[-suffix.N] (no trailing newline)."""
+    return re.fullmatch(r"v\d+\.\d+\.\d+(-[a-zA-Z]+\.\d+)?", version) is not None
+
+
+def parse_version_tuple(version: str) -> tuple[int, int, int]:
+    """Turn a version string into a (major, minor, patch) tuple for sorting."""
+    # Drop one leading "v", then any "-suffix" tail such as "-beta.1".
+    core = re.sub(r"-.*$", "", re.sub(r"^v", "", version))
+    # Only the first three dot-separated fields are read; each must be an
+    # integer literal, otherwise int() raises ValueError.
+    numbers = [int(field) for field in core.split(".")[:3]]
+    # Absent minor/patch fields count as 0.
+    major, minor, patch = numbers + [0] * (3 - len(numbers))
+    return (major, minor, patch)
+
+
+def is_version_gte(v1: str, v2: str) -> bool:
+    """True when v1 >= v2; suffixes like -cloud.X or -beta.X are ignored."""
+    return not parse_version_tuple(v1) < parse_version_tuple(v2)
+
+
+# ============================================================================
+# MDX Parsing
+# ============================================================================
+
+
+def parse_mdx_to_release_note_entries(mdx_content: str) -> list[ReleaseNoteEntry]:
+    """Parse MDX into release entries filtered by __version__; ValueError if none."""
+    all_entries = []
+    # Matches <Update label="..." description="..." [tags={...}] ...>...</Update>
+    update_pattern = (
+        r'<Update\s+label="([^"]+)"\s+description="([^"]+)"'
+        r"(?:\s+tags=\{([^}]+)\})?[^>]*>"
+        r".*?"
+        r"</Update>"
+    )
+
+    for match in re.finditer(update_pattern, mdx_content, re.DOTALL):
+        version = match.group(1)
+        date = match.group(2)
+
+        if is_valid_version(version):
+            all_entries.append(
+                ReleaseNoteEntry(
+                    version=version,
+                    date=date,
+                    title=f"Onyx {version} is available!",
+                )
+            )
+
+    if not all_entries:
+        raise ValueError("Could not parse any release note entries from MDX.")
+
+    # Recognized vX.Y.Z version: keep only entries whose version is >= __version__
+    if __version__ and is_valid_version(__version__):
+        entries = [
+            entry for entry in all_entries if is_version_gte(entry.version, __version__)
+        ]
+    elif __version__ and "nightly" in __version__:
+        # Just show the latest entry for nightly versions
+        entries = sorted(
+            all_entries, key=lambda x: parse_version_tuple(x.version), reverse=True
+        )[:1]
+    else:
+        # Empty or unrecognized version (likely `development`):
+        # show all entries instead of failing on the membership test above
+        entries = all_entries
+
+    return entries
+
+
+# ============================================================================
+# Cache Helpers (ETag + timestamp only)
+# ============================================================================
+
+
+def get_cached_etag() -> str | None:
+    """Return the GitHub ETag stored in Redis, or None if missing or on error."""
+    redis_client = get_shared_redis_client()
+    try:
+        raw = redis_client.get(REDIS_KEY_ETAG)
+        if not raw:
+            return None
+        return raw.decode("utf-8") if isinstance(raw, bytes) else str(raw)
+    except Exception as e:
+        logger.error(f"Failed to get cached etag from Redis: {e}")
+        return None
+
+
+def get_last_fetch_time() -> datetime | None:
+    """Return the last GitHub fetch time as an aware UTC datetime.
+
+    Returns None when the timestamp key is absent/empty, or when reading or
+    parsing it fails (the failure is logged).
+    """
+    redis_client = get_shared_redis_client()
+    try:
+        raw_value = redis_client.get(REDIS_KEY_FETCHED_AT)
+        if not raw_value:
+            return None
+
+        # The client may return bytes or an already-decoded value.
+        if isinstance(raw_value, bytes):
+            iso_text = raw_value.decode("utf-8")
+        else:
+            iso_text = str(raw_value)
+
+        parsed = datetime.fromisoformat(iso_text)
+
+        # fromisoformat() yields a naive datetime when the text carries no
+        # offset; such values are assumed to be UTC, while offset-aware ones
+        # are converted to UTC.
+        if parsed.tzinfo is None:
+            return parsed.replace(tzinfo=timezone.utc)
+        return parsed.astimezone(timezone.utc)
+    except Exception as e:
+        logger.error(f"Failed to get last fetch time from Redis: {e}")
+        return None
+
+
+def save_fetch_metadata(etag: str | None) -> None:
+    """Record the current UTC time as the last fetch, plus the ETag if given."""
+    redis_client = get_shared_redis_client()
+    fetched_at = datetime.now(timezone.utc).isoformat()
+    # A falsy etag leaves any previously stored ETag untouched.
+    try:
+        redis_client.set(REDIS_KEY_FETCHED_AT, fetched_at, ex=REDIS_CACHE_TTL)
+        if etag:
+            redis_client.set(REDIS_KEY_ETAG, etag, ex=REDIS_CACHE_TTL)
+    except Exception as e:
+        logger.error(f"Failed to save fetch metadata to Redis: {e}")
+
+
+def is_cache_stale() -> bool:
+    """True when no fetch is recorded or the last one exceeds the threshold."""
+    last_fetch = get_last_fetch_time()
+    if last_fetch is None:
+        return True
+    elapsed_seconds = (datetime.now(timezone.utc) - last_fetch).total_seconds()
+    return elapsed_seconds > AUTO_REFRESH_THRESHOLD_SECONDS
+
+
+# ============================================================================
+# Main Function
+# ============================================================================
+
+
+def ensure_release_notes_fresh_and_notify(db_session: Session) -> None:
+    """
+    Check GitHub for new release notes and create notifications if needed.
+
+    Called from the /api/notifications endpoint, i.e. by every user's
+    notification fetch, so a non-blocking Redis lock keeps concurrent requests
+    from all hitting GitHub when the cache is stale. Uses a conditional (ETag)
+    request; the database handles notification deduplication. Failures are
+    logged, not raised. `db_session` is passed to the notification helper.
+    """
+    if not is_cache_stale():
+        return
+
+    # Acquire lock to prevent concurrent fetches
+    redis_client = get_shared_redis_client()
+    lock = redis_client.lock(
+        OnyxRedisLocks.RELEASE_NOTES_FETCH_LOCK,
+        timeout=90,  # 90 second timeout for the lock
+    )
+
+    # Non-blocking acquire - if we can't get the lock, another request is handling it
+    acquired = lock.acquire(blocking=False)
+    if not acquired:
+        logger.debug("Another request is already fetching release notes, skipping.")
+        return
+
+    try:
+        logger.debug("Checking GitHub for release notes updates.")
+
+        # Use ETag for conditional request
+        headers: dict[str, str] = {}
+        etag = get_cached_etag()
+        if etag:
+            headers["If-None-Match"] = etag
+
+        try:
+            response = httpx.get(
+                GITHUB_CHANGELOG_RAW_URL,
+                headers=headers,
+                timeout=FETCH_TIMEOUT,
+                follow_redirects=True,
+            )
+
+            if response.status_code == 304:
+                # Content unchanged, just update timestamp
+                logger.debug("Release notes unchanged (304).")
+                save_fetch_metadata(etag)
+                return
+
+            response.raise_for_status()
+
+            # Parse, then create notifications in chronological (semantic) order
+            entries = parse_mdx_to_release_note_entries(response.text)
+            entries = sorted(entries, key=lambda x: parse_version_tuple(x.version))
+            create_release_notifications_for_versions(db_session, entries)
+
+            # Store the new ETag only after notifications were created; otherwise a
+            # failure above would be masked by 304 responses on later checks.
+            save_fetch_metadata(response.headers.get("ETag"))
+
+        except Exception as e:
+            logger.error(f"Failed to check release notes: {e}")
+            # Update timestamp even on failure to prevent retry storms
+            # We don't save etag on failure to allow retry with conditional request
+            save_fetch_metadata(None)
+    finally:
+        # Always release the lock
+        if lock.owned():
+            lock.release()
--- a/backend/onyx/server/features/web_search/api.py
+++ b/backend/onyx/server/features/web_search/api.py
@@ -22,6 +22,9 @@ from onyx.tools.tool_implementations.open_url.models import WebContentProvider
 from onyx.tools.tool_implementations.open_url.onyx_web_crawler import (
    OnyxWebCrawler,
 )
+from onyx.tools.tool_implementations.open_url.utils import (
+    filter_web_contents_with_no_title_or_content,
+)
 from onyx.tools.tool_implementations.web_search.models import WebContentProviderConfig
 from onyx.tools.tool_implementations.web_search.models import WebSearchProvider
 from onyx.tools.tool_implementations.web_search.providers import (
@@ -30,6 +33,9 @@ from onyx.tools.tool_implementations.web_search.providers import (
 from onyx.tools.tool_implementations.web_search.providers import (
    build_search_provider_from_config,
 )
+from onyx.tools.tool_implementations.web_search.utils import (
+    filter_web_search_results_with_no_title_or_snippet,
+)
 from onyx.tools.tool_implementations.web_search.utils import (
    truncate_search_result_content,
 )
@@ -156,7 +162,10 @@ def _run_web_search(
                status_code=502, detail="Web search provider failed to execute query."
            ) from exc

-        trimmed_results = list(search_results)[: request.max_results]
+        filtered_results = filter_web_search_results_with_no_title_or_snippet(
+            list(search_results)
+        )
+        trimmed_results = list(filtered_results)[: request.max_results]
        for search_result in trimmed_results:
            results.append(
                LlmWebSearchResult(
@@ -180,7 +189,9 @@ def _open_urls(
    provider_view, provider = _get_active_content_provider(db_session)

    try:
-        docs = provider.contents(urls)
+        docs = filter_web_contents_with_no_title_or_content(
+            list(provider.contents(urls))
+        )
    except HTTPException:
        raise
    except Exception as exc:
--- a/backend/onyx/server/manage/web_search/api.py
+++ b/backend/onyx/server/manage/web_search/api.py
@@ -29,6 +29,9 @@ from onyx.server.manage.web_search.models import WebContentProviderView
 from onyx.server.manage.web_search.models import WebSearchProviderTestRequest
 from onyx.server.manage.web_search.models import WebSearchProviderUpsertRequest
 from onyx.server.manage.web_search.models import WebSearchProviderView
+from onyx.tools.tool_implementations.open_url.utils import (
+    filter_web_contents_with_no_title_or_content,
+)
 from onyx.tools.tool_implementations.web_search.providers import (
    build_content_provider_from_config,
 )
@@ -353,7 +356,9 @@ def test_content_provider(
    # Actually test the API key by making a real content fetch call
    try:
        test_url = "https://example.com"
-        test_results = provider.contents([test_url])
+        test_results = filter_web_contents_with_no_title_or_content(
+            list(provider.contents([test_url]))
+        )
        if not test_results or not any(
            result.scrape_successful for result in test_results
        ):
--- a/backend/onyx/server/query_and_chat/chat_backend.py
+++ b/backend/onyx/server/query_and_chat/chat_backend.py
@@ -1,8 +1,6 @@
-import asyncio
 import datetime
 import json
 import os
-from collections.abc import AsyncGenerator
 from collections.abc import Generator
 from datetime import timedelta
 from uuid import UUID
@@ -18,8 +16,11 @@ from pydantic import BaseModel
 from redis.client import Redis
 from sqlalchemy.orm import Session

+from onyx.auth.api_key import get_hashed_api_key_from_request
+from onyx.auth.pat import get_hashed_pat_from_request
 from onyx.auth.users import current_chat_accessible_user
 from onyx.auth.users import current_user
+from onyx.chat.chat_processing_checker import is_chat_session_processing
 from onyx.chat.chat_state import ChatStateContainer
 from onyx.chat.chat_utils import create_chat_history_chain
 from onyx.chat.chat_utils import create_chat_session_from_request
@@ -87,6 +88,7 @@ from onyx.server.query_and_chat.models import ChatSessionSummary
 from onyx.server.query_and_chat.models import ChatSessionUpdateRequest
 from onyx.server.query_and_chat.models import CreateChatMessageRequest
 from onyx.server.query_and_chat.models import LLMOverride
+from onyx.server.query_and_chat.models import MessageOrigin
 from onyx.server.query_and_chat.models import PromptOverride
 from onyx.server.query_and_chat.models import RenameChatSessionResponse
 from onyx.server.query_and_chat.models import SearchFeedbackRequest
@@ -105,7 +107,6 @@ from onyx.server.utils import PUBLIC_API_TAGS
 from onyx.utils.headers import get_custom_tool_additional_request_headers
 from onyx.utils.logger import setup_logger
 from onyx.utils.telemetry import mt_cloud_telemetry
-from onyx.utils.threadpool_concurrency import run_in_background
 from shared_configs.contextvars import get_current_tenant_id

 logger = setup_logger()
@@ -292,6 +293,18 @@ def get_chat_session(
        translate_db_message_to_chat_message_detail(msg) for msg in session_messages
    ]

+    try:
+        is_processing = is_chat_session_processing(session_id, get_redis_client())
+        # Edit the last message to indicate loading (Overriding default message value)
+        if is_processing and chat_message_details:
+            last_msg = chat_message_details[-1]
+            if last_msg.message_type == MessageType.ASSISTANT:
+                last_msg.message = "Message is loading... Please refresh the page soon."
+    except Exception:
+        logger.exception(
+            "An error occurred while checking if the chat session is processing"
+        )
+
    # Every assistant message might have a set of tool calls associated with it, these need to be replayed back for the frontend
    # Each list is the set of tool calls for the given assistant message.
    replay_packet_lists: list[list[Packet]] = []
@@ -510,7 +523,7 @@ def handle_new_chat_message(


@router.post("/send-chat-message", response_model=None, tags=PUBLIC_API_TAGS)
-async def handle_send_chat_message(
+def handle_send_chat_message(
    chat_message_req: SendMessageRequest,
    request: Request,
    user: User | None = Depends(current_chat_accessible_user),
@@ -540,6 +553,11 @@ async def handle_send_chat_message(
        event=MilestoneRecordType.RAN_QUERY,
    )

+    # Override origin to API when authenticated via API key or PAT
+    # to prevent clients from polluting telemetry data
+    if get_hashed_api_key_from_request(request) or get_hashed_pat_from_request(request):
+        chat_message_req.origin = MessageOrigin.API
+
    # Non-streaming path: consume all packets and return complete response
    if not chat_message_req.stream:
        with get_session_with_current_tenant() as db_session:
@@ -575,63 +593,34 @@ async def handle_send_chat_message(
            # Note: LLM cost tracking is now handled in multi_llm.py
            return result

-    # Use prod-cons pattern to continue processing even if request stops yielding
-    buffer: asyncio.Queue[str | None] = asyncio.Queue()
-    loop = asyncio.get_running_loop()
-
-    # Capture headers before spawning thread
-    litellm_headers = extract_headers(request.headers, LITELLM_PASS_THROUGH_HEADERS)
-    custom_tool_headers = get_custom_tool_additional_request_headers(request.headers)
-
-    def producer() -> None:
-        """
-        Producer function that runs handle_stream_message_objects in a loop
-        and writes results to the buffer.
-        """
+    # Streaming path, normal Onyx UI behavior
+    def stream_generator() -> Generator[str, None, None]:
        state_container = ChatStateContainer()
        try:
-            logger.debug("Producer started")
            with get_session_with_current_tenant() as db_session:
                for obj in handle_stream_message_objects(
                    new_msg_req=chat_message_req,
                    user=user,
                    db_session=db_session,
-                    litellm_additional_headers=litellm_headers,
-                    custom_tool_additional_headers=custom_tool_headers,
+                    litellm_additional_headers=extract_headers(
+                        request.headers, LITELLM_PASS_THROUGH_HEADERS
+                    ),
+                    custom_tool_additional_headers=get_custom_tool_additional_request_headers(
+                        request.headers
+                    ),
                    external_state_container=state_container,
                ):
-                    # Thread-safe put into the asyncio queue
-                    loop.call_soon_threadsafe(
-                        buffer.put_nowait, get_json_line(obj.model_dump())
-                    )
+                    yield get_json_line(obj.model_dump())
                # Note: LLM cost tracking is now handled in multi_llm.py
+
        except Exception as e:
            logger.exception("Error in chat message streaming")
-            loop.call_soon_threadsafe(buffer.put_nowait, json.dumps({"error": str(e)}))
+            yield json.dumps({"error": str(e)})
+
        finally:
-            # Signal end of stream
-            loop.call_soon_threadsafe(buffer.put_nowait, None)
-            logger.debug("Producer finished")
+            logger.debug("Stream generator finished")

-    async def stream_from_buffer() -> AsyncGenerator[str, None]:
-        """
-        Async generator that reads from the buffer and yields to the client.
-        """
-        try:
-            while True:
-                item = await buffer.get()
-                if item is None:
-                    # End of stream signal
-                    break
-                yield item
-        except asyncio.CancelledError:
-            logger.warning("Stream cancelled (Consumer disconnected)")
-        finally:
-            logger.debug("Stream consumer finished")
-
-    run_in_background(producer)
-
-    return StreamingResponse(stream_from_buffer(), media_type="text/event-stream")
+    return StreamingResponse(stream_generator(), media_type="text/event-stream")


@router.put("/set-message-as-latest")
--- a/backend/onyx/server/query_and_chat/models.py
+++ b/backend/onyx/server/query_and_chat/models.py
@@ -1,4 +1,5 @@
 from datetime import datetime
+from enum import Enum
 from typing import Any
 from typing import TYPE_CHECKING
 from uuid import UUID
@@ -36,6 +37,17 @@ from onyx.server.query_and_chat.streaming_models import Packet
 AUTO_PLACE_AFTER_LATEST_MESSAGE = -1


+class MessageOrigin(str, Enum):
+    """Origin of a chat message for telemetry tracking."""
+
+    WEBAPP = "webapp"
+    CHROME_EXTENSION = "chrome_extension"
+    API = "api"  # forced server-side for API-key/PAT authenticated requests
+    SLACKBOT = "slackbot"
+    UNKNOWN = "unknown"  # default of CreateChatMessageRequest.origin
+    UNSET = "unset"  # default of SendMessageRequest.origin
+
+
 if TYPE_CHECKING:
    pass

@@ -93,6 +105,9 @@ class SendMessageRequest(BaseModel):

    deep_research: bool = False

+    # Origin of the message for telemetry tracking
+    origin: MessageOrigin = MessageOrigin.UNSET
+
    # Placement information for the message in the conversation tree:
    # - -1: auto-place after latest message in chain
    # - null: regeneration from root (first message)
@@ -184,6 +199,9 @@ class CreateChatMessageRequest(ChunkContext):

    deep_research: bool = False

+    # Origin of the message for telemetry tracking
+    origin: MessageOrigin = MessageOrigin.UNKNOWN
+
    @model_validator(mode="after")
    def check_search_doc_ids_or_retrieval_options(self) -> "CreateChatMessageRequest":
        if self.search_doc_ids is None and self.retrieval_options is None:
--- a/backend/onyx/server/query_and_chat/query_backend.py
+++ b/backend/onyx/server/query_and_chat/query_backend.py
@@ -60,6 +60,7 @@ from onyx.server.query_and_chat.models import ChatSessionsResponse
 from onyx.server.query_and_chat.models import DocumentSearchPagination
 from onyx.server.query_and_chat.models import DocumentSearchRequest
 from onyx.server.query_and_chat.models import DocumentSearchResponse
+from onyx.server.query_and_chat.models import MessageOrigin
 from onyx.server.query_and_chat.models import OneShotQARequest
 from onyx.server.query_and_chat.models import OneShotQAResponse
 from onyx.server.query_and_chat.models import SearchSessionDetailResponse
@@ -251,6 +252,7 @@ def get_answer_stream(
    )

    # Also creates a new chat session
+    # Origin is hardcoded to API since this endpoint is only accessible via API calls
    request = prepare_chat_message_request(
        message_text=combined_message,
        user=user,
@@ -261,6 +263,7 @@ def get_answer_stream(
        rerank_settings=query_request.rerank_settings,
        db_session=db_session,
        skip_gen_ai_answer_generation=query_request.skip_gen_ai_answer_generation,
+        origin=MessageOrigin.API,
    )

    packets = stream_chat_message_objects(
--- a/backend/onyx/server/query_and_chat/session_loading.py
+++ b/backend/onyx/server/query_and_chat/session_loading.py
@@ -11,7 +11,7 @@ from onyx.db.chat import get_db_search_doc_by_id
 from onyx.db.chat import translate_db_search_doc_to_saved_search_doc
 from onyx.db.models import ChatMessage
 from onyx.db.tools import get_tool_by_id
-from onyx.deep_research.dr_mock_tools import RESEARCH_AGENT_DB_NAME
+from onyx.deep_research.dr_mock_tools import RESEARCH_AGENT_IN_CODE_ID
 from onyx.deep_research.dr_mock_tools import RESEARCH_AGENT_TASK_KEY
 from onyx.server.query_and_chat.placement import Placement
 from onyx.server.query_and_chat.streaming_models import AgentResponseDelta
@@ -23,6 +23,7 @@ from onyx.server.query_and_chat.streaming_models import GeneratedImage
 from onyx.server.query_and_chat.streaming_models import ImageGenerationFinal
 from onyx.server.query_and_chat.streaming_models import ImageGenerationToolStart
 from onyx.server.query_and_chat.streaming_models import IntermediateReportDelta
+from onyx.server.query_and_chat.streaming_models import IntermediateReportStart
 from onyx.server.query_and_chat.streaming_models import OpenUrlDocuments
 from onyx.server.query_and_chat.streaming_models import OpenUrlStart
 from onyx.server.query_and_chat.streaming_models import OpenUrlUrls
@@ -35,6 +36,7 @@ from onyx.server.query_and_chat.streaming_models import SearchToolDocumentsDelta
 from onyx.server.query_and_chat.streaming_models import SearchToolQueriesDelta
 from onyx.server.query_and_chat.streaming_models import SearchToolStart
 from onyx.server.query_and_chat.streaming_models import SectionEnd
+from onyx.server.query_and_chat.streaming_models import TopLevelBranching
 from onyx.tools.tool_implementations.images.image_generation_tool import (
    ImageGenerationTool,
 )
@@ -207,6 +209,7 @@ def create_research_agent_packets(
    """Create packets for research agent tool calls.
    This recreates the packet structure that ResearchAgentRenderer expects:
    - ResearchAgentStart with the research task
+    - IntermediateReportStart to signal report begins
    - IntermediateReportDelta with the report content (if available)
    - SectionEnd to mark completion
    """
@@ -222,6 +225,14 @@ def create_research_agent_packets(

    # Emit report content if available
    if report_content:
+        # Emit IntermediateReportStart before delta
+        packets.append(
+            Packet(
+                placement=Placement(turn_index=turn_index, tab_index=tab_index),
+                obj=IntermediateReportStart(),
+            )
+        )
+
        packets.append(
            Packet(
                placement=Placement(turn_index=turn_index, tab_index=tab_index),
@@ -381,10 +392,17 @@ def translate_assistant_message_to_packets(
                    )
                )

-            # Process each tool call in this turn
+            # Process each tool call in this turn (single pass).
+            # We buffer packets for the turn so we can conditionally prepend a TopLevelBranching
+            # packet (which must appear before any tool output in the turn).
+            research_agent_count = 0
+            turn_tool_packets: list[Packet] = []
            for tool_call in tool_calls_in_turn:
+                # Here we do a try because some tools may get deleted before the session is reloaded.
                try:
                    tool = get_tool_by_id(tool_call.tool_id, db_session)
+                    if tool.in_code_tool_id == RESEARCH_AGENT_IN_CODE_ID:
+                        research_agent_count += 1

                    # Handle different tool types
                    if tool.in_code_tool_id in [
@@ -398,7 +416,7 @@ def translate_assistant_message_to_packets(
                            translate_db_search_doc_to_saved_search_doc(doc)
                            for doc in tool_call.search_docs
                        ]
-                        packet_list.extend(
+                        turn_tool_packets.extend(
                            create_search_packets(
                                search_queries=queries,
                                search_docs=search_docs,
@@ -418,7 +436,7 @@ def translate_assistant_message_to_packets(
                        urls = cast(
                            list[str], tool_call.tool_call_arguments.get("urls", [])
                        )
-                        packet_list.extend(
+                        turn_tool_packets.extend(
                            create_fetch_packets(
                                fetch_docs,
                                urls,
@@ -433,20 +451,20 @@ def translate_assistant_message_to_packets(
                                GeneratedImage(**img)
                                for img in tool_call.generated_images
                            ]
-                            packet_list.extend(
+                            turn_tool_packets.extend(
                                create_image_generation_packets(
                                    images, turn_num, tab_index=tool_call.tab_index
                                )
                            )

-                    elif tool.in_code_tool_id == RESEARCH_AGENT_DB_NAME:
+                    elif tool.in_code_tool_id == RESEARCH_AGENT_IN_CODE_ID:
                        # Not ideal but not a huge issue if the research task is lost.
                        research_task = cast(
                            str,
                            tool_call.tool_call_arguments.get(RESEARCH_AGENT_TASK_KEY)
                            or "Could not fetch saved research task.",
                        )
-                        packet_list.extend(
+                        turn_tool_packets.extend(
                            create_research_agent_packets(
                                research_task=research_task,
                                report_content=tool_call.tool_call_response,
@@ -457,7 +475,7 @@ def translate_assistant_message_to_packets(

                    else:
                        # Custom tool or unknown tool
-                        packet_list.extend(
+                        turn_tool_packets.extend(
                            create_custom_tool_packets(
                                tool_name=tool.display_name or tool.name,
                                response_type="text",
@@ -471,6 +489,18 @@ def translate_assistant_message_to_packets(
                    logger.warning(f"Error processing tool call {tool_call.id}: {e}")
                    continue

+            if research_agent_count > 1:
+                # Emit TopLevelBranching before processing any tool output in the turn.
+                packet_list.append(
+                    Packet(
+                        placement=Placement(turn_index=turn_num),
+                        obj=TopLevelBranching(
+                            num_parallel_branches=research_agent_count
+                        ),
+                    )
+                )
+            packet_list.extend(turn_tool_packets)
+
    # Determine the next turn_index for the final message
    # It should come after all tool calls
    max_tool_turn = 0
@@ -539,9 +569,18 @@ def translate_assistant_message_to_packets(
        if citation_info_list:
            final_turn_index = max(final_turn_index, citation_turn_index)

+    # Determine stop reason - check if message indicates user cancelled
+    stop_reason: str | None = None
+    if chat_message.message:
+        if "Generation was stopped" in chat_message.message:
+            stop_reason = "user_cancelled"
+
    # Add overall stop packet at the end
    packet_list.append(
-        Packet(placement=Placement(turn_index=final_turn_index), obj=OverallStop())
+        Packet(
+            placement=Placement(turn_index=final_turn_index),
+            obj=OverallStop(stop_reason=stop_reason),
+        )
    )

    return packet_list
--- a/backend/onyx/tools/fake_tools/research_agent.py
+++ b/backend/onyx/tools/fake_tools/research_agent.py
@@ -410,7 +410,7 @@ def run_research_agent_call(
                    most_recent_reasoning = llm_step_result.reasoning
                    continue
                else:
-                    tool_responses, citation_mapping = run_tool_calls(
+                    parallel_tool_call_results = run_tool_calls(
                        tool_calls=tool_calls,
                        tools=current_tools,
                        message_history=msg_history,
@@ -424,6 +424,10 @@ def run_research_agent_call(
                        # May be better to not do this step, hard to say, needs to be tested
                        skip_search_query_expansion=False,
                    )
+                    tool_responses = parallel_tool_call_results.tool_responses
+                    citation_mapping = (
+                        parallel_tool_call_results.updated_citation_mapping
+                    )

                    if tool_calls and not tool_responses:
                        failure_messages = create_tool_call_failure_messages(
--- a/backend/onyx/tools/models.py
+++ b/backend/onyx/tools/models.py
@@ -25,6 +25,17 @@ TOOL_CALL_MSG_FUNC_NAME = "function_name"
 TOOL_CALL_MSG_ARGUMENTS = "arguments"


+class ToolCallException(Exception):
+    """Error raised while executing a tool call."""
+
+    def __init__(self, message: str, llm_facing_message: str):
+        # Failed LLM-made tool calls are acceptable and do not end the flow;
+        # this is the text that will populate the tool response.
+        self.llm_facing_message = llm_facing_message
+        # The full error message, used for tracing.
+        super().__init__(message)
+
+
 class SearchToolUsage(str, Enum):
    DISABLED = "disabled"
    ENABLED = "enabled"
@@ -77,6 +88,11 @@ class ToolResponse(BaseModel):
    tool_call: ToolCallKickoff | None = None


+class ParallelToolCallResponse(BaseModel):  # presumably run_tool_calls' result; confirm
+    tool_responses: list[ToolResponse]
+    updated_citation_mapping: dict[int, str]
+
+
 class ToolRunnerResponse(BaseModel):
    tool_run_kickoff: ToolCallKickoff | None = None
    tool_response: ToolResponse | None = None
--- a/backend/onyx/tools/tool_implementations/open_url/open_url_tool.py
+++ b/backend/onyx/tools/tool_implementations/open_url/open_url_tool.py
@@ -34,6 +34,9 @@ from onyx.tools.tool_implementations.open_url.url_normalization import (
    _default_url_normalizer,
 )
 from onyx.tools.tool_implementations.open_url.url_normalization import normalize_url
+from onyx.tools.tool_implementations.open_url.utils import (
+    filter_web_contents_with_no_title_or_content,
+)
 from onyx.tools.tool_implementations.web_search.providers import (
    get_default_content_provider,
 )
@@ -520,6 +523,11 @@ class OpenURLTool(Tool[OpenURLToolOverrideKwargs]):
            )
            return ToolResponse(rich_response=None, llm_facing_response=failure_msg)

+        for section in inference_sections:
+            chunk = section.center_chunk
+            if not chunk.semantic_identifier and chunk.source_links:
+                chunk.semantic_identifier = chunk.source_links[0]
+
        # Convert sections to search docs, preserving source information
        search_docs = convert_inference_sections_to_search_docs(
            inference_sections, is_internet=False
@@ -766,15 +774,23 @@ class OpenURLTool(Tool[OpenURLToolOverrideKwargs]):
        if not urls:
            return [], []

-        web_contents = self._provider.contents(urls)
+        raw_web_contents = self._provider.contents(urls)
+        # Treat "no title and no content" as a failure for that URL, but don't
+        # include the empty entry in downstream prompting/sections.
+        failed_urls: list[str] = [
+            content.link
+            for content in raw_web_contents
+            if not content.title.strip() and not content.full_content.strip()
+        ]
+        web_contents = filter_web_contents_with_no_title_or_content(raw_web_contents)
        sections: list[InferenceSection] = []
-        failed_urls: list[str] = []

        for content in web_contents:
            # Check if content is insufficient (e.g., "Loading..." or too short)
            text_stripped = content.full_content.strip()
            is_insufficient = (
                not text_stripped
+                # TODO: Likely a behavior of our scraper, understand why this special pattern occurs
                or text_stripped.lower() == "loading..."
                or len(text_stripped) < 50
            )
@@ -786,6 +802,9 @@ class OpenURLTool(Tool[OpenURLToolOverrideKwargs]):
            ):
                sections.append(inference_section_from_internet_page_scrape(content))
            else:
+                # TODO: Slight improvement - pass the failure reason for each URL back to the LLM.
+                # For example, if it tries to crawl Reddit and fails, it should know (probably) that
+                # the same error would happen if it tried to crawl Reddit again.
                failed_urls.append(content.link or "")

        return sections, failed_urls
--- a/backend/onyx/tools/tool_implementations/open_url/utils.py
+++ b/backend/onyx/tools/tool_implementations/open_url/utils.py
@@ -0,0 +1,17 @@
+from onyx.tools.tool_implementations.open_url.models import WebContent
+
+
+def filter_web_contents_with_no_title_or_content(
+    contents: list[WebContent],
+) -> list[WebContent]:
+    """Filter out content entries that have neither a title nor any extracted text.
+
+    Some content providers can return placeholder/partial entries that only include a URL.
+    Downstream uses these fields for display + prompting; drop empty ones centrally
+    rather than duplicating checks across provider clients.
+    """
+    filtered: list[WebContent] = []
+    for content in contents:
+        if content.title.strip() or content.full_content.strip():
+            filtered.append(content)
+    return filtered
--- a/backend/onyx/tools/tool_implementations/search/search_tool.py
+++ b/backend/onyx/tools/tool_implementations/search/search_tool.py
@@ -252,14 +252,14 @@ class SearchTool(Tool[SearchToolOverrideKwargs]):

        # Store session factory instead of session for thread-safety
        # When tools are called in parallel, each thread needs its own session
-        # TODO ensure this works!!!
        self._session_bind = db_session.get_bind()
        self._session_factory = sessionmaker(bind=self._session_bind)

        self._id = tool_id

    def _get_thread_safe_session(self) -> Session:
-        """Create a new database session for the current thread.
+        """Create a new database session for the current thread. Note that this only isolates ORM-level state (caches/identity
+        maps, pending objects, flush state, etc.); the session still uses the same underlying database connection.

        This ensures thread-safety when the search tool is called in parallel.
        Each parallel execution gets its own isolated database session with
--- a/backend/onyx/tools/tool_implementations/web_search/clients/exa_client.py
+++ b/backend/onyx/tools/tool_implementations/web_search/clients/exa_client.py
@@ -1,3 +1,4 @@
+import re
 from collections.abc import Sequence

 from exa_py import Exa
@@ -19,7 +20,21 @@ from onyx.utils.retry_wrapper import retry_builder
 logger = setup_logger()


-# TODO can probably break this up
+def _extract_site_operators(query: str) -> tuple[str, list[str]]:
+    """Extract site: operators and return cleaned query + full domains.
+
+    Returns (cleaned_query, full_domains) where full_domains contains the full
+    values after site: (e.g., ["reddit.com/r/leagueoflegends"]).
+    """
+    full_domains = re.findall(r"site:\s*([^\s]+)", query, re.IGNORECASE)
+    cleaned_query = re.sub(r"site:\s*\S+\s*", "", query, flags=re.IGNORECASE).strip()
+
+    if not cleaned_query and full_domains:
+        cleaned_query = full_domains[0]
+
+    return cleaned_query, full_domains
+
+
 class ExaClient(WebSearchProvider, WebContentProvider):
    def __init__(self, api_key: str, num_results: int = 10) -> None:
        self.exa = Exa(api_key=api_key)
@@ -29,8 +44,9 @@ class ExaClient(WebSearchProvider, WebContentProvider):
    def supports_site_filter(self) -> bool:
        return False

-    @retry_builder(tries=3, delay=1, backoff=2)
-    def search(self, query: str) -> list[WebSearchResult]:
+    def _search_exa(
+        self, query: str, include_domains: list[str] | None = None
+    ) -> list[WebSearchResult]:
        response = self.exa.search_and_contents(
            query,
            type="auto",
@@ -39,22 +55,43 @@ class ExaClient(WebSearchProvider, WebContentProvider):
                highlights_per_url=1,
            ),
            num_results=self._num_results,
+            include_domains=include_domains,
        )

-        return [
-            WebSearchResult(
-                title=result.title or "",
-                link=result.url,
-                snippet=result.highlights[0] if result.highlights else "",
-                author=result.author,
-                published_date=(
-                    time_str_to_utc(result.published_date)
-                    if result.published_date
-                    else None
-                ),
+        results: list[WebSearchResult] = []
+        for result in response.results:
+            title = (result.title or "").strip()
+            snippet = (result.highlights[0] if result.highlights else "").strip()
+            results.append(
+                WebSearchResult(
+                    title=title,
+                    link=result.url,
+                    snippet=snippet,
+                    author=result.author,
+                    published_date=(
+                        time_str_to_utc(result.published_date)
+                        if result.published_date
+                        else None
+                    ),
+                )
            )
-            for result in response.results
-        ]
+
+        return results
+
+    @retry_builder(tries=3, delay=1, backoff=2)
+    def search(self, query: str) -> list[WebSearchResult]:
+        cleaned_query, full_domains = _extract_site_operators(query)
+
+        if full_domains:
+            # Try with include_domains using base domains (e.g., ["reddit.com"])
+            base_domains = [d.split("/")[0].removeprefix("www.") for d in full_domains]
+            results = self._search_exa(cleaned_query, include_domains=base_domains)
+            if results:
+                return results
+
+        # Fallback: add full domains as keywords
+        query_with_domains = f"{cleaned_query} {' '.join(full_domains)}".strip()
+        return self._search_exa(query_with_domains)

    def test_connection(self) -> dict[str, str]:
        try:
@@ -93,16 +130,23 @@ class ExaClient(WebSearchProvider, WebContentProvider):
            livecrawl="preferred",
        )

-        return [
-            WebContent(
-                title=result.title or "",
-                link=result.url,
-                full_content=result.text or "",
-                published_date=(
-                    time_str_to_utc(result.published_date)
-                    if result.published_date
-                    else None
-                ),
+        # Exa can return partial/empty content entries; only normalize whitespace here. Empty entries are
+        # dropped downstream (filter_web_contents_with_no_title_or_content) to avoid prompt + UI pollution.
+        contents: list[WebContent] = []
+        for result in response.results:
+            title = (result.title or "").strip()
+            full_content = (result.text or "").strip()
+            contents.append(
+                WebContent(
+                    title=title,
+                    link=result.url,
+                    full_content=full_content,
+                    published_date=(
+                        time_str_to_utc(result.published_date)
+                        if result.published_date
+                        else None
+                    ),
+                )
            )
-            for result in response.results
-        ]
+
+        return contents
--- a/backend/onyx/tools/tool_implementations/web_search/clients/serper_client.py
+++ b/backend/onyx/tools/tool_implementations/web_search/clients/serper_client.py
@@ -47,20 +47,28 @@ class SerperClient(WebSearchProvider, WebContentProvider):
        response.raise_for_status()

        results = response.json()
-        organic_results = results["organic"]
+        organic_results = results.get("organic") or []

-        organic_results = filter(lambda result: "link" in result, organic_results)
+        validated_results: list[WebSearchResult] = []
+        for result in organic_results:
+            link = (result.get("link") or "").strip()
+            if not link:
+                continue

-        return [
-            WebSearchResult(
-                title=result.get("title", ""),
-                link=result.get("link"),
-                snippet=result.get("snippet", ""),
-                author=None,
-                published_date=None,
+            title = (result.get("title") or "").strip()
+            snippet = (result.get("snippet") or "").strip()
+
+            validated_results.append(
+                WebSearchResult(
+                    title=title,
+                    link=link,
+                    snippet=snippet,
+                    author=None,
+                    published_date=None,
+                )
            )
-            for result in organic_results
-        ]
+
+        return validated_results

    def test_connection(self) -> dict[str, str]:
        try:
--- a/backend/onyx/tools/tool_implementations/web_search/utils.py
+++ b/backend/onyx/tools/tool_implementations/web_search/utils.py
@@ -6,6 +6,22 @@ from onyx.tools.tool_implementations.web_search.models import WEB_SEARCH_PREFIX
 from onyx.tools.tool_implementations.web_search.models import WebSearchResult


+def filter_web_search_results_with_no_title_or_snippet(
+    results: list[WebSearchResult],
+) -> list[WebSearchResult]:
+    """Filter out results that have neither a title nor a snippet.
+
+    Some providers can return entries that only include a URL. Downstream uses
+    titles/snippets for display and prompting, so we drop those empty entries
+    centrally (rather than duplicating the check in each client).
+    """
+    filtered: list[WebSearchResult] = []
+    for result in results:
+        if result.title.strip() or result.snippet.strip():
+            filtered.append(result)
+    return filtered
+
+
 def truncate_search_result_content(content: str, max_chars: int = 15000) -> str:
    """Truncate search result content to a maximum number of characters"""
    if len(content) <= max_chars:
--- a/backend/onyx/tools/tool_implementations/web_search/web_search_tool.py
+++ b/backend/onyx/tools/tool_implementations/web_search/web_search_tool.py
@@ -1,3 +1,4 @@
+import json
 from typing import Any
 from typing import cast

@@ -15,6 +16,7 @@ from onyx.server.query_and_chat.streaming_models import SearchToolDocumentsDelta
 from onyx.server.query_and_chat.streaming_models import SearchToolQueriesDelta
 from onyx.server.query_and_chat.streaming_models import SearchToolStart
 from onyx.tools.interface import Tool
+from onyx.tools.models import ToolCallException
 from onyx.tools.models import ToolResponse
 from onyx.tools.models import WebSearchToolOverrideKwargs
 from onyx.tools.tool_implementations.utils import (
@@ -25,6 +27,9 @@ from onyx.tools.tool_implementations.web_search.models import WebSearchResult
 from onyx.tools.tool_implementations.web_search.providers import (
    build_search_provider_from_config,
 )
+from onyx.tools.tool_implementations.web_search.utils import (
+    filter_web_search_results_with_no_title_or_snippet,
+)
 from onyx.tools.tool_implementations.web_search.utils import (
    inference_section_from_internet_search_result,
 )
@@ -124,13 +129,28 @@ class WebSearchTool(Tool[WebSearchToolOverrideKwargs]):
            )
        )

-    def _execute_single_search(
+    def _safe_execute_single_search(
        self,
        query: str,
        provider: Any,
-    ) -> list[WebSearchResult]:
-        """Execute a single search query and return results."""
-        return list(provider.search(query))[:DEFAULT_MAX_RESULTS]
+    ) -> tuple[list[WebSearchResult] | None, str | None]:
+        """Execute a single search query and return results with error capture.
+
+        Returns:
+            A tuple of (results, error_message). If successful, error_message is None.
+            If failed, results is None and error_message contains the error.
+        """
+        try:
+            raw_results = list(provider.search(query))
+            filtered_results = filter_web_search_results_with_no_title_or_snippet(
+                raw_results
+            )
+            results = filtered_results[:DEFAULT_MAX_RESULTS]
+            return (results, None)
+        except Exception as e:
+            error_msg = str(e)
+            logger.warning(f"Web search query '{query}' failed: {error_msg}")
+            return (None, error_msg)

    def run(
        self,
@@ -149,22 +169,46 @@ class WebSearchTool(Tool[WebSearchToolOverrideKwargs]):
            )
        )

-        # Perform searches in parallel
+        # Perform searches in parallel with error capture
        functions_with_args = [
-            (self._execute_single_search, (query, self._provider)) for query in queries
+            (self._safe_execute_single_search, (query, self._provider))
+            for query in queries
        ]
-        search_results_per_query: list[list[WebSearchResult]] = (
-            run_functions_tuples_in_parallel(
-                functions_with_args,
-                allow_failures=True,
-            )
+        search_results_with_errors: list[
+            tuple[list[WebSearchResult] | None, str | None]
+        ] = run_functions_tuples_in_parallel(
+            functions_with_args,
+            allow_failures=False,  # Our wrapper handles errors internally
        )

+        # Separate successful results from failures
+        valid_results: list[list[WebSearchResult]] = []
+        failed_queries: dict[str, str] = {}
+
+        for query, (results, error) in zip(queries, search_results_with_errors):
+            if error is not None:
+                failed_queries[query] = error
+            elif results is not None:
+                valid_results.append(results)
+
+        # Log partial failures but continue if we have at least one success
+        if failed_queries and valid_results:
+            logger.warning(
+                f"Web search partial failure: {len(failed_queries)}/{len(queries)} "
+                f"queries failed. Failed queries: {json.dumps(failed_queries)}"
+            )
+
+        # If all queries failed, raise ToolCallException with details
+        if not valid_results:
+            error_details = json.dumps(failed_queries, indent=2)
+            raise ToolCallException(
+                message=f"All web search queries failed: {error_details}",
+                llm_facing_message=(
+                    f"All web search queries failed. Query failures:\n{error_details}"
+                ),
+            )
+
        # Interweave top results from each query in round-robin fashion
-        # Filter out None results from failures
-        valid_results = [
-            results for results in search_results_per_query if results is not None
-        ]
        all_search_results: list[WebSearchResult] = []

        if valid_results:
@@ -191,8 +235,15 @@ class WebSearchTool(Tool[WebSearchToolOverrideKwargs]):
                if not added_any:
                    break

+        # This should be a very rare case and is due to not failing loudly enough in the search provider implementation.
        if not all_search_results:
-            raise RuntimeError("No search results found.")
+            raise ToolCallException(
+                message="Web search queries succeeded but returned no results",
+                llm_facing_message=(
+                    "Web search completed but found no results for the given queries. "
+                    "Try rephrasing or using different search terms."
+                ),
+            )

        # Convert search results to InferenceSections with rank-based scoring
        inference_sections = [
@@ -214,13 +265,22 @@ class WebSearchTool(Tool[WebSearchToolOverrideKwargs]):
        )

        # Format for LLM
-        docs_str, citation_mapping = convert_inference_sections_to_llm_string(
-            top_sections=inference_sections,
-            citation_start=override_kwargs.starting_citation_num,
-            limit=None,  # Already truncated
-            include_source_type=False,
-            include_link=True,
-        )
+        if not all_search_results:
+            docs_str = json.dumps(
+                {
+                    "results": [],
+                    "message": "The web search completed but returned no results for any of the queries. Do not search again.",
+                }
+            )
+            citation_mapping: dict[int, str] = {}
+        else:
+            docs_str, citation_mapping = convert_inference_sections_to_llm_string(
+                top_sections=inference_sections,
+                citation_start=override_kwargs.starting_citation_num,
+                limit=None,  # Already truncated
+                include_source_type=False,
+                include_link=True,
+            )

        return ToolResponse(
            rich_response=SearchDocsResponse(
--- a/backend/onyx/tools/tool_runner.py
+++ b/backend/onyx/tools/tool_runner.py
@@ -11,7 +11,9 @@ from onyx.server.query_and_chat.streaming_models import SectionEnd
 from onyx.tools.interface import Tool
 from onyx.tools.models import ChatMinimalTextMessage
 from onyx.tools.models import OpenURLToolOverrideKwargs
+from onyx.tools.models import ParallelToolCallResponse
 from onyx.tools.models import SearchToolOverrideKwargs
+from onyx.tools.models import ToolCallException
 from onyx.tools.models import ToolCallKickoff
 from onyx.tools.models import ToolResponse
 from onyx.tools.models import WebSearchToolOverrideKwargs
@@ -27,6 +29,7 @@ logger = setup_logger()

 QUERIES_FIELD = "queries"
 URLS_FIELD = "urls"
+GENERIC_TOOL_ERROR_MESSAGE = "Tool failed with error: {error}"

 # Mapping of tool name to the field that should be merged when multiple calls exist
 MERGEABLE_TOOL_FIELDS: dict[str, str] = {
@@ -91,7 +94,7 @@ def _merge_tool_calls(tool_calls: list[ToolCallKickoff]) -> list[ToolCallKickoff
    return merged_calls


-def _run_single_tool(
+def _safe_run_single_tool(
    tool: Tool,
    tool_call: ToolCallKickoff,
    override_kwargs: Any,
@@ -99,7 +102,18 @@ def _run_single_tool(
    """Execute a single tool and return its response.

    This function is designed to be run in parallel via run_functions_tuples_in_parallel.
+
+    Exception handling:
+    - ToolCallException: Expected errors from tool execution (e.g., invalid input,
+      API failures). Uses the exception's llm_facing_message for LLM consumption.
+    - Other exceptions: Unexpected errors. Uses a generic error message.
+
+    In all cases (success or failure):
+    - SectionEnd packet is emitted to signal tool completion
+    - tool_call is set on the response for downstream processing
    """
+    tool_response: ToolResponse | None = None
+
    with function_span(tool.name) as span_fn:
        span_fn.span_data.input = str(tool_call.tool_args)
        try:
@@ -109,19 +123,47 @@ def _run_single_tool(
                **tool_call.tool_args,
            )
            span_fn.span_data.output = tool_response.llm_facing_response
-        except Exception as e:
-            logger.error(f"Error running tool {tool.name}: {e}")
+        except ToolCallException as e:
+            # ToolCallException is an expected error from tool execution
+            # Use llm_facing_message which is specifically designed for LLM consumption
+            logger.error(f"Tool call error for {tool.name}: {e}")
            tool_response = ToolResponse(
                rich_response=None,
-                llm_facing_response="Tool execution failed with: " + str(e),
+                llm_facing_response=GENERIC_TOOL_ERROR_MESSAGE.format(
+                    error=e.llm_facing_message
+                ),
            )
            _error_tracing.attach_error_to_current_span(
                SpanError(
-                    message="Error running tool",
+                    message="Tool call error (expected)",
                    data={
                        "tool_name": tool.name,
+                        "tool_call_id": tool_call.tool_call_id,
+                        "tool_args": tool_call.tool_args,
+                        "error": str(e),
+                        "llm_facing_message": e.llm_facing_message,
+                        "stack_trace": traceback.format_exc(),
+                        "error_type": "ToolCallException",
+                    },
+                )
+            )
+        except Exception as e:
+            # Unexpected error during tool execution
+            logger.error(f"Unexpected error running tool {tool.name}: {e}")
+            tool_response = ToolResponse(
+                rich_response=None,
+                llm_facing_response=GENERIC_TOOL_ERROR_MESSAGE.format(error=str(e)),
+            )
+            _error_tracing.attach_error_to_current_span(
+                SpanError(
+                    message="Tool execution error (unexpected)",
+                    data={
+                        "tool_name": tool.name,
+                        "tool_call_id": tool_call.tool_call_id,
+                        "tool_args": tool_call.tool_args,
                        "error": str(e),
                        "stack_trace": traceback.format_exc(),
+                        "error_type": type(e).__name__,
                    },
                )
            )
@@ -153,35 +195,52 @@ def run_tool_calls(
    max_concurrent_tools: int | None = None,
    # Skip query expansion for repeat search tool calls
    skip_search_query_expansion: bool = False,
-) -> tuple[list[ToolResponse], dict[int, str]]:
-    """Run multiple tool calls in parallel and update citation mappings.
+) -> ParallelToolCallResponse:
+    """Run (optionally merged) tool calls in parallel and update citation mappings.

-    Merges tool calls for SearchTool, WebSearchTool, and OpenURLTool before execution.
-    All tools are executed in parallel, and citation mappings are updated
-    from search tool responses.
+    Before execution, tool calls for `SearchTool`, `WebSearchTool`, and `OpenURLTool`
+    are merged so repeated calls are collapsed into a single call per tool:
+    - `SearchTool` / `WebSearchTool`: merge the `queries` list
+    - `OpenURLTool`: merge the `urls` list
+
+    Tools are executed in parallel (threadpool). For tools that generate citations,
+    each tool call is assigned a **distinct** `starting_citation_num` range to avoid
+    citation number collisions when running concurrently (the range is advanced by
+    100 per tool call).
+
+    The provided `citation_mapping` may be mutated in-place: any new
+    `SearchDocsResponse.citation_mapping` entries are merged into it.

    Args:
-        tool_calls: List of tool calls to execute
-        tools: List of available tools
-        message_history: Chat message history for context
-        memories: User memories, if available
-        user_info: User information string, if available
-        citation_mapping: Current citation number to URL mapping
-        next_citation_num: Next citation number to use
+        tool_calls: List of tool calls to execute.
+        tools: List of available tool instances.
+        message_history: Chat message history (used to find the most recent user query
+            for `SearchTool` override kwargs).
+        memories: User memories, if available (passed through to `SearchTool`).
+        user_info: User information string, if available (passed through to `SearchTool`).
+        citation_mapping: Current citation number to URL mapping. May be updated with
+            new citations produced by search tools.
+        next_citation_num: The next citation number to allocate from.
        max_concurrent_tools: Max number of tools to run in this batch. If set, any
            tool calls after this limit are dropped (not queued).
-        skip_search_query_expansion: Whether to skip query expansion for search tools
+        skip_search_query_expansion: Whether to skip query expansion for `SearchTool`
+            (intended for repeated search calls within the same chat turn).

    Returns:
-        A tuple containing:
-            - List of ToolResponse objects (each with tool_call set)
-            - Updated citation mapping dictionary
+        A `ParallelToolCallResponse` containing:
+        - `tool_responses`: `ToolResponse` objects for successfully dispatched tool calls
+          (each has `tool_call` set). If a tool execution fails at the threadpool layer,
+          its entry will be omitted.
+        - `updated_citation_mapping`: The updated citation mapping dictionary.
    """
-    # Merge tool calls for SearchTool and WebSearchTool
+    # Merge tool calls for SearchTool, WebSearchTool, and OpenURLTool
    merged_tool_calls = _merge_tool_calls(tool_calls)

    if not merged_tool_calls:
-        return [], citation_mapping
+        return ParallelToolCallResponse(
+            tool_responses=[],
+            updated_citation_mapping=citation_mapping,
+        )

    tools_by_name = {tool.name: tool for tool in tools}

@@ -196,7 +255,10 @@ def run_tool_calls(
    # Apply safety cap (drop tool calls beyond the cap)
    if max_concurrent_tools is not None:
        if max_concurrent_tools <= 0:
-            return [], citation_mapping
+            return ParallelToolCallResponse(
+                tool_responses=[],
+                updated_citation_mapping=citation_mapping,
+            )
        filtered_tool_calls = filtered_tool_calls[:max_concurrent_tools]

    # Get starting citation number from citation processor to avoid conflicts with project files
@@ -269,24 +331,29 @@ def run_tool_calls(

    # Run all tools in parallel
    functions_with_args = [
-        (_run_single_tool, (tool, tool_call, override_kwargs))
+        (_safe_run_single_tool, (tool, tool_call, override_kwargs))
        for tool, tool_call, override_kwargs in tool_run_params
    ]

-    tool_responses: list[ToolResponse] = run_functions_tuples_in_parallel(
+    tool_run_results: list[ToolResponse | None] = run_functions_tuples_in_parallel(
        functions_with_args,
        allow_failures=True,  # Continue even if some tools fail
        max_workers=max_concurrent_tools,
    )

    # Process results and update citation_mapping
-    for tool_response in tool_responses:
-        if tool_response and isinstance(
-            tool_response.rich_response, SearchDocsResponse
-        ):
-            new_citations = tool_response.rich_response.citation_mapping
+    for result in tool_run_results:
+        if result is None:
+            continue
+
+        if result and isinstance(result.rich_response, SearchDocsResponse):
+            new_citations = result.rich_response.citation_mapping
            if new_citations:
                # Merge new citations into the existing mapping
                citation_mapping.update(new_citations)

-    return tool_responses, citation_mapping
+    tool_responses = [result for result in tool_run_results if result is not None]
+    return ParallelToolCallResponse(
+        tool_responses=tool_responses,
+        updated_citation_mapping=citation_mapping,
+    )
--- a/backend/pyproject.toml
+++ b/backend/pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "onyx-backend"
 version = "0.0.0"
-requires-python = ">=3.11,<3.13"
+requires-python = ">=3.11"
 dependencies = [
    "onyx[backend,dev,ee]",
 ]
--- a/backend/requirements/default.txt
+++ b/backend/requirements/default.txt
@@ -5,7 +5,9 @@ aioboto3==15.1.0
 aiobotocore==2.24.0
    # via aioboto3
 aiofiles==25.1.0
-    # via aioboto3
+    # via
+    #   aioboto3
+    #   unstructured-client
 aiohappyeyeballs==2.6.1
    # via aiohttp
 aiohttp==3.13.3
@@ -115,7 +117,6 @@ certifi==2025.11.12
    #   requests
    #   sentry-sdk
    #   trafilatura
-    #   unstructured-client
 cffi==2.0.0
    # via
    #   argon2-cffi-bindings
@@ -123,9 +124,7 @@ cffi==2.0.0
    #   pynacl
    #   zstandard
 chardet==5.2.0
-    # via
-    #   onyx
-    #   unstructured
+    # via onyx
 charset-normalizer==3.4.4
    # via
    #   htmldate
@@ -133,7 +132,7 @@ charset-normalizer==3.4.4
    #   pdfminer-six
    #   requests
    #   trafilatura
-    #   unstructured-client
+    #   unstructured
 chevron==0.14.0
    # via braintrust
 chonkie==1.0.10
@@ -149,6 +148,7 @@ click==8.3.1
    #   litellm
    #   magika
    #   nltk
+    #   python-oxmsg
    #   typer
    #   uvicorn
    #   zulip
@@ -185,6 +185,7 @@ cryptography==46.0.3
    #   pyjwt
    #   secretstorage
    #   sendgrid
+    #   unstructured-client
 cyclopts==4.2.4
    # via fastmcp
 dask==2023.8.1
@@ -192,17 +193,13 @@ dask==2023.8.1
    #   distributed
    #   onyx
 dataclasses-json==0.6.7
-    # via
-    #   unstructured
-    #   unstructured-client
+    # via unstructured
 dateparser==1.2.2
    # via htmldate
 ddtrace==3.10.0
    # via onyx
 decorator==5.2.1
    # via retry
-deepdiff==8.6.1
-    # via unstructured-client
 defusedxml==0.7.1
    # via
    #   jira
@@ -354,7 +351,7 @@ greenlet==3.2.4
    #   sqlalchemy
 grpc-google-iam-v1==0.14.3
    # via google-cloud-resource-manager
-grpcio==1.67.1
+grpcio==1.67.1 ; python_full_version < '3.14'
    # via
    #   google-api-core
    #   google-cloud-resource-manager
@@ -362,7 +359,17 @@ grpcio==1.67.1
    #   grpc-google-iam-v1
    #   grpcio-status
    #   litellm
-grpcio-status==1.67.1
+grpcio==1.76.0 ; python_full_version >= '3.14'
+    # via
+    #   google-api-core
+    #   google-cloud-resource-manager
+    #   googleapis-common-protos
+    #   grpc-google-iam-v1
+    #   grpcio-status
+    #   litellm
+grpcio-status==1.67.1 ; python_full_version < '3.14'
+    # via google-api-core
+grpcio-status==1.76.0 ; python_full_version >= '3.14'
    # via google-api-core
 h11==0.16.0
    # via
@@ -374,12 +381,15 @@ hf-xet==1.2.0 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or
    # via huggingface-hub
 hpack==4.1.0
    # via h2
+html5lib==1.1
+    # via unstructured
 htmldate==1.9.1
    # via trafilatura
 httpcore==1.0.9
    # via
    #   httpx
    #   onyx
+    #   unstructured-client
 httplib2==0.31.0
    # via
    #   google-api-python-client
@@ -420,7 +430,6 @@ idna==3.11
    #   email-validator
    #   httpx
    #   requests
-    #   unstructured-client
    #   yarl
 importlib-metadata==8.7.0
    # via
@@ -466,8 +475,6 @@ joblib==1.5.2
    # via nltk
 jsonpatch==1.33
    # via langchain-core
-jsonpath-python==1.0.6
-    # via unstructured-client
 jsonpointer==3.0.0
    # via jsonpatch
 jsonref==1.1.0
@@ -509,6 +516,8 @@ langsmith==0.3.45
    #   langchain-core
 lazy-imports==1.0.1
    # via onyx
+legacy-cgi==2.6.4 ; python_full_version >= '3.13'
+    # via ddtrace
 litellm==1.80.11
    # via onyx
 locket==1.0.0
@@ -555,9 +564,7 @@ markupsafe==3.0.3
    #   mako
    #   werkzeug
 marshmallow==3.26.2
-    # via
-    #   dataclasses-json
-    #   unstructured-client
+    # via dataclasses-json
 matrix-client==0.3.2
    # via zulip
 mcp==1.25.0
@@ -598,16 +605,13 @@ mypy-extensions==1.0.0
    # via
    #   mypy
    #   typing-inspect
-    #   unstructured-client
 nest-asyncio==1.6.0
-    # via
-    #   onyx
-    #   unstructured-client
+    # via onyx
 nltk==3.9.1
    # via
    #   onyx
    #   unstructured
-numpy==1.26.4
+numpy==2.4.1
    # via
    #   magika
    #   onnxruntime
@@ -623,7 +627,9 @@ oauthlib==3.2.2
 office365-rest-python-client==2.5.9
    # via onyx
 olefile==0.47
-    # via msoffcrypto-tool
+    # via
+    #   msoffcrypto-tool
+    #   python-oxmsg
 onnxruntime==1.20.1
    # via magika
 openai==2.14.0
@@ -678,8 +684,6 @@ opentelemetry-semantic-conventions==0.60b1
    # via
    #   opentelemetry-instrumentation
    #   opentelemetry-sdk
-orderly-set==5.5.0
-    # via deepdiff
 orjson==3.11.4 ; platform_python_implementation != 'PyPy'
    # via langsmith
 packaging==24.2
@@ -700,7 +704,6 @@ packaging==24.2
    #   opentelemetry-instrumentation
    #   pytest
    #   pywikibot
-    #   unstructured-client
 pandas==2.2.3
    # via markitdown
 parameterized==0.9.0
@@ -748,7 +751,19 @@ proto-plus==1.26.1
    #   google-api-core
    #   google-cloud-aiplatform
    #   google-cloud-resource-manager
-protobuf==5.29.5
+protobuf==5.29.5 ; python_full_version < '3.14'
+    # via
+    #   ddtrace
+    #   google-api-core
+    #   google-cloud-aiplatform
+    #   google-cloud-resource-manager
+    #   googleapis-common-protos
+    #   grpc-google-iam-v1
+    #   grpcio-status
+    #   onnxruntime
+    #   opentelemetry-proto
+    #   proto-plus
+protobuf==6.33.4 ; python_full_version >= '3.14'
    # via
    #   ddtrace
    #   google-api-core
@@ -810,6 +825,7 @@ pydantic==2.11.7
    #   openapi-pydantic
    #   pyairtable
    #   pydantic-settings
+    #   unstructured-client
 pydantic-core==2.33.2
    # via pydantic
 pydantic-settings==2.12.0
@@ -835,7 +851,7 @@ pynacl==1.6.2
    # via pygithub
 pyparsing==3.2.5
    # via httplib2
-pypdf==6.1.3
+pypdf==6.6.0
    # via
    #   onyx
    #   unstructured-client
@@ -867,7 +883,6 @@ python-dateutil==2.8.2
    #   onyx
    #   opensearch-py
    #   pandas
-    #   unstructured-client
 python-docx==1.1.2
    # via onyx
 python-dotenv==1.1.1
@@ -894,6 +909,8 @@ python-multipart==0.0.20
    #   fastapi-users
    #   mcp
    #   onyx
+python-oxmsg==0.0.2
+    # via unstructured
 python-pptx==0.6.23
    # via
    #   markitdown
@@ -985,7 +1002,6 @@ requests==2.32.5
    #   stripe
    #   tiktoken
    #   unstructured
-    #   unstructured-client
    #   voyageai
    #   zeep
    #   zulip
@@ -1045,12 +1061,12 @@ six==1.17.0
    #   atlassian-python-api
    #   dropbox
    #   google-auth-httplib2
+    #   html5lib
    #   hubspot-api-client
    #   langdetect
    #   markdownify
    #   python-dateutil
    #   stone
-    #   unstructured-client
 slack-sdk==3.20.2
    # via onyx
 smmap==5.0.2
@@ -1089,8 +1105,6 @@ supervisor==4.3.0
    # via onyx
 sympy==1.13.1
    # via onnxruntime
-tabulate==0.9.0
-    # via unstructured
 tblib==3.2.2
    # via distributed
 tenacity==9.1.2
@@ -1158,6 +1172,7 @@ typing-extensions==4.15.0
    #   fastapi
    #   google-cloud-aiplatform
    #   google-genai
+    #   grpcio
    #   huggingface-hub
    #   jira
    #   langchain-core
@@ -1178,6 +1193,7 @@ typing-extensions==4.15.0
    #   pyee
    #   pygithub
    #   python-docx
+    #   python-oxmsg
    #   referencing
    #   simple-salesforce
    #   sqlalchemy
@@ -1187,12 +1203,9 @@ typing-extensions==4.15.0
    #   typing-inspect
    #   typing-inspection
    #   unstructured
-    #   unstructured-client
    #   zulip
 typing-inspect==0.9.0
-    # via
-    #   dataclasses-json
-    #   unstructured-client
+    # via dataclasses-json
 typing-inspection==0.4.2
    # via
    #   mcp
@@ -1205,9 +1218,9 @@ tzdata==2025.2
    #   tzlocal
 tzlocal==5.3.1
    # via dateparser
-unstructured==0.15.1
+unstructured==0.18.27
    # via onyx
-unstructured-client==0.25.4
+unstructured-client==0.42.6
    # via
    #   onyx
    #   unstructured
@@ -1229,7 +1242,6 @@ urllib3==2.6.3
    #   sentry-sdk
    #   trafilatura
    #   types-requests
-    #   unstructured-client
 uvicorn==0.35.0
    # via
    #   fastmcp
@@ -1244,6 +1256,8 @@ voyageai==0.2.3
    # via onyx
 wcwidth==0.2.14
    # via prompt-toolkit
+webencodings==0.5.1
+    # via html5lib
 websockets==15.0.1
    # via
    #   fastmcp
--- a/backend/requirements/dev.txt
+++ b/backend/requirements/dev.txt
@@ -175,7 +175,7 @@ greenlet==3.2.4 ; platform_machine == 'AMD64' or platform_machine == 'WIN32' or
    # via sqlalchemy
 grpc-google-iam-v1==0.14.3
    # via google-cloud-resource-manager
-grpcio==1.67.1
+grpcio==1.67.1 ; python_full_version < '3.14'
    # via
    #   google-api-core
    #   google-cloud-resource-manager
@@ -183,7 +183,17 @@ grpcio==1.67.1
    #   grpc-google-iam-v1
    #   grpcio-status
    #   litellm
-grpcio-status==1.67.1
+grpcio==1.76.0 ; python_full_version >= '3.14'
+    # via
+    #   google-api-core
+    #   google-cloud-resource-manager
+    #   googleapis-common-protos
+    #   grpc-google-iam-v1
+    #   grpcio-status
+    #   litellm
+grpcio-status==1.67.1 ; python_full_version < '3.14'
+    # via google-api-core
+grpcio-status==1.76.0 ; python_full_version >= '3.14'
    # via google-api-core
 h11==0.16.0
    # via
@@ -278,7 +288,7 @@ nest-asyncio==1.6.0
    # via ipykernel
 nodeenv==1.9.1
    # via pre-commit
-numpy==1.26.4
+numpy==2.4.1
    # via
    #   contourpy
    #   matplotlib
@@ -347,7 +357,16 @@ proto-plus==1.26.1
    #   google-api-core
    #   google-cloud-aiplatform
    #   google-cloud-resource-manager
-protobuf==5.29.5
+protobuf==5.29.5 ; python_full_version < '3.14'
+    # via
+    #   google-api-core
+    #   google-cloud-aiplatform
+    #   google-cloud-resource-manager
+    #   googleapis-common-protos
+    #   grpc-google-iam-v1
+    #   grpcio-status
+    #   proto-plus
+protobuf==6.33.4 ; python_full_version >= '3.14'
    # via
    #   google-api-core
    #   google-cloud-aiplatform
@@ -546,6 +565,7 @@ typing-extensions==4.15.0
    #   fastapi
    #   google-cloud-aiplatform
    #   google-genai
+    #   grpcio
    #   huggingface-hub
    #   ipython
    #   mypy
--- a/backend/requirements/ee.txt
+++ b/backend/requirements/ee.txt
@@ -132,7 +132,7 @@ googleapis-common-protos==1.72.0
    #   grpcio-status
 grpc-google-iam-v1==0.14.3
    # via google-cloud-resource-manager
-grpcio==1.67.1
+grpcio==1.67.1 ; python_full_version < '3.14'
    # via
    #   google-api-core
    #   google-cloud-resource-manager
@@ -140,7 +140,17 @@ grpcio==1.67.1
    #   grpc-google-iam-v1
    #   grpcio-status
    #   litellm
-grpcio-status==1.67.1
+grpcio==1.76.0 ; python_full_version >= '3.14'
+    # via
+    #   google-api-core
+    #   google-cloud-resource-manager
+    #   googleapis-common-protos
+    #   grpc-google-iam-v1
+    #   grpcio-status
+    #   litellm
+grpcio-status==1.67.1 ; python_full_version < '3.14'
+    # via google-api-core
+grpcio-status==1.76.0 ; python_full_version >= '3.14'
    # via google-api-core
 h11==0.16.0
    # via
@@ -192,7 +202,7 @@ multidict==6.7.0
    #   aiobotocore
    #   aiohttp
    #   yarl
-numpy==1.26.4
+numpy==2.4.1
    # via
    #   shapely
    #   voyageai
@@ -224,7 +234,16 @@ proto-plus==1.26.1
    #   google-api-core
    #   google-cloud-aiplatform
    #   google-cloud-resource-manager
-protobuf==5.29.5
+protobuf==5.29.5 ; python_full_version < '3.14'
+    # via
+    #   google-api-core
+    #   google-cloud-aiplatform
+    #   google-cloud-resource-manager
+    #   googleapis-common-protos
+    #   grpc-google-iam-v1
+    #   grpcio-status
+    #   proto-plus
+protobuf==6.33.4 ; python_full_version >= '3.14'
    # via
    #   google-api-core
    #   google-cloud-aiplatform
@@ -329,6 +348,7 @@ typing-extensions==4.15.0
    #   fastapi
    #   google-cloud-aiplatform
    #   google-genai
+    #   grpcio
    #   huggingface-hub
    #   openai
    #   pydantic
--- a/backend/requirements/model_server.txt
+++ b/backend/requirements/model_server.txt
@@ -157,7 +157,7 @@ googleapis-common-protos==1.72.0
    #   grpcio-status
 grpc-google-iam-v1==0.14.3
    # via google-cloud-resource-manager
-grpcio==1.67.1
+grpcio==1.67.1 ; python_full_version < '3.14'
    # via
    #   google-api-core
    #   google-cloud-resource-manager
@@ -165,7 +165,17 @@ grpcio==1.67.1
    #   grpc-google-iam-v1
    #   grpcio-status
    #   litellm
-grpcio-status==1.67.1
+grpcio==1.76.0 ; python_full_version >= '3.14'
+    # via
+    #   google-api-core
+    #   google-cloud-resource-manager
+    #   googleapis-common-protos
+    #   grpc-google-iam-v1
+    #   grpcio-status
+    #   litellm
+grpcio-status==1.67.1 ; python_full_version < '3.14'
+    # via google-api-core
+grpcio-status==1.76.0 ; python_full_version >= '3.14'
    # via google-api-core
 h11==0.16.0
    # via
@@ -229,7 +239,7 @@ multidict==6.7.0
    #   yarl
 networkx==3.5
    # via torch
-numpy==1.26.4
+numpy==2.4.1
    # via
    #   accelerate
    #   onyx
@@ -306,7 +316,16 @@ proto-plus==1.26.1
    #   google-api-core
    #   google-cloud-aiplatform
    #   google-cloud-resource-manager
-protobuf==5.29.5
+protobuf==5.29.5 ; python_full_version < '3.14'
+    # via
+    #   google-api-core
+    #   google-cloud-aiplatform
+    #   google-cloud-resource-manager
+    #   googleapis-common-protos
+    #   grpc-google-iam-v1
+    #   grpcio-status
+    #   proto-plus
+protobuf==6.33.4 ; python_full_version >= '3.14'
    # via
    #   google-api-core
    #   google-cloud-aiplatform
@@ -450,6 +469,7 @@ typing-extensions==4.15.0
    #   fastapi
    #   google-cloud-aiplatform
    #   google-genai
+    #   grpcio
    #   huggingface-hub
    #   openai
    #   pydantic
--- a/backend/scripts/tenant_cleanup/cleanup_tenants.py
+++ b/backend/scripts/tenant_cleanup/cleanup_tenants.py
@@ -34,6 +34,7 @@ from scripts.tenant_cleanup.cleanup_utils import execute_control_plane_query
 from scripts.tenant_cleanup.cleanup_utils import find_worker_pod
 from scripts.tenant_cleanup.cleanup_utils import get_tenant_status
 from scripts.tenant_cleanup.cleanup_utils import read_tenant_ids_from_csv
+from scripts.tenant_cleanup.cleanup_utils import TenantNotFoundInControlPlaneError


 def signal_handler(signum: int, frame: object) -> None:
@@ -418,6 +419,9 @@ def cleanup_tenant(tenant_id: str, pod_name: str, force: bool = False) -> bool:
    """
    print(f"Starting cleanup for tenant: {tenant_id}")

+    # Track whether the tenant was missing from the control plane (Step 3 is skipped if so)
+    tenant_not_found_in_control_plane = False
+
    # Check tenant status first
    print(f"\n{'=' * 80}")
    try:
@@ -457,8 +461,25 @@ def cleanup_tenant(tenant_id: str, pod_name: str, force: bool = False) -> bool:
            if response.lower() != "yes":
                print("Cleanup aborted - could not verify tenant status")
                return False
+    except TenantNotFoundInControlPlaneError as e:
+        # Tenant/table not found in control plane
+        error_str = str(e)
+        print(f"⚠️  WARNING: Tenant not found in control plane: {error_str}")
+        tenant_not_found_in_control_plane = True
+
+        if force:
+            print(
+                "[FORCE MODE] Tenant not found in control plane - continuing with dataplane cleanup only"
+            )
+        else:
+            response = input("Continue anyway? Type 'yes' to confirm: ")
+            if response.lower() != "yes":
+                print("Cleanup aborted - tenant not found in control plane")
+                return False
    except Exception as e:
-        print(f"⚠️  WARNING: Failed to check tenant status: {e}")
+        # Other errors (not "not found")
+        error_str = str(e)
+        print(f"⚠️  WARNING: Failed to check tenant status: {error_str}")

        if force:
            print(f"Skipping cleanup for tenant {tenant_id} in force mode")
@@ -516,8 +537,14 @@ def cleanup_tenant(tenant_id: str, pod_name: str, force: bool = False) -> bool:
    else:
        print("Step 2 skipped by user")

-    # Step 3: Clean up control plane
-    if confirm_step(
+    # Step 3: Clean up control plane (skipped when the tenant was not found in the control plane)
+    if tenant_not_found_in_control_plane:
+        print(f"\n{'=' * 80}")
+        print(
+            "Step 3/3: Skipping control plane cleanup (tenant not found in control plane)"
+        )
+        print(f"{'=' * 80}\n")
+    elif confirm_step(
        "Step 3/3: Delete control plane records (tenant_notification, tenant_config, subscription, tenant)",
        force,
    ):
--- a/backend/scripts/tenant_cleanup/cleanup_utils.py
+++ b/backend/scripts/tenant_cleanup/cleanup_utils.py
@@ -7,6 +7,10 @@ from dataclasses import dataclass
 from pathlib import Path


+class TenantNotFoundInControlPlaneError(Exception):
+    """Exception raised when tenant/table is not found in control plane."""
+
+
@dataclass
 class ControlPlaneConfig:
    """Configuration for connecting to the control plane database."""
@@ -136,6 +140,9 @@ def get_tenant_status(tenant_id: str) -> str | None:

    Returns:
        Tenant status string (e.g., 'GATED_ACCESS', 'ACTIVE') or None if not found
+
+    Raises:
+        TenantNotFoundInControlPlaneError: If the tenant is not found in the control plane database
    """
    print(f"Fetching tenant status for tenant: {tenant_id}")

@@ -152,15 +159,18 @@ def get_tenant_status(tenant_id: str) -> str | None:
            return status
        else:
            print("⚠ Tenant not found in control plane")
-            return None
-
+            raise TenantNotFoundInControlPlaneError(
+                f"Tenant {tenant_id} not found in control plane database"
+            )
+    except TenantNotFoundInControlPlaneError:
+        # Re-raise without wrapping
+        raise
    except subprocess.CalledProcessError as e:
+        error_msg = e.stderr if e.stderr else str(e)
        print(
-            f"✗ Failed to get tenant status for {tenant_id}: {e}",
+            f"✗ Failed to get tenant status for {tenant_id}: {error_msg}",
            file=sys.stderr,
        )
-        if e.stderr:
-            print(f"  Error details: {e.stderr}", file=sys.stderr)
        return None


--- a/backend/scripts/tenant_cleanup/no_bastion_cleanup_tenants.py
+++ b/backend/scripts/tenant_cleanup/no_bastion_cleanup_tenants.py
@@ -5,10 +5,9 @@ All queries run directly from pods.
 Supports two-cluster architecture (data plane and control plane in separate clusters).

 Usage:
-    PYTHONPATH=. python scripts/tenant_cleanup/no_bastion_cleanup_tenants.py <tenant_id> [--force]
-    PYTHONPATH=. python scripts/tenant_cleanup/no_bastion_cleanup_tenants.py --csv <csv_file_path> [--force]
+    PYTHONPATH=. python scripts/tenant_cleanup/no_bastion_cleanup_tenants.py <tenant_id> \
+        --data-plane-context <context> --control-plane-context <context> [--force]

-    With explicit contexts:
    PYTHONPATH=. python scripts/tenant_cleanup/no_bastion_cleanup_tenants.py --csv <csv_file_path> \
        --data-plane-context <context> --control-plane-context <context> [--force]
 """
@@ -30,6 +29,10 @@ from scripts.tenant_cleanup.no_bastion_cleanup_utils import find_background_pod
 from scripts.tenant_cleanup.no_bastion_cleanup_utils import find_worker_pod
 from scripts.tenant_cleanup.no_bastion_cleanup_utils import get_tenant_status
 from scripts.tenant_cleanup.no_bastion_cleanup_utils import read_tenant_ids_from_csv
+from scripts.tenant_cleanup.no_bastion_cleanup_utils import (
+    TenantNotFoundInControlPlaneError,
+)
+

 # Global lock for thread-safe operations
 _print_lock: Lock = Lock()
@@ -41,12 +44,12 @@ def signal_handler(signum: int, frame: object) -> None:
    sys.exit(1)


-def setup_scripts_on_pod(pod_name: str, context: str | None = None) -> None:
+def setup_scripts_on_pod(pod_name: str, context: str) -> None:
    """Copy all required scripts to the pod once at the beginning.

    Args:
        pod_name: Pod to copy scripts to
-        context: Optional kubectl context
+        context: kubectl context for the cluster
    """
    print("Setting up scripts on pod (one-time operation)...")

@@ -66,9 +69,7 @@ def setup_scripts_on_pod(pod_name: str, context: str | None = None) -> None:
        if not local_file.exists():
            raise FileNotFoundError(f"Script not found: {local_file}")

-        cmd_cp = ["kubectl", "cp"]
-        if context:
-            cmd_cp.extend(["--context", context])
+        cmd_cp = ["kubectl", "cp", "--context", context]
        cmd_cp.extend([str(local_file), f"{pod_name}:{remote_path}"])

        subprocess.run(cmd_cp, check=True, capture_output=True)
@@ -76,15 +77,13 @@ def setup_scripts_on_pod(pod_name: str, context: str | None = None) -> None:
    print("✓ All scripts copied to pod")


-def get_tenant_index_name(
-    pod_name: str, tenant_id: str, context: str | None = None
-) -> str:
+def get_tenant_index_name(pod_name: str, tenant_id: str, context: str) -> str:
    """Get the default index name for the given tenant by running script on pod.

    Args:
        pod_name: Data plane pod to execute on
        tenant_id: Tenant ID to process
-        context: Optional kubectl context for data plane cluster
+        context: kubectl context for data plane cluster
    """
    print(f"Getting default index name for tenant: {tenant_id}")

@@ -100,9 +99,7 @@ def get_tenant_index_name(
    try:
        # Copy script to pod
        print("  Copying script to pod...")
-        cmd_cp = ["kubectl", "cp"]
-        if context:
-            cmd_cp.extend(["--context", context])
+        cmd_cp = ["kubectl", "cp", "--context", context]
        cmd_cp.extend(
            [
                str(index_name_script),
@@ -118,12 +115,9 @@ def get_tenant_index_name(

        # Execute script on pod
        print("  Executing script on pod...")
-        cmd_exec = ["kubectl", "exec"]
-        if context:
-            cmd_exec.extend(["--context", context])
+        cmd_exec = ["kubectl", "exec", "--context", context, pod_name]
        cmd_exec.extend(
            [
-                pod_name,
                "--",
                "python",
                "/tmp/get_tenant_index_name.py",
@@ -168,25 +162,20 @@ def get_tenant_index_name(
        raise


-def get_tenant_users(
-    pod_name: str, tenant_id: str, context: str | None = None
-) -> list[str]:
+def get_tenant_users(pod_name: str, tenant_id: str, context: str) -> list[str]:
    """Get list of user emails from the tenant's data plane schema.

    Args:
        pod_name: Data plane pod to execute on
        tenant_id: Tenant ID to process
-        context: Optional kubectl context for data plane cluster
+        context: kubectl context for data plane cluster
    """
    # Script is already on pod from setup_scripts_on_pod()
    try:
        # Execute script on pod
-        cmd_exec = ["kubectl", "exec"]
-        if context:
-            cmd_exec.extend(["--context", context])
+        cmd_exec = ["kubectl", "exec", "--context", context, pod_name]
        cmd_exec.extend(
            [
-                pod_name,
                "--",
                "python",
                "/tmp/get_tenant_users.py",
@@ -233,25 +222,20 @@ def get_tenant_users(
        return []


-def check_documents_deleted(
-    pod_name: str, tenant_id: str, context: str | None = None
-) -> None:
+def check_documents_deleted(pod_name: str, tenant_id: str, context: str) -> None:
    """Check if all documents and connector credential pairs have been deleted.

    Args:
        pod_name: Data plane pod to execute on
        tenant_id: Tenant ID to process
-        context: Optional kubectl context for data plane cluster
+        context: kubectl context for data plane cluster
    """
    # Script is already on pod from setup_scripts_on_pod()
    try:
        # Execute script on pod
-        cmd_exec = ["kubectl", "exec"]
-        if context:
-            cmd_exec.extend(["--context", context])
+        cmd_exec = ["kubectl", "exec", "--context", context, pod_name]
        cmd_exec.extend(
            [
-                pod_name,
                "--",
                "python",
                "/tmp/check_documents_deleted.py",
@@ -305,25 +289,20 @@ def check_documents_deleted(
        raise


-def drop_data_plane_schema(
-    pod_name: str, tenant_id: str, context: str | None = None
-) -> None:
+def drop_data_plane_schema(pod_name: str, tenant_id: str, context: str) -> None:
    """Drop the PostgreSQL schema for the given tenant by running script on pod.

    Args:
        pod_name: Data plane pod to execute on
        tenant_id: Tenant ID to process
-        context: Optional kubectl context for data plane cluster
+        context: kubectl context for data plane cluster
    """
    # Script is already on pod from setup_scripts_on_pod()
    try:
        # Execute script on pod
-        cmd_exec = ["kubectl", "exec"]
-        if context:
-            cmd_exec.extend(["--context", context])
+        cmd_exec = ["kubectl", "exec", "--context", context, pod_name]
        cmd_exec.extend(
            [
-                pod_name,
                "--",
                "python",
                "/tmp/cleanup_tenant_schema.py",
@@ -366,14 +345,14 @@ def drop_data_plane_schema(


 def cleanup_control_plane(
-    pod_name: str, tenant_id: str, context: str | None = None, force: bool = False
+    pod_name: str, tenant_id: str, context: str, force: bool = False
 ) -> None:
    """Clean up control plane data via pod queries.

    Args:
        pod_name: Control plane pod to execute on
        tenant_id: Tenant ID to process
-        context: Optional kubectl context for control plane cluster
+        context: kubectl context for control plane cluster
        force: Skip confirmations if True
    """
    print(f"Cleaning up control plane data for tenant: {tenant_id}")
@@ -413,8 +392,8 @@ def cleanup_tenant(
    tenant_id: str,
    data_plane_pod: str,
    control_plane_pod: str,
-    data_plane_context: str | None = None,
-    control_plane_context: str | None = None,
+    data_plane_context: str,
+    control_plane_context: str,
    force: bool = False,
 ) -> bool:
    """Main cleanup function that orchestrates all cleanup steps.
@@ -423,12 +402,15 @@ def cleanup_tenant(
        tenant_id: Tenant ID to process
        data_plane_pod: Data plane pod for schema operations
        control_plane_pod: Control plane pod for tenant record operations
-        data_plane_context: Optional kubectl context for data plane cluster
-        control_plane_context: Optional kubectl context for control plane cluster
+        data_plane_context: kubectl context for data plane cluster
+        control_plane_context: kubectl context for control plane cluster
        force: Skip confirmations if True
    """
    print(f"Starting cleanup for tenant: {tenant_id}")

+    # Track whether the tenant was missing from the control plane (Step 3 is skipped if so)
+    tenant_not_found_in_control_plane = False
+
    # Check tenant status first (from control plane)
    print(f"\n{'=' * 80}")
    try:
@@ -470,8 +452,25 @@ def cleanup_tenant(
            if response.lower() != "yes":
                print("Cleanup aborted - could not verify tenant status")
                return False
+    except TenantNotFoundInControlPlaneError as e:
+        # Tenant/table not found in control plane
+        error_str = str(e)
+        print(f"⚠️  WARNING: Tenant not found in control plane: {error_str}")
+        tenant_not_found_in_control_plane = True
+
+        if force:
+            print(
+                "[FORCE MODE] Tenant not found in control plane - continuing with dataplane cleanup only"
+            )
+        else:
+            response = input("Continue anyway? Type 'yes' to confirm: ")
+            if response.lower() != "yes":
+                print("Cleanup aborted - tenant not found in control plane")
+                return False
    except Exception as e:
-        print(f"⚠️  WARNING: Failed to check tenant status: {e}")
+        # Other errors (not "not found")
+        error_str = str(e)
+        print(f"⚠️  WARNING: Failed to check tenant status: {error_str}")

        if force:
            print(f"Skipping cleanup for tenant {tenant_id} in force mode")
@@ -528,8 +527,14 @@ def cleanup_tenant(
    else:
        print("Step 2 skipped by user")

-    # Step 3: Clean up control plane
-    if confirm_step(
+    # Step 3: Clean up control plane (skipped when the tenant was not found in the control plane)
+    if tenant_not_found_in_control_plane:
+        print(f"\n{'=' * 80}")
+        print(
+            "Step 3/3: Skipping control plane cleanup (tenant not found in control plane)"
+        )
+        print(f"{'=' * 80}\n")
+    elif confirm_step(
        "Step 3/3: Delete control plane records (tenant_notification, tenant_config, subscription, tenant)",
        force,
    ):
@@ -560,12 +565,11 @@ def main() -> None:

    if len(sys.argv) < 2:
        print(
-            "Usage: PYTHONPATH=. python scripts/tenant_cleanup/no_bastion_cleanup_tenants.py <tenant_id> [--force]"
+            "Usage: PYTHONPATH=. python scripts/tenant_cleanup/no_bastion_cleanup_tenants.py <tenant_id> \\"
        )
        print(
-            "       PYTHONPATH=. python scripts/tenant_cleanup/no_bastion_cleanup_tenants.py --csv <csv_file_path> [--force]"
+            "           --data-plane-context <context> --control-plane-context <context> [--force]"
        )
-        print("\nTwo-cluster architecture (with explicit contexts):")
        print(
            "       PYTHONPATH=. python scripts/tenant_cleanup/no_bastion_cleanup_tenants.py --csv <csv_file_path> \\"
        )
@@ -575,20 +579,20 @@ def main() -> None:
        print("\nThis version runs ALL operations from pods (no bastion required)")
        print("\nArguments:")
        print(
-            "  tenant_id                  The tenant ID to clean up (required if not using --csv)"
+            "  tenant_id                   The tenant ID to clean up (required if not using --csv)"
        )
        print(
-            "  --csv PATH                 Path to CSV file containing tenant IDs to clean up"
+            "  --csv PATH                  Path to CSV file containing tenant IDs to clean up"
        )
-        print("  --force                    Skip all confirmation prompts (optional)")
+        print("  --force                     Skip all confirmation prompts (optional)")
        print(
-            "  --concurrency N            Process N tenants concurrently (default: 1)"
+            "  --concurrency N             Process N tenants concurrently (default: 1)"
        )
        print(
-            "  --data-plane-context CTX   Kubectl context for data plane cluster (optional)"
+            "  --data-plane-context CTX    Kubectl context for data plane cluster (required)"
        )
        print(
-            "  --control-plane-context CTX Kubectl context for control plane cluster (optional)"
+            "  --control-plane-context CTX Kubectl context for control plane cluster (required)"
        )
        sys.exit(1)

@@ -620,7 +624,7 @@ def main() -> None:
        )
        sys.exit(1)

-    # Parse contexts
+    # Parse contexts (required)
    data_plane_context: str | None = None
    control_plane_context: str | None = None

@@ -650,6 +654,21 @@ def main() -> None:
        except ValueError:
            pass

+    # Validate required contexts
+    if not data_plane_context:
+        print(
+            "Error: --data-plane-context is required",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+
+    if not control_plane_context:
+        print(
+            "Error: --control-plane-context is required",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+
    # Check for CSV mode
    if "--csv" in sys.argv:
        try:
--- a/backend/scripts/tenant_cleanup/no_bastion_cleanup_utils.py
+++ b/backend/scripts/tenant_cleanup/no_bastion_cleanup_utils.py
@@ -10,19 +10,19 @@ import sys
 from pathlib import Path


-def find_worker_pod(context: str | None = None) -> str:
+class TenantNotFoundInControlPlaneError(Exception):
+    """Exception raised when tenant/table is not found in control plane."""
+
+
+def find_worker_pod(context: str) -> str:
    """Find a user file processing worker pod using kubectl.

    Args:
-        context: Optional kubectl context to use
+        context: kubectl context to use
    """
-    print(
-        f"Finding user file processing worker pod{f' in context {context}' if context else ''}..."
-    )
+    print(f"Finding user file processing worker pod in context {context}...")

-    cmd = ["kubectl", "get", "po"]
-    if context:
-        cmd.extend(["--context", context])
+    cmd = ["kubectl", "get", "po", "--context", context]

    result = subprocess.run(cmd, capture_output=True, text=True, check=True)

@@ -43,17 +43,15 @@ def find_worker_pod(context: str | None = None) -> str:
    raise RuntimeError("No running user file processing worker pod found")


-def find_background_pod(context: str | None = None) -> str:
-    """Find a background/api-server pod for control plane operations.
+def find_background_pod(context: str) -> str:
+    """Find a pod for control plane operations.

    Args:
-        context: Optional kubectl context to use
+        context: kubectl context to use
    """
-    print(f"Finding background/api pod{f' in context {context}' if context else ''}...")
+    print(f"Finding control plane pod in context {context}...")

-    cmd = ["kubectl", "get", "po"]
-    if context:
-        cmd.extend(["--context", context])
+    cmd = ["kubectl", "get", "po", "--context", context]

    result = subprocess.run(cmd, capture_output=True, text=True, check=True)

@@ -65,16 +63,15 @@ def find_background_pod(context: str | None = None) -> str:

    random.shuffle(lines)

-    # Try to find api-server, background worker, or any celery worker
+    # Try to find control plane pods
    for line in lines:
        if (
            any(
                name in line
                for name in [
-                    "api-server",
-                    "celery-worker-light",
-                    "celery-worker-primary",
-                    "background",
+                    "background-processing-deployment",
+                    "subscription-deployment",
+                    "tenants-deployment",
                ]
            )
            and "Running" in line
@@ -106,20 +103,23 @@ def confirm_step(message: str, force: bool = False) -> bool:


 def execute_control_plane_query_from_pod(
-    pod_name: str, query: str, context: str | None = None
+    pod_name: str, query: str, context: str
 ) -> dict:
    """Execute a SQL query against control plane database from within a pod.

    Args:
        pod_name: The Kubernetes pod name to execute from
        query: The SQL query to execute
-        context: Optional kubectl context for control plane cluster
+        context: kubectl context for control plane cluster

    Returns:
        Dict with 'success' bool, 'stdout' str, and optional 'error' str
    """
    # Create a Python script to run the query
    # This script tries multiple environment variable patterns
+
+    # NOTE: whuang 01/08/2026: POSTGRES_CONTROL_* don't exist. This uses pattern 2 currently.
+
    query_script = f'''
 import os
 from sqlalchemy import create_engine, text
@@ -175,9 +175,7 @@ with engine.connect() as conn:
    script_path = "/tmp/control_plane_query.py"

    try:
-        cmd_write = ["kubectl", "exec", pod_name]
-        if context:
-            cmd_write.extend(["--context", context])
+        cmd_write = ["kubectl", "exec", "--context", context, pod_name]
        cmd_write.extend(
            [
                "--",
@@ -194,9 +192,7 @@ with engine.connect() as conn:
        )

        # Execute the script
-        cmd_exec = ["kubectl", "exec", pod_name]
-        if context:
-            cmd_exec.extend(["--context", context])
+        cmd_exec = ["kubectl", "exec", "--context", context, pod_name]
        cmd_exec.extend(["--", "python", script_path])

        result = subprocess.run(
@@ -220,19 +216,20 @@ with engine.connect() as conn:
        }


-def get_tenant_status(
-    pod_name: str, tenant_id: str, context: str | None = None
-) -> str | None:
+def get_tenant_status(pod_name: str, tenant_id: str, context: str) -> str | None:
    """
    Get tenant status from control plane database via pod.

    Args:
        pod_name: The pod to execute the query from
        tenant_id: The tenant ID to look up
-        context: Optional kubectl context for control plane cluster
+        context: kubectl context for control plane cluster

    Returns:
        Tenant status string (e.g., 'GATED_ACCESS', 'ACTIVE') or None if not found
+
+    Raises:
+        TenantNotFoundInControlPlaneError: If the tenant record is not found in the table
    """
    print(f"Fetching tenant status for tenant: {tenant_id}")

@@ -241,8 +238,9 @@ def get_tenant_status(
    result = execute_control_plane_query_from_pod(pod_name, query, context)

    if not result["success"]:
+        error_msg = result.get("error", "Unknown error")
        print(
-            f"✗ Failed to get tenant status for {tenant_id}: {result.get('error', 'Unknown error')}",
+            f"✗ Failed to get tenant status for {tenant_id}: {error_msg}",
            file=sys.stderr,
        )
        return None
@@ -257,23 +255,27 @@ def get_tenant_status(
                print(f"✓ Tenant status: {status}")
                return status

+        # Tenant record not found in control plane table
        print("⚠ Tenant not found in control plane")
-        return None
+        raise TenantNotFoundInControlPlaneError(
+            f"Tenant {tenant_id} not found in control plane database"
+        )

+    except TenantNotFoundInControlPlaneError:
+        # Re-raise without wrapping
+        raise
    except (json.JSONDecodeError, KeyError, IndexError) as e:
        print(f"✗ Failed to parse tenant status: {e}", file=sys.stderr)
        return None


-def execute_control_plane_delete(
-    pod_name: str, query: str, context: str | None = None
-) -> bool:
+def execute_control_plane_delete(pod_name: str, query: str, context: str) -> bool:
    """Execute a DELETE query against control plane database from pod.

    Args:
        pod_name: The pod to execute the query from
        query: The DELETE query to execute
-        context: Optional kubectl context for control plane cluster
+        context: kubectl context for control plane cluster

    Returns:
        True if successful, False otherwise
--- a/backend/scripts/tenant_cleanup/no_bastion_mark_connectors.py
+++ b/backend/scripts/tenant_cleanup/no_bastion_mark_connectors.py
@@ -5,10 +5,9 @@ All queries run directly from pods.
 Supports two-cluster architecture (data plane and control plane in separate clusters).

 Usage:
-    PYTHONPATH=. python scripts/tenant_cleanup/no_bastion_mark_connectors.py <tenant_id> [--force]
-    PYTHONPATH=. python scripts/tenant_cleanup/no_bastion_mark_connectors.py --csv <csv_file_path> [--force] [--concurrency N]
+    PYTHONPATH=. python scripts/tenant_cleanup/no_bastion_mark_connectors.py <tenant_id> \
+        --data-plane-context <context> --control-plane-context <context> [--force]

-    With explicit contexts:
    PYTHONPATH=. python scripts/tenant_cleanup/no_bastion_mark_connectors.py --csv <csv_file_path> \
        --data-plane-context <context> --control-plane-context <context> [--force] [--concurrency N]
 """
@@ -26,6 +25,9 @@ from scripts.tenant_cleanup.no_bastion_cleanup_utils import find_background_pod
 from scripts.tenant_cleanup.no_bastion_cleanup_utils import find_worker_pod
 from scripts.tenant_cleanup.no_bastion_cleanup_utils import get_tenant_status
 from scripts.tenant_cleanup.no_bastion_cleanup_utils import read_tenant_ids_from_csv
+from scripts.tenant_cleanup.no_bastion_cleanup_utils import (
+    TenantNotFoundInControlPlaneError,
+)

 # Global lock for thread-safe printing
 _print_lock: Lock = Lock()
@@ -37,15 +39,13 @@ def safe_print(*args: Any, **kwargs: Any) -> None:
        print(*args, **kwargs)


-def run_connector_deletion(
-    pod_name: str, tenant_id: str, context: str | None = None
-) -> None:
+def run_connector_deletion(pod_name: str, tenant_id: str, context: str) -> None:
    """Mark all connector credential pairs for deletion.

    Args:
        pod_name: Data plane pod to execute deletion on
        tenant_id: Tenant ID to process
-        context: Optional kubectl context for data plane cluster
+        context: kubectl context for data plane cluster
    """
    safe_print("  Marking all connector credential pairs for deletion...")

@@ -62,9 +62,7 @@ def run_connector_deletion(

    try:
        # Copy script to pod
-        cmd_cp = ["kubectl", "cp"]
-        if context:
-            cmd_cp.extend(["--context", context])
+        cmd_cp = ["kubectl", "cp", "--context", context]
        cmd_cp.extend(
            [
                str(mark_deletion_script),
@@ -79,12 +77,9 @@ def run_connector_deletion(
        )

        # Execute script on pod
-        cmd_exec = ["kubectl", "exec"]
-        if context:
-            cmd_exec.extend(["--context", context])
+        cmd_exec = ["kubectl", "exec", "--context", context, pod_name]
        cmd_exec.extend(
            [
-                pod_name,
                "--",
                "python",
                "/tmp/execute_connector_deletion.py",
@@ -118,8 +113,8 @@ def mark_tenant_connectors_for_deletion(
    tenant_id: str,
    data_plane_pod: str,
    control_plane_pod: str,
-    data_plane_context: str | None = None,
-    control_plane_context: str | None = None,
+    data_plane_context: str,
+    control_plane_context: str,
    force: bool = False,
 ) -> None:
    """Main function to mark all connectors for a tenant for deletion.
@@ -128,8 +123,8 @@ def mark_tenant_connectors_for_deletion(
        tenant_id: Tenant ID to process
        data_plane_pod: Data plane pod for connector operations
        control_plane_pod: Control plane pod for status checks
-        data_plane_context: Optional kubectl context for data plane cluster
-        control_plane_context: Optional kubectl context for control plane cluster
+        data_plane_context: kubectl context for data plane cluster
+        control_plane_context: kubectl context for control plane cluster
        force: Skip confirmations if True
    """
    safe_print(f"Processing connectors for tenant: {tenant_id}")
@@ -174,6 +169,23 @@ def mark_tenant_connectors_for_deletion(
                    )
            else:
                raise RuntimeError(f"Could not verify tenant status for {tenant_id}")
+    except TenantNotFoundInControlPlaneError as e:
+        # Tenant/table not found in control plane
+        error_str = str(e)
+        safe_print(f"⚠️  WARNING: Tenant not found in control plane: {error_str}")
+
+        if force:
+            safe_print(
+                "[FORCE MODE] Tenant not found in control plane - continuing with connector deletion anyway"
+            )
+        else:
+            response = input("Continue anyway? Type 'yes' to confirm: ")
+            if response.lower() != "yes":
+                safe_print("Operation aborted - tenant not found in control plane")
+                raise RuntimeError(f"Tenant {tenant_id} not found in control plane")
+    except RuntimeError:
+        # Re-raise RuntimeError (from status checks above) without wrapping
+        raise
    except Exception as e:
        safe_print(f"⚠️  WARNING: Failed to check tenant status: {e}")
        if not force:
@@ -205,16 +217,14 @@ def main() -> None:
    if len(sys.argv) < 2:
        print(
            "Usage: PYTHONPATH=. python scripts/tenant_cleanup/"
-            "no_bastion_mark_connectors.py <tenant_id> [--force] [--concurrency N]"
+            "no_bastion_mark_connectors.py <tenant_id> \\"
+        )
+        print(
+            "           --data-plane-context <context> --control-plane-context <context> [--force]"
        )
        print(
            "       PYTHONPATH=. python scripts/tenant_cleanup/"
-            "no_bastion_mark_connectors.py --csv <csv_file_path> "
-            "[--force] [--concurrency N]"
-        )
-        print("\nTwo-cluster architecture (with explicit contexts):")
-        print(
-            "       PYTHONPATH=. python scripts/tenant_cleanup/no_bastion_mark_connectors.py --csv <csv_file_path> \\"
+            "no_bastion_mark_connectors.py --csv <csv_file_path> \\"
        )
        print(
            "           --data-plane-context <context> --control-plane-context <context> [--force] [--concurrency N]"
@@ -222,20 +232,20 @@ def main() -> None:
        print("\nThis version runs ALL operations from pods (no bastion required)")
        print("\nArguments:")
        print(
-            "  tenant_id                  The tenant ID to process (required if not using --csv)"
+            "  tenant_id                   The tenant ID to process (required if not using --csv)"
        )
        print(
-            "  --csv PATH                 Path to CSV file containing tenant IDs to process"
+            "  --csv PATH                  Path to CSV file containing tenant IDs to process"
        )
-        print("  --force                    Skip all confirmation prompts (optional)")
+        print("  --force                     Skip all confirmation prompts (optional)")
        print(
-            "  --concurrency N            Process N tenants concurrently (default: 1)"
+            "  --concurrency N             Process N tenants concurrently (default: 1)"
        )
        print(
-            "  --data-plane-context CTX   Kubectl context for data plane cluster (optional)"
+            "  --data-plane-context CTX    Kubectl context for data plane cluster (required)"
        )
        print(
-            "  --control-plane-context CTX Kubectl context for control plane cluster (optional)"
+            "  --control-plane-context CTX Kubectl context for control plane cluster (required)"
        )
        sys.exit(1)

@@ -243,7 +253,7 @@ def main() -> None:
    force = "--force" in sys.argv
    tenant_ids: list[str] = []

-    # Parse contexts
+    # Parse contexts (required)
    data_plane_context: str | None = None
    control_plane_context: str | None = None

@@ -273,6 +283,21 @@ def main() -> None:
        except ValueError:
            pass

+    # Validate required contexts
+    if not data_plane_context:
+        print(
+            "Error: --data-plane-context is required",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+
+    if not control_plane_context:
+        print(
+            "Error: --control-plane-context is required",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+
    # Parse concurrency
    concurrency: int = 1
    if "--concurrency" in sys.argv:
--- a/backend/shared_configs/configs.py
+++ b/backend/shared_configs/configs.py
@@ -236,10 +236,10 @@ USAGE_LIMIT_LLM_COST_CENTS_PAID = int(

 # Per-week chunks indexed limits
 USAGE_LIMIT_CHUNKS_INDEXED_TRIAL = int(
-    os.environ.get("USAGE_LIMIT_CHUNKS_INDEXED_TRIAL", "10000")
+    os.environ.get("USAGE_LIMIT_CHUNKS_INDEXED_TRIAL", 100_000)
 )
 USAGE_LIMIT_CHUNKS_INDEXED_PAID = int(
-    os.environ.get("USAGE_LIMIT_CHUNKS_INDEXED_PAID", "50000")
+    os.environ.get("USAGE_LIMIT_CHUNKS_INDEXED_PAID", 1_000_000)
 )

 # Per-week API calls using API keys or Personal Access Tokens
--- a/backend/tests/external_dependency_unit/llm/test_prompt_caching.py
+++ b/backend/tests/external_dependency_unit/llm/test_prompt_caching.py
@@ -397,6 +397,7 @@ def test_anthropic_prompt_caching_reduces_costs(
    not os.environ.get(VERTEX_LOCATION_ENV),
    reason="VERTEX_LOCATION required for Vertex AI context caching (e.g., 'us-central1')",
 )
+@pytest.mark.skip(reason="Vertex AI prompt caching is disabled for now")
 def test_google_genai_prompt_caching_reduces_costs(
    db_session: Session,
 ) -> None:
--- a/backend/tests/integration/common_utils/managers/chat.py
+++ b/backend/tests/integration/common_utils/managers/chat.py
@@ -164,6 +164,87 @@ class ChatSessionManager:

        return streamed_response

+    @staticmethod
+    def send_message_with_disconnect(
+        chat_session_id: UUID,
+        message: str,
+        disconnect_after_packets: int = 0,
+        parent_message_id: int | None = None,
+        user_performing_action: DATestUser | None = None,
+        file_descriptors: list[FileDescriptor] | None = None,
+        search_doc_ids: list[int] | None = None,
+        retrieval_options: RetrievalDetails | None = None,
+        query_override: str | None = None,
+        regenerate: bool | None = None,
+        llm_override: LLMOverride | None = None,
+        prompt_override: PromptOverride | None = None,
+        alternate_assistant_id: int | None = None,
+        use_existing_user_message: bool = False,
+        forced_tool_ids: list[int] | None = None,
+    ) -> None:
+        """
+        Send a message and simulate client disconnect before stream completes.
+
+        This is useful for testing how the server handles client disconnections
+        during streaming responses.
+
+        Args:
+            chat_session_id: The chat session ID
+            message: The message to send
+            disconnect_after_packets: Number of non-empty stream lines to read
+                before disconnecting. The stream is closed as soon as more
+                than this many lines have been received, so the default of 0
+                disconnects on the first line.
+            user_performing_action: User whose headers and cookies are sent;
+                GENERAL_HEADERS are used when omitted.
+            ... (other standard message parameters)
+
+        Returns:
+            None. Any data received before the disconnect is discarded.
+        """
+        chat_message_req = CreateChatMessageRequest(
+            chat_session_id=chat_session_id,
+            parent_message_id=parent_message_id,
+            message=message,
+            file_descriptors=file_descriptors or [],
+            search_doc_ids=search_doc_ids or [],
+            retrieval_options=retrieval_options,
+            rerank_settings=None,
+            query_override=query_override,
+            regenerate=regenerate,
+            llm_override=llm_override,
+            prompt_override=prompt_override,
+            alternate_assistant_id=alternate_assistant_id,
+            use_existing_user_message=use_existing_user_message,
+            forced_tool_ids=forced_tool_ids,
+        )
+
+        headers = (
+            user_performing_action.headers
+            if user_performing_action
+            else GENERAL_HEADERS
+        )
+        cookies = user_performing_action.cookies if user_performing_action else None
+
+        packets_received = 0
+
+        with requests.post(
+            f"{API_SERVER_URL}/chat/send-message",
+            json=chat_message_req.model_dump(),
+            headers=headers,
+            stream=True,
+            cookies=cookies,
+        ) as response:
+            for line in response.iter_lines():
+                if not line:
+                    continue
+
+                packets_received += 1
+                if packets_received > disconnect_after_packets:
+                    break
+
+        return None
+
    @staticmethod
    def analyze_response(response: Response) -> StreamedResponse:
        response_data = cast(
--- a/backend/tests/integration/tests/llm_auto_update/test_auto_llm_update.py
+++ b/backend/tests/integration/tests/llm_auto_update/test_auto_llm_update.py
@@ -18,25 +18,15 @@ import pytest
 import requests

 from tests.integration.common_utils.constants import API_SERVER_URL
-from tests.integration.common_utils.reset import reset_all
 from tests.integration.common_utils.test_models import DATestUser

-# Skip all tests in this module
-pytestmark = pytest.mark.skip(reason="Auto LLM update tests temporarily disabled")
-

 # How long to wait for the celery task to run and sync models
 # This should be longer than AUTO_LLM_UPDATE_INTERVAL_SECONDS
-MAX_WAIT_TIME_SECONDS = 60
+MAX_WAIT_TIME_SECONDS = 120
 POLL_INTERVAL_SECONDS = 5


-@pytest.fixture(scope="module", autouse=True)
-def reset_for_module() -> None:
-    """Reset all data once before running any tests in this module."""
-    reset_all()
-
-
 def _create_provider_with_api(
    admin_user: DATestUser,
    name: str,
@@ -142,6 +132,7 @@ def wait_for_model_sync(


 def test_auto_mode_provider_gets_synced_from_github_config(
+    reset: None,
    admin_user: DATestUser,
 ) -> None:
    """
@@ -156,7 +147,7 @@ def test_auto_mode_provider_gets_synced_from_github_config(
    # First, get the GitHub config to know what models we should expect
    github_config = get_auto_config(admin_user)
    if github_config is None:
-        pytest.skip("GitHub config not found")
+        pytest.fail("GitHub config not found")

    # Get expected models for OpenAI from the config
    if "openai" not in github_config.get("providers", {}):
@@ -207,17 +198,26 @@ def test_auto_mode_provider_gets_synced_from_github_config(
    )

    # Verify the models were synced
-    synced_model_names = {m["name"] for m in synced_provider["model_configurations"]}
+    synced_model_configs = synced_provider["model_configurations"]
+    synced_model_names = {m["name"] for m in synced_model_configs}
    print(f"Synced models: {synced_model_names}")

    assert expected_models.issubset(
        synced_model_names
    ), f"Expected models {expected_models} not found in synced models {synced_model_names}"

-    # Verify the outdated model was removed
+    # Verify the outdated model still exists but is not visible
+    # (Auto mode marks removed models as not visible, it doesn't delete them)
+    outdated_model = next(
+        (m for m in synced_model_configs if m["name"] == "outdated-model-name"),
+        None,
+    )
    assert (
-        "outdated-model-name" not in synced_model_names
-    ), "Outdated model should have been removed by sync"
+        outdated_model is not None
+    ), "Outdated model should still exist after sync (marked invisible, not deleted)"
+    assert not outdated_model[
+        "is_visible"
+    ], "Outdated model should not be visible after sync"

    # Verify default model was set from GitHub config
    expected_default = (
@@ -230,6 +230,7 @@ def test_auto_mode_provider_gets_synced_from_github_config(


 def test_manual_mode_provider_not_affected_by_auto_sync(
+    reset: None,
    admin_user: DATestUser,
 ) -> None:
    """
--- a/backend/tests/integration/tests/migrations/test_assistant_consolidation_migration.py
+++ b/backend/tests/integration/tests/migrations/test_assistant_consolidation_migration.py
@@ -61,13 +61,13 @@ def test_cold_startup_default_assistant() -> None:

        # Verify all three main tools are attached
        assert (
-            "SearchTool" in tool_names
+            "internal_search" in tool_names
        ), "Default assistant should have SearchTool attached"
        assert (
-            "ImageGenerationTool" in tool_names
+            "generate_image" in tool_names
        ), "Default assistant should have ImageGenerationTool attached"
        assert (
-            "WebSearchTool" in tool_names
+            "web_search" in tool_names
        ), "Default assistant should have WebSearchTool attached"

        # Also verify by display names for clarity
--- a/backend/tests/integration/tests/migrations/test_tool_seeding.py
+++ b/backend/tests/integration/tests/migrations/test_tool_seeding.py
@@ -1,3 +1,4 @@
+from pydantic import BaseModel
 from sqlalchemy import text

 from onyx.db.engine.sql_engine import get_session_with_current_tenant
@@ -5,6 +6,53 @@ from tests.integration.common_utils.reset import downgrade_postgres
 from tests.integration.common_utils.reset import upgrade_postgres


+class ToolSeedingExpectedResult(BaseModel):
+    name: str
+    display_name: str
+    in_code_tool_id: str
+    user_id: str | None
+
+
+EXPECTED_TOOLS = {
+    "SearchTool": ToolSeedingExpectedResult(
+        name="internal_search",
+        display_name="Internal Search",
+        in_code_tool_id="SearchTool",
+        user_id=None,
+    ),
+    "ImageGenerationTool": ToolSeedingExpectedResult(
+        name="generate_image",
+        display_name="Image Generation",
+        in_code_tool_id="ImageGenerationTool",
+        user_id=None,
+    ),
+    "WebSearchTool": ToolSeedingExpectedResult(
+        name="web_search",
+        display_name="Web Search",
+        in_code_tool_id="WebSearchTool",
+        user_id=None,
+    ),
+    "KnowledgeGraphTool": ToolSeedingExpectedResult(
+        name="run_kg_search",
+        display_name="Knowledge Graph Search",
+        in_code_tool_id="KnowledgeGraphTool",
+        user_id=None,
+    ),
+    "PythonTool": ToolSeedingExpectedResult(
+        name="python",
+        display_name="Code Interpreter",
+        in_code_tool_id="PythonTool",
+        user_id=None,
+    ),
+    "ResearchAgent": ToolSeedingExpectedResult(
+        name="research_agent",
+        display_name="Research Agent",
+        in_code_tool_id="ResearchAgent",
+        user_id=None,
+    ),
+}
+
+
 def test_tool_seeding_migration() -> None:
    """Test that migration from base to head correctly seeds builtin tools."""
    # Start from base and upgrade to just before tool seeding
@@ -49,56 +97,33 @@ def test_tool_seeding_migration() -> None:
            len(tools) == 8
        ), f"Should have created exactly 8 builtin tools, got {len(tools)}"

+        def validate_tool(expected: ToolSeedingExpectedResult) -> None:
+            tool = next((t for t in tools if t[1] == expected.name), None)
+            assert tool is not None, f"{expected.name} should exist"
+            assert (
+                tool[2] == expected.display_name
+            ), f"{expected.name} display name should be '{expected.display_name}'"
+            assert (
+                tool[4] == expected.in_code_tool_id
+            ), f"{expected.name} in_code_tool_id should be '{expected.in_code_tool_id}'"
+            assert (
+                tool[5] is None
+            ), f"{expected.name} should not have a user_id (builtin)"
+
        # Check SearchTool
-        search_tool = next((t for t in tools if t[1] == "SearchTool"), None)
-        assert search_tool is not None, "SearchTool should exist"
-        assert (
-            search_tool[2] == "Internal Search"
-        ), "SearchTool display name should be 'Internal Search'"
-        assert search_tool[5] is None, "SearchTool should not have a user_id (builtin)"
+        validate_tool(EXPECTED_TOOLS["SearchTool"])

        # Check ImageGenerationTool
-        img_tool = next((t for t in tools if t[1] == "ImageGenerationTool"), None)
-        assert img_tool is not None, "ImageGenerationTool should exist"
-        assert (
-            img_tool[2] == "Image Generation"
-        ), "ImageGenerationTool display name should be 'Image Generation'"
-        assert (
-            img_tool[5] is None
-        ), "ImageGenerationTool should not have a user_id (builtin)"
+        validate_tool(EXPECTED_TOOLS["ImageGenerationTool"])

        # Check WebSearchTool
-        web_tool = next((t for t in tools if t[1] == "WebSearchTool"), None)
-        assert web_tool is not None, "WebSearchTool should exist"
-        assert (
-            web_tool[2] == "Web Search"
-        ), "WebSearchTool display name should be 'Web Search'"
-        assert web_tool[5] is None, "WebSearchTool should not have a user_id (builtin)"
+        validate_tool(EXPECTED_TOOLS["WebSearchTool"])

        # Check KnowledgeGraphTool
-        kg_tool = next((t for t in tools if t[1] == "KnowledgeGraphTool"), None)
-        assert kg_tool is not None, "KnowledgeGraphTool should exist"
-        assert (
-            kg_tool[2] == "Knowledge Graph Search"
-        ), "KnowledgeGraphTool display name should be 'Knowledge Graph Search'"
-        assert (
-            kg_tool[5] is None
-        ), "KnowledgeGraphTool should not have a user_id (builtin)"
+        validate_tool(EXPECTED_TOOLS["KnowledgeGraphTool"])

        # Check PythonTool
-        python_tool = next((t for t in tools if t[1] == "PythonTool"), None)
-        assert python_tool is not None, "PythonTool should exist"
-        assert (
-            python_tool[2] == "Code Interpreter"
-        ), "PythonTool display name should be 'Code Interpreter'"
-        assert python_tool[5] is None, "PythonTool should not have a user_id (builtin)"
+        validate_tool(EXPECTED_TOOLS["PythonTool"])

        # Check ResearchAgent (Deep Research as a tool)
-        research_agent = next((t for t in tools if t[1] == "ResearchAgent"), None)
-        assert research_agent is not None, "ResearchAgent should exist"
-        assert (
-            research_agent[2] == "Research Agent"
-        ), "ResearchAgent display name should be 'Research Agent'"
-        assert (
-            research_agent[5] is None
-        ), "ResearchAgent should not have a user_id (builtin)"
+        validate_tool(EXPECTED_TOOLS["ResearchAgent"])
--- a/backend/tests/integration/tests/personas/test_unified_assistant.py
+++ b/backend/tests/integration/tests/personas/test_unified_assistant.py
@@ -38,11 +38,11 @@ def test_unified_assistant(reset: None, admin_user: DATestUser) -> None:
    # Verify tools
    tools = unified_assistant.tools
    tool_names = [tool.name for tool in tools]
-    assert "SearchTool" in tool_names, "SearchTool not found in unified assistant"
+    assert "internal_search" in tool_names, "SearchTool not found in unified assistant"
    assert (
-        "ImageGenerationTool" in tool_names
+        "generate_image" in tool_names
    ), "ImageGenerationTool not found in unified assistant"
-    assert "WebSearchTool" in tool_names, "WebSearchTool not found in unified assistant"
+    assert "web_search" in tool_names, "WebSearchTool not found in unified assistant"

    # Verify no starter messages
    starter_messages = unified_assistant.starter_messages or []
--- a/backend/tests/integration/tests/streaming_endpoints/test_chat_stream.py
+++ b/backend/tests/integration/tests/streaming_endpoints/test_chat_stream.py
@@ -1,8 +1,17 @@
+import time
+
+from onyx.configs.constants import MessageType
 from tests.integration.common_utils.managers.chat import ChatSessionManager
 from tests.integration.common_utils.managers.llm_provider import LLMProviderManager
 from tests.integration.common_utils.test_models import DATestUser
 from tests.integration.conftest import DocumentBuilderType

+TERMINATED_RESPONSE_MESSAGE = (
+    "Response was terminated prior to completion, try regenerating."
+)
+
+LOADING_RESPONSE_MESSAGE = "Message is loading... Please refresh the page soon."
+

 def test_send_two_messages(basic_user: DATestUser) -> None:
    # Create a chat session
@@ -104,3 +113,59 @@ def test_send_message__basic_searches(
    # short doc should be more relevant and thus first
    assert response.top_documents[0].document_id == short_doc.id
    assert response.top_documents[1].document_id == long_doc.id
+
+
+def test_send_message_disconnect_and_cleanup(
+    reset: None, admin_user: DATestUser
+) -> None:
+    """
+    Test that when a client disconnects mid-stream:
+    1. Client sends a message and disconnects after receiving just 1 packet
+    2. Client checks to see that their message ends up completed
+
+    Note: There is an interim period (between disconnect and checkup) where we expect
+    to see some sort of 'loading' message.
+    """
+    LLMProviderManager.create(user_performing_action=admin_user)
+
+    test_chat_session = ChatSessionManager.create(user_performing_action=admin_user)
+
+    # Send a message and disconnect after receiving just 1 packet
+    ChatSessionManager.send_message_with_disconnect(
+        chat_session_id=test_chat_session.id,
+        message="What are some important events that happened today?",
+        user_performing_action=admin_user,
+        disconnect_after_packets=1,
+    )
+
+    # Every second, check if we have the latest state of the chat session, for up to a minute
+    increment_seconds = 1
+    max_seconds = 60
+    msg = TERMINATED_RESPONSE_MESSAGE
+
+    for _ in range(max_seconds // increment_seconds):
+        time.sleep(increment_seconds)
+
+        # Get the chat history
+        chat_history = ChatSessionManager.get_chat_history(
+            chat_session=test_chat_session,
+            user_performing_action=admin_user,
+        )
+
+        # Find the assistant message
+        assistant_message = None
+        for chat_obj in chat_history:
+            if chat_obj.message_type == MessageType.ASSISTANT:
+                assistant_message = chat_obj
+                break
+
+        assert assistant_message is not None, "Assistant message should exist"
+        msg = assistant_message.message
+
+        if msg != TERMINATED_RESPONSE_MESSAGE and msg != LOADING_RESPONSE_MESSAGE:
+            break
+
+    assert msg != TERMINATED_RESPONSE_MESSAGE and msg != LOADING_RESPONSE_MESSAGE, (
+        f"Assistant message should no longer be the terminated response message after cleanup, "
+        f"got: {msg}"
+    )
--- a/backend/tests/integration/tests/web_search/test_web_search_api.py
+++ b/backend/tests/integration/tests/web_search/test_web_search_api.py
@@ -71,10 +71,10 @@ class TestOnyxWebCrawler:
        assert response.status_code == 200, response.text
        data = response.json()

-        # Should return a result but with empty content
-        assert len(data["results"]) == 1
-        result = data["results"][0]
-        assert result["content"] == ""
+        assert data["provider_type"] == WebContentProviderType.ONYX_WEB_CRAWLER.value
+
+        # The API filters out docs with no title/content, so unreachable domains return no results
+        assert data["results"] == []

    def test_handles_404_page(self, admin_user: DATestUser) -> None:
        """Test that the crawler handles 404 responses gracefully."""
@@ -86,8 +86,10 @@ class TestOnyxWebCrawler:
        assert response.status_code == 200, response.text
        data = response.json()

-        # Should return a result (possibly with empty content for 404)
-        assert len(data["results"]) == 1
+        assert data["provider_type"] == WebContentProviderType.ONYX_WEB_CRAWLER.value
+
+        # Non-200 responses are treated as non-content and filtered out
+        assert data["results"] == []

    def test_https_url_with_path(self, admin_user: DATestUser) -> None:
        """Test that the crawler handles HTTPS URLs with paths correctly."""
--- a/backend/tests/unit/onyx/tools/test_tool_runner.py
+++ b/backend/tests/unit/onyx/tools/test_tool_runner.py
@@ -0,0 +1,309 @@
+from onyx.server.query_and_chat.placement import Placement
+from onyx.tools.models import ToolCallKickoff
+from onyx.tools.tool_runner import _merge_tool_calls
+
+
+def _make_tool_call(
+    tool_name: str,
+    tool_args: dict,
+    tool_call_id: str = "call_1",
+    turn_index: int = 0,
+    tab_index: int = 0,
+) -> ToolCallKickoff:
+    """Build a ToolCallKickoff for tests; turn_index/tab_index populate its Placement."""
+    return ToolCallKickoff(
+        tool_call_id=tool_call_id,
+        tool_name=tool_name,
+        tool_args=tool_args,
+        placement=Placement(turn_index=turn_index, tab_index=tab_index),
+    )
+
+
+class TestMergeToolCalls:
+    """Tests for _merge_tool_calls: pass-through, merging, and malformed-argument cases."""
+
+    def test_empty_list(self) -> None:
+        """Empty input returns empty output."""
+        result = _merge_tool_calls([])
+        assert result == []
+
+    def test_single_search_tool_call_not_merged(self) -> None:
+        """A single SearchTool call is returned as-is (no merging needed)."""
+        call = _make_tool_call(
+            tool_name="internal_search",
+            tool_args={"queries": ["query1"]},
+            tool_call_id="call_1",
+        )
+        result = _merge_tool_calls([call])
+
+        assert len(result) == 1
+        assert result[0].tool_name == "internal_search"
+        assert result[0].tool_args == {"queries": ["query1"]}
+        assert result[0].tool_call_id == "call_1"
+
+    def test_single_web_search_tool_call_not_merged(self) -> None:
+        """A single WebSearchTool call is returned as-is."""
+        call = _make_tool_call(
+            tool_name="web_search",
+            tool_args={"queries": ["web query"]},
+        )
+        result = _merge_tool_calls([call])
+
+        assert len(result) == 1
+        assert result[0].tool_name == "web_search"
+        assert result[0].tool_args == {"queries": ["web query"]}
+
+    def test_single_open_url_tool_call_not_merged(self) -> None:
+        """A single OpenURLTool call is returned as-is."""
+        call = _make_tool_call(
+            tool_name="open_url",
+            tool_args={"urls": ["https://example.com"]},
+        )
+        result = _merge_tool_calls([call])
+
+        assert len(result) == 1
+        assert result[0].tool_name == "open_url"
+        assert result[0].tool_args == {"urls": ["https://example.com"]}
+
+    def test_multiple_search_tool_calls_merged(self) -> None:
+        """Multiple SearchTool calls have their queries merged into one call."""
+        calls = [
+            _make_tool_call(
+                tool_name="internal_search",
+                tool_args={"queries": ["query1", "query2"]},
+                tool_call_id="call_1",
+            ),
+            _make_tool_call(
+                tool_name="internal_search",
+                tool_args={"queries": ["query3"]},
+                tool_call_id="call_2",
+            ),
+        ]
+        result = _merge_tool_calls(calls)
+
+        assert len(result) == 1
+        assert result[0].tool_name == "internal_search"
+        assert result[0].tool_args["queries"] == ["query1", "query2", "query3"]
+        # The merged call is expected to keep the first call's tool_call_id
+        assert result[0].tool_call_id == "call_1"
+
+    def test_multiple_web_search_tool_calls_merged(self) -> None:
+        """Multiple WebSearchTool calls have their queries merged."""
+        calls = [
+            _make_tool_call(
+                tool_name="web_search",
+                tool_args={"queries": ["web1"]},
+                tool_call_id="call_1",
+            ),
+            _make_tool_call(
+                tool_name="web_search",
+                tool_args={"queries": ["web2", "web3"]},
+                tool_call_id="call_2",
+            ),
+        ]
+        result = _merge_tool_calls(calls)
+
+        assert len(result) == 1
+        assert result[0].tool_name == "web_search"
+        assert result[0].tool_args["queries"] == ["web1", "web2", "web3"]
+
+    def test_multiple_open_url_tool_calls_merged(self) -> None:
+        """Multiple OpenURLTool calls have their urls merged."""
+        calls = [
+            _make_tool_call(
+                tool_name="open_url",
+                tool_args={"urls": ["https://a.com"]},
+                tool_call_id="call_1",
+            ),
+            _make_tool_call(
+                tool_name="open_url",
+                tool_args={"urls": ["https://b.com", "https://c.com"]},
+                tool_call_id="call_2",
+            ),
+        ]
+        result = _merge_tool_calls(calls)
+
+        assert len(result) == 1
+        assert result[0].tool_name == "open_url"
+        assert result[0].tool_args["urls"] == [
+            "https://a.com",
+            "https://b.com",
+            "https://c.com",
+        ]
+
+    def test_non_mergeable_tool_not_merged(self) -> None:
+        """Non-mergeable tools (e.g., python) are returned as separate calls."""
+        calls = [
+            _make_tool_call(
+                tool_name="python",
+                tool_args={"code": "print(1)"},
+                tool_call_id="call_1",
+            ),
+            _make_tool_call(
+                tool_name="python",
+                tool_args={"code": "print(2)"},
+                tool_call_id="call_2",
+            ),
+        ]
+        result = _merge_tool_calls(calls)
+
+        assert len(result) == 2
+        assert result[0].tool_args["code"] == "print(1)"
+        assert result[1].tool_args["code"] == "print(2)"
+
+    def test_mixed_mergeable_and_non_mergeable(self) -> None:
+        """Mix of mergeable and non-mergeable tools handles correctly."""
+        calls = [
+            _make_tool_call(
+                tool_name="internal_search",
+                tool_args={"queries": ["q1"]},
+                tool_call_id="search_1",
+            ),
+            _make_tool_call(
+                tool_name="python",
+                tool_args={"code": "x = 1"},
+                tool_call_id="python_1",
+            ),
+            _make_tool_call(
+                tool_name="internal_search",
+                tool_args={"queries": ["q2"]},
+                tool_call_id="search_2",
+            ),
+        ]
+        result = _merge_tool_calls(calls)
+
+        # Expect 2 calls: one merged internal_search call plus the python call
+        assert len(result) == 2
+
+        tool_names = {r.tool_name for r in result}
+        assert tool_names == {"internal_search", "python"}
+
+        search_result = next(r for r in result if r.tool_name == "internal_search")
+        assert search_result.tool_args["queries"] == ["q1", "q2"]
+
+        python_result = next(r for r in result if r.tool_name == "python")
+        assert python_result.tool_args["code"] == "x = 1"
+
+    def test_multiple_different_mergeable_tools(self) -> None:
+        """Multiple different mergeable tools each get merged separately."""
+        calls = [
+            _make_tool_call(
+                tool_name="internal_search",
+                tool_args={"queries": ["search1"]},
+            ),
+            _make_tool_call(
+                tool_name="web_search",
+                tool_args={"queries": ["web1"]},
+            ),
+            _make_tool_call(
+                tool_name="internal_search",
+                tool_args={"queries": ["search2"]},
+            ),
+            _make_tool_call(
+                tool_name="web_search",
+                tool_args={"queries": ["web2"]},
+            ),
+        ]
+        result = _merge_tool_calls(calls)
+
+        # Expect one merged call per tool name (internal_search and web_search)
+        assert len(result) == 2
+
+        search_result = next(r for r in result if r.tool_name == "internal_search")
+        assert search_result.tool_args["queries"] == ["search1", "search2"]
+
+        web_result = next(r for r in result if r.tool_name == "web_search")
+        assert web_result.tool_args["queries"] == ["web1", "web2"]
+
+    def test_preserves_first_call_placement(self) -> None:
+        """Merged call uses the placement from the first call."""
+        calls = [
+            _make_tool_call(
+                tool_name="internal_search",
+                tool_args={"queries": ["q1"]},
+                turn_index=1,
+                tab_index=2,
+            ),
+            _make_tool_call(
+                tool_name="internal_search",
+                tool_args={"queries": ["q2"]},
+                turn_index=3,
+                tab_index=4,
+            ),
+        ]
+        result = _merge_tool_calls(calls)
+
+        assert len(result) == 1
+        assert result[0].placement.turn_index == 1
+        assert result[0].placement.tab_index == 2
+
+    def test_preserves_other_args_from_first_call(self) -> None:
+        """Merged call preserves non-merge-field args from the first call."""
+        calls = [
+            _make_tool_call(
+                tool_name="internal_search",
+                tool_args={"queries": ["q1"], "other_param": "value1"},
+            ),
+            _make_tool_call(
+                tool_name="internal_search",
+                tool_args={"queries": ["q2"], "other_param": "value2"},
+            ),
+        ]
+        result = _merge_tool_calls(calls)
+
+        assert len(result) == 1
+        assert result[0].tool_args["queries"] == ["q1", "q2"]
+        # Non-merged args keep the first call's value ("value1", not "value2")
+        assert result[0].tool_args["other_param"] == "value1"
+
+    def test_handles_empty_queries_list(self) -> None:
+        """Handles calls with empty queries lists."""
+        calls = [
+            _make_tool_call(
+                tool_name="internal_search",
+                tool_args={"queries": []},
+            ),
+            _make_tool_call(
+                tool_name="internal_search",
+                tool_args={"queries": ["q1"]},
+            ),
+        ]
+        result = _merge_tool_calls(calls)
+
+        assert len(result) == 1
+        assert result[0].tool_args["queries"] == ["q1"]
+
+    def test_handles_missing_merge_field(self) -> None:
+        """Handles calls where the merge field is missing entirely."""
+        calls = [
+            _make_tool_call(
+                tool_name="internal_search",
+                tool_args={},  # No queries field
+            ),
+            _make_tool_call(
+                tool_name="internal_search",
+                tool_args={"queries": ["q1"]},
+            ),
+        ]
+        result = _merge_tool_calls(calls)
+
+        assert len(result) == 1
+        assert result[0].tool_args["queries"] == ["q1"]
+
+    def test_handles_string_value_instead_of_list(self) -> None:
+        """Handles edge case where merge field is a string instead of list."""
+        calls = [
+            _make_tool_call(
+                tool_name="internal_search",
+                tool_args={"queries": "single_query"},  # String instead of list
+            ),
+            _make_tool_call(
+                tool_name="internal_search",
+                tool_args={"queries": ["q2"]},
+            ),
+        ]
+        result = _merge_tool_calls(calls)
+
+        assert len(result) == 1
+        # The bare string is expected to be wrapped as a single list item
+        assert result[0].tool_args["queries"] == ["single_query", "q2"]
--- a/deployment/docker_compose/docker-compose.dev.yml
+++ b/deployment/docker_compose/docker-compose.dev.yml
@@ -2,11 +2,11 @@
 # This file exposes service ports for development and testing purposes
 #
 # Usage:
-#   docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d
+#   docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d --wait
 #
 # Or set COMPOSE_FILE environment variable:
 #   export COMPOSE_FILE=docker-compose.yml:docker-compose.dev.yml
-#   docker compose up -d
+#   docker compose up -d --wait

 services:
  api_server:
--- a/deployment/docker_compose/docker-compose.yml
+++ b/deployment/docker_compose/docker-compose.yml
@@ -58,7 +58,7 @@ services:
      - minio
    restart: unless-stopped
    # DEV: To expose ports, either:
-    # 1. Use docker-compose.dev.yml: docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d
+    # 1. Use docker-compose.dev.yml: docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d --wait
    # 2. Uncomment the ports below
    # ports:
    #   - "8080:8080"
@@ -83,7 +83,13 @@ services:
        max-size: "50m"
        max-file: "6"
    healthcheck:
-      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8080/health')"]
+      test:
+        [
+          "CMD",
+          "python",
+          "-c",
+          "import urllib.request; urllib.request.urlopen('http://localhost:8080/health')",
+        ]
      interval: 30s
      timeout: 20s
      retries: 3
@@ -299,7 +305,7 @@ services:
      - POSTGRES_USER=${POSTGRES_USER:-postgres}
      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password}
    # DEV: To expose ports, either:
-    # 1. Use docker-compose.dev.yml: docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d
+    # 1. Use docker-compose.dev.yml: docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d --wait
    # 2. Uncomment the ports below
    # ports:
    #   - "5432:5432"
@@ -321,7 +327,7 @@ services:
    environment:
      - VESPA_SKIP_UPGRADE_CHECK=${VESPA_SKIP_UPGRADE_CHECK:-true}
    # DEV: To expose ports, either:
-    # 1. Use docker-compose.dev.yml: docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d
+    # 1. Use docker-compose.dev.yml: docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d --wait
    # 2. Uncomment the ports below
    # ports:
    #   - "19071:19071"
@@ -378,7 +384,7 @@ services:
    image: redis:7.4-alpine
    restart: unless-stopped
    # DEV: To expose ports, either:
-    # 1. Use docker-compose.dev.yml: docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d
+    # 1. Use docker-compose.dev.yml: docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d --wait
    # 2. Uncomment the ports below
    # ports:
    #   - "6379:6379"
@@ -396,7 +402,7 @@ services:
    image: minio/minio:RELEASE.2025-07-23T15-54-02Z-cpuv1
    restart: unless-stopped
    # DEV: To expose ports, either:
-    # 1. Use docker-compose.dev.yml: docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d
+    # 1. Use docker-compose.dev.yml: docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d --wait
    # 2. Uncomment the ports below
    # ports:
    #   - "9004:9000"
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,8 +5,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "onyx"
 version = "0.0.0"
-# TODO(jamison): Upgrade dependencies until they're compatible with python >3.13.
-requires-python = ">=3.11,<3.13"
+requires-python = ">=3.11"
 # Shared dependencies between backend and model_server
 dependencies = [
    "aioboto3==15.1.0",
@@ -91,7 +90,7 @@ backend = [
    "python-dateutil==2.8.2",
    "python-gitlab==5.6.0",
    "python-pptx==0.6.23",
-    "pypdf==6.1.3",
+    "pypdf==6.6.0",
    "pytest-mock==3.12.0",
    "pytest-playwright==0.7.0",
    "python-docx==1.1.2",
@@ -111,8 +110,8 @@ backend = [
    "tiktoken==0.7.0",
    "timeago==1.0.16",
    "types-openpyxl==3.0.4.7",
-    "unstructured==0.15.1",
-    "unstructured-client==0.25.4",
+    "unstructured==0.18.27",
+    "unstructured-client==0.42.6",
    "zulip==0.8.2",
    "hubspot-api-client==11.1.0",
    "asana==5.0.8",
@@ -181,7 +180,7 @@ ee = [
 model_server = [
    "accelerate==1.6.0",
    "einops==0.8.1",
-    "numpy==1.26.4",
+    "numpy==2.4.1",
    "safetensors==0.5.3",
    "sentence-transformers==4.0.2",
    "torch==2.6.0",
--- a/uv.lock
+++ b/uv.lock
--- a/web/jest.config.js
+++ b/web/jest.config.js
@@ -140,6 +140,7 @@ module.exports = {
        "**/src/**/codeUtils.test.ts",
        "**/src/lib/**/*.test.ts",
        "**/src/app/**/services/*.test.ts",
+        "**/src/refresh-components/**/*.test.ts",
        // Add more patterns here as you add more unit tests
      ],
    },
--- a/web/package-lock.json
+++ b/web/package-lock.json
@@ -40,6 +40,7 @@
        "@sentry/nextjs": "^10.22.0",
        "@sentry/tracing": "^7.120.3",
        "@stripe/stripe-js": "^4.6.0",
+        "@tailwindcss/container-queries": "^0.1.1",
        "@tanstack/react-table": "^8.21.3",
        "autoprefixer": "^10.4.22",
        "class-variance-authority": "^0.7.0",
@@ -5875,6 +5876,15 @@
        "tslib": "^2.8.0"
      }
    },
+    "node_modules/@tailwindcss/container-queries": {
+      "version": "0.1.1",
+      "resolved": "https://registry.npmjs.org/@tailwindcss/container-queries/-/container-queries-0.1.1.tgz",
+      "integrity": "sha512-p18dswChx6WnTSaJCSGx6lTmrGzNNvm2FtXmiO6AuA1V4U5REyoqwmT6kgAsIMdjo07QdAfYXHJ4hnMtfHzWgA==",
+      "license": "MIT",
+      "peerDependencies": {
+        "tailwindcss": ">=3.2.0"
+      }
+    },
    "node_modules/@tailwindcss/typography": {
      "version": "0.5.19",
      "dev": true,
@@ -10298,6 +10308,7 @@
    },
    "node_modules/fsevents": {
      "version": "2.3.2",
+      "dev": true,
      "license": "MIT",
      "optional": true,
      "os": [
--- a/web/package.json
+++ b/web/package.json
@@ -56,6 +56,7 @@
    "@sentry/nextjs": "^10.22.0",
    "@sentry/tracing": "^7.120.3",
    "@stripe/stripe-js": "^4.6.0",
+    "@tailwindcss/container-queries": "^0.1.1",
    "@tanstack/react-table": "^8.21.3",
    "autoprefixer": "^10.4.22",
    "class-variance-authority": "^0.7.0",
--- a/web/src/app/admin/api-key/OnyxApiKeyForm.tsx
+++ b/web/src/app/admin/api-key/OnyxApiKeyForm.tsx
@@ -83,11 +83,7 @@ export default function OnyxApiKeyForm({
                  can be added or changed later!
                </Text>

-                <TextFormField
-                  name="name"
-                  label="Name (optional):"
-                  autoCompleteDisabled={true}
-                />
+                <TextFormField name="name" label="Name (optional):" />

                <SelectorFormField
                  // defaultValue is managed by Formik
--- a/web/src/app/admin/configuration/image-generation/forms/ImageGenFormWrapper.tsx
+++ b/web/src/app/admin/configuration/image-generation/forms/ImageGenFormWrapper.tsx
@@ -294,7 +294,7 @@ export function ImageGenFormWrapper<T extends FormValues>({
            }
            isSubmitting={isSubmitting}
          >
-            <Form className="flex flex-col gap-0 bg-background-tint-01">
+            <Form className="flex flex-col gap-0 bg-background-tint-01 w-full">
              <div className="flex flex-col gap-4 w-full">
                {children(childProps)}
              </div>
--- a/web/src/app/admin/configuration/llm/forms/AnthropicForm.tsx
+++ b/web/src/app/admin/configuration/llm/forms/AnthropicForm.tsx
@@ -6,7 +6,7 @@ import {
  ProviderFormContext,
 } from "./components/FormWrapper";
 import { DisplayNameField } from "./components/DisplayNameField";
-import { ApiKeyField } from "./components/ApiKeyField";
+import PasswordInputTypeInField from "@/refresh-components/form/PasswordInputTypeInField";
 import { FormActionButtons } from "./components/FormActionButtons";
 import {
  buildDefaultInitialValues,
@@ -94,7 +94,7 @@ export function AnthropicForm({
                  <Form className={LLM_FORM_CLASS_NAME}>
                    <DisplayNameField disabled={!!existingLlmProvider} />

-                    <ApiKeyField />
+                    <PasswordInputTypeInField name="api_key" label="API Key" />

                    <DisplayModels
                      modelConfigurations={modelConfigurations}
--- a/web/src/app/admin/configuration/llm/forms/AzureForm.tsx
+++ b/web/src/app/admin/configuration/llm/forms/AzureForm.tsx
@@ -7,7 +7,7 @@ import {
  ProviderFormContext,
 } from "./components/FormWrapper";
 import { DisplayNameField } from "./components/DisplayNameField";
-import { ApiKeyField } from "./components/ApiKeyField";
+import PasswordInputTypeInField from "@/refresh-components/form/PasswordInputTypeInField";
 import { FormActionButtons } from "./components/FormActionButtons";
 import {
  buildDefaultInitialValues,
@@ -140,7 +140,7 @@ export function AzureForm({
                  <Form className={LLM_FORM_CLASS_NAME}>
                    <DisplayNameField disabled={!!existingLlmProvider} />

-                    <ApiKeyField />
+                    <PasswordInputTypeInField name="api_key" label="API Key" />

                    <TextFormField
                      name="target_uri"
--- a/web/src/app/admin/configuration/llm/forms/BedrockForm.tsx
+++ b/web/src/app/admin/configuration/llm/forms/BedrockForm.tsx
@@ -1,6 +1,7 @@
 import { useState, useEffect } from "react";
 import { Form, Formik, FormikProps } from "formik";
 import { SelectorFormField, TextFormField } from "@/components/Field";
+import PasswordInputTypeInField from "@/refresh-components/form/PasswordInputTypeInField";
 import {
  LLMProviderFormProps,
  LLMProviderView,
@@ -193,11 +194,10 @@ function BedrockFormInternals({
                label="AWS Access Key ID"
                placeholder="AKIAIOSFODNN7EXAMPLE"
              />
-              <TextFormField
+              <PasswordInputTypeInField
                name={FIELD_AWS_SECRET_ACCESS_KEY}
                label="AWS Secret Access Key"
                placeholder="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
-                type="password"
              />
            </div>
          </TabsContent>
@@ -210,11 +210,10 @@ function BedrockFormInternals({
            )}
          >
            <div className="flex flex-col gap-4">
-              <TextFormField
+              <PasswordInputTypeInField
                name={FIELD_AWS_BEARER_TOKEN_BEDROCK}
                label="AWS Bedrock Long-term API Key"
                placeholder="Your long-term API key"
-                type="password"
              />
            </div>
          </TabsContent>
--- a/web/src/app/admin/configuration/llm/forms/CustomForm.tsx
+++ b/web/src/app/admin/configuration/llm/forms/CustomForm.tsx
@@ -11,7 +11,7 @@ import { LLMProviderFormProps, LLMProviderView } from "../interfaces";
 import * as Yup from "yup";
 import { ProviderFormEntrypointWrapper } from "./components/FormWrapper";
 import { DisplayNameField } from "./components/DisplayNameField";
-import { ApiKeyField } from "./components/ApiKeyField";
+import PasswordInputTypeInField from "@/refresh-components/form/PasswordInputTypeInField";
 import { FormActionButtons } from "./components/FormActionButtons";
 import {
  submitLLMProvider,
@@ -190,7 +190,10 @@ export function CustomForm({
                      determine which fields are required.
                    </Text>

-                    <ApiKeyField label="[Optional] API Key" />
+                    <PasswordInputTypeInField
+                      name="api_key"
+                      label="[Optional] API Key"
+                    />

                    <TextFormField
                      name="api_base"
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Dane Urban	1c5d6b88e3	nit	2026-01-12 16:45:04 -08:00
roshan	9021c607f2	chore(dr): finer grained tracing for clarification step, research plan step, and orchestration step (#7374 ) Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>	2026-01-12 23:58:27 +00:00
Jamison Lahman	c03b0d80fd	chore(deps): remove `requires-python < 3.13` (#7367 )	2026-01-12 23:21:02 +00:00
acaprau	fcf0b316a4	feat(opensearch): More feature parity (#7286 )	2026-01-12 23:01:55 +00:00
Jamison Lahman	157f672b4b	chore(deps): upgrade `numpy`, `unstructured`, `unstructured-client` (#7369 )	2026-01-12 22:58:11 +00:00
dependabot[bot]	51b9484b96	chore(deps): bump actions/upload-artifact from 5.0.0 to 6.0.0 (#6964 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Jamison Lahman <jamison@lahman.dev>	2026-01-12 21:53:48 +00:00
Dane Urban	2c881c8b61	fix test issues	2026-01-12 13:52:00 -08:00
Dane Urban	3725e3485c	Merge branch 'main' into tool_name_migration	2026-01-12 13:49:58 -08:00
Danelegend	0c8f55c049	fix(tools): persist enabled tools in ui (#7347 )	2026-01-12 21:47:29 +00:00
dependabot[bot]	c7be2571d1	chore(deps): bump tauri-apps/tauri-action from 0.6.0 to 0.6.1 (#7371 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2026-01-12 13:48:46 -08:00
Dane Urban	d72fd84bcd	nit	2026-01-12 13:39:48 -08:00
dependabot[bot]	4948b6cca9	chore(deps): bump actions/stale from 10.1.0 to 10.1.1 (#6965 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>	2026-01-12 13:12:24 -08:00
Dane Urban	6b7d560b0c	change to list	2026-01-12 13:09:43 -08:00
Jamison Lahman	638ea5f316	chore(deps): fix `uv-lock` hook (#7368 )	2026-01-12 12:52:17 -08:00
dependabot[bot]	6e3268ca75	chore(deps): bump pypdf from 6.1.3 to 6.6.0 (#7319 ) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Jamison Lahman <jamison@lahman.dev>	2026-01-12 20:36:47 +00:00
Wenxi	d8921df60c	fix: onboarding modal styling (#7363 )	2026-01-12 20:29:23 +00:00
Yuhong Sun	693d9f5f69	fix: Editing First Message (#7366 )	2026-01-12 19:45:01 +00:00
Jamison Lahman	02e17871cc	chore(devtools): recommend starting dev dockers with `--wait` (#7365 )	2026-01-12 19:13:00 +00:00
Wenxi	209cfd00b0	fix: only show latest release notification for nightly versions (#7362 )	2026-01-12 11:10:28 -08:00
Dane Urban	2af27ba1f5	.	2026-01-12 10:58:38 -08:00
Jessica Singh	cd36baa484	fix(web search): removing site: operator from exa query (#7248 )	2026-01-12 18:22:18 +00:00
Dane Urban	75ac128836	make up to date	2026-01-12 09:56:52 -08:00
Dane Urban	a688f4ee81	Merge branch 'main' into tool_name_migration	2026-01-12 09:55:38 -08:00
Dane Urban	ca54a85929	update revision	2026-01-12 09:35:04 -08:00
Raunak Bhagat	c78fe275af	refactor: Popover cleanup (#7356 )	2026-01-12 12:08:30 +00:00
Raunak Bhagat	c935c4808f	fix: More actions cards fixes (#7358 )	2026-01-12 03:27:42 -08:00
Raunak Bhagat	4ebcfef541	fix: Fix actions cards (#7357 )	2026-01-12 10:57:22 +00:00
SubashMohan	e320ef9d9c	Fix/agent creation files (#7346 )	2026-01-12 07:00:47 +00:00
Nikolas Garza	9e02438af5	chore: standardize password/secret inputs and update per design docs (#7316 )	2026-01-12 06:26:09 +00:00
Dane Urban	3571a8d39a	fix revision id	2026-01-11 18:13:32 -08:00
Danelegend	177e097ddb	fix(chat): newly created chats being marked as failed (#7310 ) Co-authored-by: Dane Urban <durban@Danes-MacBook-Pro.local>	2026-01-12 02:02:49 +00:00
Dane Urban	08ac2f2e20	remove try-except	2026-01-11 18:00:57 -08:00
Danelegend	9b47267d48	Delete backend/alembic/versions/19a896a66d3f_tool_name_consistency.py	2026-01-11 17:56:00 -08:00
Dane Urban	c57ee34a33	Change this	2026-01-11 17:55:26 -08:00
Dane Urban	318a66a189	Add comment	2026-01-11 17:39:29 -08:00
Dane Urban	edf9e68abf	nit	2026-01-11 17:36:09 -08:00
Wenxi	9ecd47ec31	feat: in app notifications for changelog (#7253 )	2026-01-12 01:09:04 +00:00
Nikolas Garza	83f3d29b10	fix: stop federated OAuth modal from appearing permanently after skips (#7351 )	2026-01-11 22:20:13 +00:00
Yuhong Sun	12e668cc0f	feat: Deep Research Replay (#7340 )	2026-01-11 22:17:09 +00:00
SubashMohan	afe8376d5e	feat: Exclude image generation providers from LLM fetch in API calls (#7348 )	2026-01-11 21:13:25 +00:00
Wenxi	082ef3e096	fix: always start onboarding at first step and track by user (#7315 )	2026-01-11 21:03:17 +00:00
Nikolas Garza	cb2951a1c0	perf: switch BeautifulSoup parser from html.parser to lxml for web crawler (#7350 )	2026-01-11 20:46:35 +00:00
Corey Auger	eda5598af5	fix: update docs link (#7349 ) Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>	2026-01-11 12:44:48 -08:00
Justin Tahara	0bbb4b6988	fix(ui): Action Strikethrough when not configured (#7273 )	2026-01-11 11:21:17 +00:00
Jamison Lahman	4768aadb20	refactor(fe): WelcomeMessage nits (#7344 )	2026-01-10 22:01:48 -08:00
Jamison Lahman	e05e85e782	fix(fe): "Pick a date range" button wrapping (#7343 )	2026-01-10 21:22:20 -08:00
Jamison Lahman	6408f61307	fix(fe): avoid internal table scroll on query history page (#7342 )	2026-01-10 20:39:17 -08:00
Jamison Lahman	5a5cd51e4f	fix(fe): SidebarTabs are Links (#7341 )	2026-01-10 20:01:31 -08:00
Danelegend	7c047c47a0	fix(chat): Chat in-progress messages (#7318 ) Co-authored-by: Dane Urban <durban@Danes-MacBook-Pro.local>	2026-01-11 00:29:39 +00:00
Evan Lohn	22138bbb33	fix: vertex prompt caching (#7339 ) Co-authored-by: Weves <chrisweaver101@gmail.com>	2026-01-11 00:23:39 +00:00
Chris Weaver	7cff1064a8	chore: reenable auto update test (#7146 )	2026-01-10 16:00:48 -08:00
Wenxi	deeb6fdcd2	fix: anonymous users cookie and admin panel config (#7321 )	2026-01-10 15:12:27 -08:00
Chris Weaver	3e7f4e0aa5	fix: auto-sync (#7337 )	2026-01-10 13:43:40 -08:00
Raunak Bhagat	ac73671e35	refactor: Components updates (#7308 )	2026-01-10 06:30:39 +00:00
Raunak Bhagat	3c20d132e0	feat: Modal updates (#7306 )	2026-01-10 05:13:09 +00:00
Yuhong Sun	0e3e7eb4a2	feat: Create new chat session button after msg send (#7332 ) Co-authored-by: Raunak Bhagat <r@rabh.io>	2026-01-10 04:56:54 +00:00
Yuhong Sun	c85aebe8ab	Tables (#7333 )	2026-01-09 20:40:15 -08:00
Yuhong Sun	a47e6a3146	feat: Enable triple click on content in the chat (#7331 ) Co-authored-by: Raunak Bhagat <r@rabh.io>	2026-01-09 20:37:36 -08:00
Jamison Lahman	1e61737e03	fix(fe): Tags have consistent height on hover (#7328 )	2026-01-09 20:20:36 -08:00
Wenxi	c7fc1cd5ae	chore: allow tenant cleanup script to skip control plane if tenant not found (#7290 )	2026-01-10 00:17:26 +00:00
roshan	e2b60bf67c	feat(posthog): track message origin analytics in posthog (#7313 )	2026-01-10 00:11:17 +00:00
Danelegend	f4d4d14286	fix(chat): post llm loop callback (#7309 ) Co-authored-by: Dane Urban <durban@Danes-MacBook-Pro.local>	2026-01-09 23:53:22 +00:00
Yuhong Sun	1c24bc6ea2	Opensearch README (#7327 )	2026-01-09 15:53:22 -08:00
Yuhong Sun	cacbd18dcd	feat: Opensearch README (#7325 )	2026-01-09 15:28:08 -08:00
Nikolas Garza	8527b83b15	fix(sidebar): Allow unpinning all agents and fix icon flicker (#7241 )	2026-01-09 14:20:46 -08:00
Nikolas Garza	33e37a1846	fix: make autocomplete opt in (#7317 )	2026-01-09 20:04:22 +00:00
Jamison Lahman	d454d8a878	fix(chat): wide tables can be scrolled (#7311 )	2026-01-09 19:07:40 +00:00
roshan	00ad65a6a8	feat: chrome extension (#6704 )	2026-01-09 18:45:23 +00:00
Nikolas Garza	dac60d403c	fix(chat): show "User has stopped generation" indicator when user cancels (#7312 )	2026-01-09 18:14:35 +00:00
Evan Lohn	6256b2854d	chore: bump indexing usage (#7307 )	2026-01-09 17:46:27 +00:00
Danelegend	8acb8e191d	fix(chat): use url when name unknown (#7278 ) Co-authored-by: Dane Urban <durban@Danes-MacBook-Pro.local>	2026-01-09 17:16:20 +00:00
Evan Lohn	8c4cbddc43	fix: minor perm sync improvements (#7296 )	2026-01-09 05:46:23 +00:00
Yuhong Sun	f6cd006bd6	chore: Refactor tool exceptions (#7280 )	2026-01-09 04:01:12 +00:00
Jamison Lahman	0033934319	chore(perf): remove isEqual memoization check (#7304 ) Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>	2026-01-09 03:20:37 +00:00
Raunak Bhagat	ff87b79d14	fix: `Section` layout component fix (#7305 )	2026-01-08 19:25:33 -08:00
Raunak Bhagat	ebf18af7c9	refactor: UI components cleanup (#7301 ) Co-authored-by: Nikolas Garza <90273783+nmgarza5@users.noreply.github.com>	2026-01-09 03:09:20 +00:00
Raunak Bhagat	cf67ae962c	feat: Add a new `GeneralLayouts` file and update layout components (#7297 ) Co-authored-by: Nikolas Garza <90273783+nmgarza5@users.noreply.github.com>	2026-01-09 02:50:21 +00:00